/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* This must come before any other includes. */
#include "defs.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro. */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expand_logical_immediate. */

/* For i = 1, ..., N set result<i-1> to 1; all other bits are zero. */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1UL : ((1ULL << N) - 1));
}

/* Return result<0> = val<N>, i.e. bit N of VAL as a 0-or-1 value. */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
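
/* Editor's note: illustrative values for the two helpers above
   (a sketch, not part of the simulator):

     ones (4)           => 0x000000000000000f
     ones (64)          => 0xffffffffffffffff
     pickbit (0xa4, 2)  => 1   (bit 2 of 0b10100100)
     pickbit (0xa4, 3)  => 0  */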

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits set to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated. */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED. */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1. */
  /* NOTE: S can't be 63 due to detection above. */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R. */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size. */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}
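
/* Editor's worked examples (a sketch; the values follow directly from
   the decode above):

     expand_logical_immediate (0x03, 0, 1) => 0x000000000000000f
       (N=1: 64-bit element, S+1 = 4 ones, no rotate)
     expand_logical_immediate (0x03, 0, 0) => 0x0000000f0000000f
       (N=0, S = 0xxxxx: 32-bit element, replicated twice)
     expand_logical_immediate (0x21, 2, 0) => 0xc000c000c000c000
       (N=0, S = 10xxxx: 16-bit element 0b11 rotated right by 2, replicated)
     expand_logical_immediate (0x1f, 0, 0) => 0
       (an all-ones element is rejected)  */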

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of table entries. */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
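
#if 0
/* Editor's sketch (hypothetical helper, not part of the simulator):
   a decoder would consult the table by concatenating N:immr:imms from
   instr[22,10] and treat a zero entry as an invalid encoding, since
   expand_logical_immediate returns 0 exactly for rejected patterns.  */
static uint64_t
lookup_logical_immediate (sim_cpu *cpu)
{
  uint32_t index = INSTR (22, 10);	/* N:immr:imms, 13 bits.  */
  uint64_t imm = LITable[index];

  if (imm == 0)
    HALT_UNALLOC;
  return imm;
}
#endif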

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode. */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 2:
      /* aarch64_notifyMethodExit (); */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    }
}

/* Secondary decode within top level groups. */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code. A register
     argument holds the address of the x86 routine. Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned. */

  uint32_t PSEUDO_HALT = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment. */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP. */

/* 32 bit load 32 bit unscaled signed 9 bit. */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit. */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit. */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit. */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit. */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset. */

/* 32 bit store 32 bit unscaled signed 9 bit. */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP. */

/* 32 bit pc-relative load */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load. */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128. */

#define SCALE(_offset, _elementSize) \
  ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is an element size as for SCALE.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the element shift is applied;
   when it is Unscaled the shift count is 0. */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
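
/* Editor's sketch of the expansions (assuming ScaleShift32 == 2 and
   ScaleShift64 == 3, i.e. log2 of the element byte size):

     SCALE (offset, 32)             => offset << 2   (offset * 4)
     OPT_SCALE (disp, 64, Scaled)   => disp << 3     (disp * 8)
     OPT_SCALE (disp, 64, Unscaled) => disp          (shift count 0)  */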

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t. */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible. */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
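
/* Editor's note, illustrative results (a sketch):

     extend (0x80000000, UXTW) => 0x0000000080000000  (zero-extend)
     extend (0x80000000, SXTW) => 0xffffffff80000000  (sign-extend)  */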

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register. */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
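
/* Editor's note: the *_wb loads and stores in this file all follow the
   writeback pattern shown in fldrs_wb above (a sketch):

     NoWriteBack: access [rn + offset]; rn is left unchanged.
     Pre:         access [rn + offset]; then rn += offset.
     Post:        access [rn];          then rn += offset.  */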

/* Load 8 bit with unsigned 12 bit offset. */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit. */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit. */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit. */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit. */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset. */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to arrive raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode. */
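
/* Editor's sketch of the four modes, using the 64-bit LDR handlers
   below as an example (offsets shown as they arrive from decode;
   assembly forms are illustrative only):

     ldr_abs (cpu, imm12)              LDR x0, [x1, #imm12 * 8]
     ldr_wb (cpu, simm9, Pre)          LDR x0, [x1, #simm9]!
     ldr_wb (cpu, simm9, Post)         LDR x0, [x1], #simm9
     ldr_scale_ext (cpu, Scaled, SXTW) LDR x0, [x1, w2, SXTW #3]  */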

/* 32 bit load 32 bit scaled unsigned 12 bit */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit. */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit. */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit. */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit. */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit. */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset. */

/* 32 bit store scaled unsigned 12 bit. */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset. */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit. */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit. */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load. */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit. */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit. */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64). */

  /* TODO : implement prefetch of address. */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement. */

  /* TODO : implement prefetch of address */
}

/* 64 bit pc-relative prefetch. */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset. */

  /* TODO : implement this */
}

/* Load-store exclusive. */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15); */
  /* int exclusive = ! INSTR (23, 23); */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
}
static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30]:instr[26] = opc:V : 000 ==> LDRW,  001 ==> FLDRS
                                      010 ==> LDRX,  011 ==> FLDRD
                                      100 ==> LDRSW, 101 ==> FLDRQ
                                      110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19 */

  /* unsigned rt = INSTR (4, 0); */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
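
/* Editor's worked example (a sketch): for LDR x2, <label> the fields
   above give instr[31,30] = 01 and instr[26] = 0, so dispatch = 2 and
   ldr_pcrel is chosen, with imm = simm19 counted in words (the pc-rel
   handlers multiply it by 4).  */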

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest). */
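
/* Editor's sketch: ADD x0, x1, #1, LSL #12 would arrive here with
   aimm == 0x1000 -- the optional 12-bit shift has already been applied
   by the decode layer, so the handlers below simply add aimm.  */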

/* 32 bit add immediate. */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate. */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
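
/* Editor's worked examples for set_flags_for_add32 (a sketch; the
   values follow from the checks above):

     0x7fffffff + 1 => result 0x80000000 : N and V set (signed overflow)
     0xffffffff + 1 => result 0x00000000 : Z and C set (unsigned carry)
     0x00000001 + 1 => result 0x00000002 : no flags set  */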

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
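
/* Editor's note: for SUB/CMP the C flag means "no borrow", i.e. C is
   set when value1 >= value2 as unsigned.  Worked 64-bit examples
   (a sketch; they follow from set_flags_for_sub64 above):

     5 - 3 => C set, no other flags
     3 - 5 => N set, C clear (borrow)
     5 - 5 => Z and C set  */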
1741
1742 static void
1743 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1744 {
1745 uint32_t flags = 0;
1746
1747 if (result == 0)
1748 flags |= Z;
1749 else
1750 flags &= ~ Z;
1751
1752 if (result & (1 << 31))
1753 flags |= N;
1754 else
1755 flags &= ~ N;
1756
1757 aarch64_set_CPSR (cpu, flags);
1758 }
1759
1760 static void
1761 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1762 {
1763 uint32_t flags = 0;
1764
1765 if (result == 0)
1766 flags |= Z;
1767 else
1768 flags &= ~ Z;
1769
1770 if (result & (1ULL << 63))
1771 flags |= N;
1772 else
1773 flags &= ~ N;
1774
1775 aarch64_set_CPSR (cpu, flags);
1776 }
1777
1778 /* 32 bit add immediate set flags. */
1779 static void
1780 adds32 (sim_cpu *cpu, uint32_t aimm)
1781 {
1782 unsigned rn = INSTR (9, 5);
1783 unsigned rd = INSTR (4, 0);
1784 /* TODO : do we need to worry about signs here? */
1785 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1786
1787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1788 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1789 set_flags_for_add32 (cpu, value1, aimm);
1790 }
1791
1792 /* 64 bit add immediate set flags. */
1793 static void
1794 adds64 (sim_cpu *cpu, uint32_t aimm)
1795 {
1796 unsigned rn = INSTR (9, 5);
1797 unsigned rd = INSTR (4, 0);
1798 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1799 uint64_t value2 = aimm;
1800
1801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1802 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1803 set_flags_for_add64 (cpu, value1, value2);
1804 }
1805
1806 /* 32 bit sub immediate. */
1807 static void
1808 sub32 (sim_cpu *cpu, uint32_t aimm)
1809 {
1810 unsigned rn = INSTR (9, 5);
1811 unsigned rd = INSTR (4, 0);
1812
1813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1814 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1815 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1816 }
1817
1818 /* 64 bit sub immediate. */
1819 static void
1820 sub64 (sim_cpu *cpu, uint32_t aimm)
1821 {
1822 unsigned rn = INSTR (9, 5);
1823 unsigned rd = INSTR (4, 0);
1824
1825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1826 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1827 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1828 }
1829
1830 /* 32 bit sub immediate set flags. */
1831 static void
1832 subs32 (sim_cpu *cpu, uint32_t aimm)
1833 {
1834 unsigned rn = INSTR (9, 5);
1835 unsigned rd = INSTR (4, 0);
1836 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1837 uint32_t value2 = aimm;
1838
1839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1840 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1841 set_flags_for_sub32 (cpu, value1, value2);
1842 }
1843
1844 /* 64 bit sub immediate set flags. */
1845 static void
1846 subs64 (sim_cpu *cpu, uint32_t aimm)
1847 {
1848 unsigned rn = INSTR (9, 5);
1849 unsigned rd = INSTR (4, 0);
1850 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1851 uint64_t value2 = aimm;
1852
1853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1854 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1855 set_flags_for_sub64 (cpu, value1, value2);
1856 }
1857
1858 /* Data Processing Register. */
1859
1860 /* First two helpers to perform the shift operations. */
1861
1862 static inline uint32_t
1863 shifted32 (uint32_t value, Shift shift, uint32_t count)
1864 {
1865 switch (shift)
1866 {
1867 default:
1868 case LSL:
1869 return (value << count);
1870 case LSR:
1871 return (value >> count);
1872 case ASR:
1873 {
1874 int32_t svalue = value;
1875 return (svalue >> count);
1876 }
1877 case ROR:
1878 {
1879 uint32_t top = value >> count;
1880 uint32_t bottom = count ? value << (32 - count) : 0; /* A shift by 32 would be undefined. */
1881 return (bottom | top);
1882 }
1883 }
1884 }
1885
1886 static inline uint64_t
1887 shifted64 (uint64_t value, Shift shift, uint32_t count)
1888 {
1889 switch (shift)
1890 {
1891 default:
1892 case LSL:
1893 return (value << count);
1894 case LSR:
1895 return (value >> count);
1896 case ASR:
1897 {
1898 int64_t svalue = value;
1899 return (svalue >> count);
1900 }
1901 case ROR:
1902 {
1903 uint64_t top = value >> count;
1904 uint64_t bottom = count ? value << (64 - count) : 0; /* A shift by 64 would be undefined. */
1905 return (bottom | top);
1906 }
1907 }
1908 }
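
/* Worked examples for the shift helpers, kept in a disabled block.
   ROR feeds the bits shifted out on the right back in at the top:
   rotating 0x80000001 right by one gives 0xC0000000.  The ASR case
   assumes the usual arithmetic right shift of negative
   two's-complement integers.  */
#if 0
static int
shift_helper_examples (void)
{
  return shifted32 (0x80000001, ROR, 1) == 0xC0000000
    && shifted32 (0x80000000, ASR, 31) == 0xFFFFFFFF
    && shifted64 (1ULL, LSL, 63) == 0x8000000000000000ULL;
}
#endif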
1909
1910 /* Arithmetic shifted register.
1911 These allow an optional LSL, ASR or LSR to the second source
1912 register with a count up to the register bit count.
1913
1914 N.B. register args may not be SP. */
1915
1916 /* 32 bit ADD shifted register. */
1917 static void
1918 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1919 {
1920 unsigned rm = INSTR (20, 16);
1921 unsigned rn = INSTR (9, 5);
1922 unsigned rd = INSTR (4, 0);
1923
1924 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1925 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1926 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1927 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1928 shift, count));
1929 }
1930
1931 /* 64 bit ADD shifted register. */
1932 static void
1933 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1934 {
1935 unsigned rm = INSTR (20, 16);
1936 unsigned rn = INSTR (9, 5);
1937 unsigned rd = INSTR (4, 0);
1938
1939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1940 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1941 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1942 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1943 shift, count));
1944 }
1945
1946 /* 32 bit ADD shifted register setting flags. */
1947 static void
1948 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1949 {
1950 unsigned rm = INSTR (20, 16);
1951 unsigned rn = INSTR (9, 5);
1952 unsigned rd = INSTR (4, 0);
1953
1954 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1955 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1956 shift, count);
1957
1958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1959 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1960 set_flags_for_add32 (cpu, value1, value2);
1961 }
1962
1963 /* 64 bit ADD shifted register setting flags. */
1964 static void
1965 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1966 {
1967 unsigned rm = INSTR (20, 16);
1968 unsigned rn = INSTR (9, 5);
1969 unsigned rd = INSTR (4, 0);
1970
1971 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1972 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1973 shift, count);
1974
1975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1976 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1977 set_flags_for_add64 (cpu, value1, value2);
1978 }
1979
1980 /* 32 bit SUB shifted register. */
1981 static void
1982 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1983 {
1984 unsigned rm = INSTR (20, 16);
1985 unsigned rn = INSTR (9, 5);
1986 unsigned rd = INSTR (4, 0);
1987
1988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1989 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1990 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1991 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1992 shift, count));
1993 }
1994
1995 /* 64 bit SUB shifted register. */
1996 static void
1997 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1998 {
1999 unsigned rm = INSTR (20, 16);
2000 unsigned rn = INSTR (9, 5);
2001 unsigned rd = INSTR (4, 0);
2002
2003 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2004 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2005 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2006 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2007 shift, count));
2008 }
2009
2010 /* 32 bit SUB shifted register setting flags. */
2011 static void
2012 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2013 {
2014 unsigned rm = INSTR (20, 16);
2015 unsigned rn = INSTR (9, 5);
2016 unsigned rd = INSTR (4, 0);
2017
2018 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2019 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2020 shift, count);
2021
2022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2023 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2024 set_flags_for_sub32 (cpu, value1, value2);
2025 }
2026
2027 /* 64 bit SUB shifted register setting flags. */
2028 static void
2029 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2030 {
2031 unsigned rm = INSTR (20, 16);
2032 unsigned rn = INSTR (9, 5);
2033 unsigned rd = INSTR (4, 0);
2034
2035 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2036 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2037 shift, count);
2038
2039 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2040 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2041 set_flags_for_sub64 (cpu, value1, value2);
2042 }
2043
2044 /* First a couple more helpers to fetch the
2045 relevant source register element either
2046 sign or zero extended as required by the
2047 extension value. */
2048
2049 static uint32_t
2050 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2051 {
2052 switch (extension)
2053 {
2054 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2055 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2056 case UXTW: /* Fall through. */
2057 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2058 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2059 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2060 case SXTW: /* Fall through. */
2061 case SXTX: /* Fall through. */
2062 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2063 }
2064 }
2065
2066 static uint64_t
2067 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2068 {
2069 switch (extension)
2070 {
2071 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2072 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2073 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2074 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2075 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2076 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2077 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2078 case SXTX:
2079 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2080 }
2081 }
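
/* Worked example, kept in a disabled block: the same low byte 0x80
   either zero extends (UXTB) to 0x80 or sign extends (SXTB) to all
   ones above bit 7.  The register number used here is arbitrary,
   and the check assumes that register currently holds 0x80.  */
#if 0
static int
extreg_examples (sim_cpu *cpu)
{
  return extreg64 (cpu, 1, UXTB) == 0x80
    && extreg64 (cpu, 1, SXTB) == 0xFFFFFFFFFFFFFF80ULL;
}
#endif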
2082
2083 /* Arithmetic extending register.
2084 These allow a sign or zero extension of some portion of the
2085 second source register, followed by a left shift of between
2086 0 and 4 bits.
2087
2088 N.B. the output (dest) and first input arg (source) may normally
2089 be Xn or SP. However, for flag setting operations dest can only
2090 be Xn. The second input register is always Xn. */
2091
2092 /* 32 bit ADD extending register. */
2093 static void
2094 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2095 {
2096 unsigned rm = INSTR (20, 16);
2097 unsigned rn = INSTR (9, 5);
2098 unsigned rd = INSTR (4, 0);
2099
2100 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2101 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2102 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2103 + (extreg32 (cpu, rm, extension) << shift));
2104 }
2105
2106 /* 64 bit ADD extending register.
2107 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2108 static void
2109 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2110 {
2111 unsigned rm = INSTR (20, 16);
2112 unsigned rn = INSTR (9, 5);
2113 unsigned rd = INSTR (4, 0);
2114
2115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2116 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2117 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2118 + (extreg64 (cpu, rm, extension) << shift));
2119 }
2120
2121 /* 32 bit ADD extending register setting flags. */
2122 static void
2123 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2124 {
2125 unsigned rm = INSTR (20, 16);
2126 unsigned rn = INSTR (9, 5);
2127 unsigned rd = INSTR (4, 0);
2128
2129 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2130 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2131
2132 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2133 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2134 set_flags_for_add32 (cpu, value1, value2);
2135 }
2136
2137 /* 64 bit ADD extending register setting flags */
2138 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2139 static void
2140 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2141 {
2142 unsigned rm = INSTR (20, 16);
2143 unsigned rn = INSTR (9, 5);
2144 unsigned rd = INSTR (4, 0);
2145
2146 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2147 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2148
2149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2150 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2151 set_flags_for_add64 (cpu, value1, value2);
2152 }
2153
2154 /* 32 bit SUB extending register. */
2155 static void
2156 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2157 {
2158 unsigned rm = INSTR (20, 16);
2159 unsigned rn = INSTR (9, 5);
2160 unsigned rd = INSTR (4, 0);
2161
2162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2163 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2164 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2165 - (extreg32 (cpu, rm, extension) << shift));
2166 }
2167
2168 /* 64 bit SUB extending register. */
2169 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2170 static void
2171 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2172 {
2173 unsigned rm = INSTR (20, 16);
2174 unsigned rn = INSTR (9, 5);
2175 unsigned rd = INSTR (4, 0);
2176
2177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2178 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2179 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2180 - (extreg64 (cpu, rm, extension) << shift));
2181 }
2182
2183 /* 32 bit SUB extending register setting flags. */
2184 static void
2185 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2186 {
2187 unsigned rm = INSTR (20, 16);
2188 unsigned rn = INSTR (9, 5);
2189 unsigned rd = INSTR (4, 0);
2190
2191 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2192 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2193
2194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2195 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2196 set_flags_for_sub32 (cpu, value1, value2);
2197 }
2198
2199 /* 64 bit SUB extending register setting flags */
2200 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2201 static void
2202 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2203 {
2204 unsigned rm = INSTR (20, 16);
2205 unsigned rn = INSTR (9, 5);
2206 unsigned rd = INSTR (4, 0);
2207
2208 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2209 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2210
2211 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2212 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2213 set_flags_for_sub64 (cpu, value1, value2);
2214 }
2215
2216 static void
2217 dexAddSubtractImmediate (sim_cpu *cpu)
2218 {
2219 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2220 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2221 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2222 instr[28,24] = 10001
2223 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2224 instr[21,10] = uimm12
2225 instr[9,5] = Rn
2226 instr[4,0] = Rd */
2227
2228 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2229 uint32_t shift = INSTR (23, 22);
2230 uint32_t imm = INSTR (21, 10);
2231 uint32_t dispatch = INSTR (31, 29);
2232
2233 NYI_assert (28, 24, 0x11);
2234
2235 if (shift > 1)
2236 HALT_UNALLOC;
2237
2238 if (shift)
2239 imm <<= 12;
2240
2241 switch (dispatch)
2242 {
2243 case 0: add32 (cpu, imm); break;
2244 case 1: adds32 (cpu, imm); break;
2245 case 2: sub32 (cpu, imm); break;
2246 case 3: subs32 (cpu, imm); break;
2247 case 4: add64 (cpu, imm); break;
2248 case 5: adds64 (cpu, imm); break;
2249 case 6: sub64 (cpu, imm); break;
2250 case 7: subs64 (cpu, imm); break;
2251 }
2252 }
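
/* A worked encoding, kept in a disabled block: ADDS W1, W2, #4095
   has size:op:set = 001, bits [28,24] = 10001, shift = 00,
   uimm12 = 0xFFF, Rn = 2 and Rd = 1, giving the word 0x313FFC41.
   Its dispatch value above is 1, so it is handled by adds32.  */
#if 0
static uint32_t
adds_immediate_example (void)
{
  return (0u << 31)       /* size : 32 bit.  */
    | (0u << 30)          /* op   : ADD.  */
    | (1u << 29)          /* set  : flags.  */
    | (0x11u << 24)       /* fixed 10001.  */
    | (0u << 22)          /* shift: LSL #0.  */
    | (0xFFFu << 10)      /* uimm12.  */
    | (2u << 5)           /* Rn.  */
    | 1u;                 /* Rd.  */
}
#endif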
2253
2254 static void
2255 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2256 {
2257 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2258 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2259 instr[28,24] = 01011
2260 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2261 instr[21] = 0
2262 instr[20,16] = Rm
2263 instr[15,10] = count : must be 0xxxxx for 32 bit
2264 instr[9,5] = Rn
2265 instr[4,0] = Rd */
2266
2267 uint32_t size = INSTR (31, 31);
2268 uint32_t count = INSTR (15, 10);
2269 Shift shiftType = INSTR (23, 22);
2270
2271 NYI_assert (28, 24, 0x0B);
2272 NYI_assert (21, 21, 0);
2273
2274 /* Shift encoded as ROR is unallocated. */
2275 if (shiftType == ROR)
2276 HALT_UNALLOC;
2277
2278 /* 32 bit operations must have count[5] = 0
2279 or else we have an UNALLOC. */
2280 if (size == 0 && uimm (count, 5, 5))
2281 HALT_UNALLOC;
2282
2283 /* Dispatch on size:op i.e instr [31,29]. */
2284 switch (INSTR (31, 29))
2285 {
2286 case 0: add32_shift (cpu, shiftType, count); break;
2287 case 1: adds32_shift (cpu, shiftType, count); break;
2288 case 2: sub32_shift (cpu, shiftType, count); break;
2289 case 3: subs32_shift (cpu, shiftType, count); break;
2290 case 4: add64_shift (cpu, shiftType, count); break;
2291 case 5: adds64_shift (cpu, shiftType, count); break;
2292 case 6: sub64_shift (cpu, shiftType, count); break;
2293 case 7: subs64_shift (cpu, shiftType, count); break;
2294 }
2295 }
2296
2297 static void
2298 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2299 {
2300 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2301 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2302 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2303 instr[28,24] = 01011
2304 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2305 instr[21] = 1
2306 instr[20,16] = Rm
2307 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2308 010 ==> LSL|UXTW, 011 ==> UXTX,
2309 100 ==> SXTB, 101 ==> SXTH,
2310 110 ==> SXTW, 111 ==> SXTX
2311 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2312 instr[9,5] = Rn
2313 instr[4,0] = Rd */
2314
2315 Extension extensionType = INSTR (15, 13);
2316 uint32_t shift = INSTR (12, 10);
2317
2318 NYI_assert (28, 24, 0x0B);
2319 NYI_assert (21, 21, 1);
2320
2321 /* Shift may not exceed 4. */
2322 if (shift > 4)
2323 HALT_UNALLOC;
2324
2325 /* Dispatch on size:op:set?. */
2326 switch (INSTR (31, 29))
2327 {
2328 case 0: add32_ext (cpu, extensionType, shift); break;
2329 case 1: adds32_ext (cpu, extensionType, shift); break;
2330 case 2: sub32_ext (cpu, extensionType, shift); break;
2331 case 3: subs32_ext (cpu, extensionType, shift); break;
2332 case 4: add64_ext (cpu, extensionType, shift); break;
2333 case 5: adds64_ext (cpu, extensionType, shift); break;
2334 case 6: sub64_ext (cpu, extensionType, shift); break;
2335 case 7: subs64_ext (cpu, extensionType, shift); break;
2336 }
2337 }
2338
2339 /* Conditional data processing
2340 Condition register is implicit 3rd source. */
2341
2342 /* 32 bit add with carry. */
2343 /* N.B. register args may not be SP. */
2344
2345 static void
2346 adc32 (sim_cpu *cpu)
2347 {
2348 unsigned rm = INSTR (20, 16);
2349 unsigned rn = INSTR (9, 5);
2350 unsigned rd = INSTR (4, 0);
2351
2352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2353 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2354 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2355 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2356 + IS_SET (C));
2357 }
2358
2359 /* 64 bit add with carry */
2360 static void
2361 adc64 (sim_cpu *cpu)
2362 {
2363 unsigned rm = INSTR (20, 16);
2364 unsigned rn = INSTR (9, 5);
2365 unsigned rd = INSTR (4, 0);
2366
2367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2369 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2370 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2371 + IS_SET (C));
2372 }
2373
2374 /* 32 bit add with carry setting flags. */
2375 static void
2376 adcs32 (sim_cpu *cpu)
2377 {
2378 unsigned rm = INSTR (20, 16);
2379 unsigned rn = INSTR (9, 5);
2380 unsigned rd = INSTR (4, 0);
2381
2382 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2383 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2384 uint32_t carry = IS_SET (C);
2385
2386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2387 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2388 set_flags_for_add32 (cpu, value1, value2 + carry);
2389 }
2390
2391 /* 64 bit add with carry setting flags. */
2392 static void
2393 adcs64 (sim_cpu *cpu)
2394 {
2395 unsigned rm = INSTR (20, 16);
2396 unsigned rn = INSTR (9, 5);
2397 unsigned rd = INSTR (4, 0);
2398
2399 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2400 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2401 uint64_t carry = IS_SET (C);
2402
2403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2404 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2405 set_flags_for_add64 (cpu, value1, value2 + carry);
2406 }
2407
2408 /* 32 bit sub with carry. */
2409 static void
2410 sbc32 (sim_cpu *cpu)
2411 {
2412 unsigned rm = INSTR (20, 16);
2413 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2414 unsigned rd = INSTR (4, 0);
2415
2416 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2417 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2418 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2419 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2420 - 1 + IS_SET (C));
2421 }
2422
2423 /* 64 bit sub with carry */
2424 static void
2425 sbc64 (sim_cpu *cpu)
2426 {
2427 unsigned rm = INSTR (20, 16);
2428 unsigned rn = INSTR (9, 5);
2429 unsigned rd = INSTR (4, 0);
2430
2431 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2432 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2433 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2434 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2435 - 1 + IS_SET (C));
2436 }
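
/* The identity behind the "- 1 + IS_SET (C)" term, kept in a
   disabled block: SBC is defined as Rn + NOT (Rm) + C, which in
   two's complement equals Rn - Rm - 1 + C.  With C set this is a
   plain subtract; with C clear it borrows one.  */
#if 0
static uint64_t
sbc_model (uint64_t rn, uint64_t rm, unsigned carry)
{
  return rn + ~rm + carry;   /* == rn - rm - 1 + carry (mod 2^64).  */
}
#endif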
2437
2438 /* 32 bit sub with carry setting flags */
2439 static void
2440 sbcs32 (sim_cpu *cpu)
2441 {
2442 unsigned rm = INSTR (20, 16);
2443 unsigned rn = INSTR (9, 5);
2444 unsigned rd = INSTR (4, 0);
2445
2446 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2447 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2448 uint32_t carry = IS_SET (C);
2449 uint32_t result = value1 - value2 - 1 + carry; /* SBC: Rn - Rm - 1 + C. */
2450
2451 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2452 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2453 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2454 }
2455
2456 /* 64 bit sub with carry setting flags */
2457 static void
2458 sbcs64 (sim_cpu *cpu)
2459 {
2460 unsigned rm = INSTR (20, 16);
2461 unsigned rn = INSTR (9, 5);
2462 unsigned rd = INSTR (4, 0);
2463
2464 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2465 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2466 uint64_t carry = IS_SET (C);
2467 uint64_t result = value1 - value2 - 1 + carry; /* SBC: Rn - Rm - 1 + C. */
2468
2469 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2470 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2471 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2472 }
2473
2474 static void
2475 dexAddSubtractWithCarry (sim_cpu *cpu)
2476 {
2477 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2478 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2479 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2480 instr[28,21] = 1 1010 000
2481 instr[20,16] = Rm
2482 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2483 instr[9,5] = Rn
2484 instr[4,0] = Rd */
2485
2486 uint32_t op2 = INSTR (15, 10);
2487
2488 NYI_assert (28, 21, 0xD0);
2489
2490 if (op2 != 0)
2491 HALT_UNALLOC;
2492
2493 /* Dispatch on size:op:set?. */
2494 switch (INSTR (31, 29))
2495 {
2496 case 0: adc32 (cpu); break;
2497 case 1: adcs32 (cpu); break;
2498 case 2: sbc32 (cpu); break;
2499 case 3: sbcs32 (cpu); break;
2500 case 4: adc64 (cpu); break;
2501 case 5: adcs64 (cpu); break;
2502 case 6: sbc64 (cpu); break;
2503 case 7: sbcs64 (cpu); break;
2504 }
2505 }
2506
2507 static uint32_t
2508 testConditionCode (sim_cpu *cpu, CondCode cc)
2509 {
2510 /* This should be reducible to branchless logic
2511 by some careful testing of bits in CC followed
2512 by the requisite masking and combining of bits
2513 from the flag register.
2514
2515 For now we do it with a switch. */
2516 int res;
2517
2518 switch (cc)
2519 {
2520 case EQ: res = IS_SET (Z); break;
2521 case NE: res = IS_CLEAR (Z); break;
2522 case CS: res = IS_SET (C); break;
2523 case CC: res = IS_CLEAR (C); break;
2524 case MI: res = IS_SET (N); break;
2525 case PL: res = IS_CLEAR (N); break;
2526 case VS: res = IS_SET (V); break;
2527 case VC: res = IS_CLEAR (V); break;
2528 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2529 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2530 case GE: res = IS_SET (N) == IS_SET (V); break;
2531 case LT: res = IS_SET (N) != IS_SET (V); break;
2532 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2533 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2534 case AL:
2535 case NV:
2536 default:
2537 res = 1;
2538 break;
2539 }
2540 return res;
2541 }
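
/* One branch-reduced factoring, kept in a disabled block.  It
   follows the ARM ARM ConditionHolds() pseudocode: decide on
   cc[3:1], then invert when cc[0] is set, except for 1111 (NV,
   which behaves as always).  It assumes the CondCode enumerators
   carry their architectural encodings (EQ = 0 ... NV = 15).  */
#if 0
static int
condition_holds (sim_cpu *cpu, CondCode cc)
{
  int res;

  switch ((cc >> 1) & 0x7)
    {
    case 0: res = IS_SET (Z); break;                     /* EQ/NE.  */
    case 1: res = IS_SET (C); break;                     /* CS/CC.  */
    case 2: res = IS_SET (N); break;                     /* MI/PL.  */
    case 3: res = IS_SET (V); break;                     /* VS/VC.  */
    case 4: res = IS_SET (C) && IS_CLEAR (Z); break;     /* HI/LS.  */
    case 5: res = IS_SET (N) == IS_SET (V); break;       /* GE/LT.  */
    case 6: res = IS_CLEAR (Z)
        && (IS_SET (N) == IS_SET (V)); break;            /* GT/LE.  */
    default: res = 1; break;                             /* AL/NV.  */
    }

  if ((cc & 1) && cc != NV)
    res = ! res;
  return res;
}
#endif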
2542
2543 static void
2544 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2545 {
2546 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2547 instr[30] = compare with positive (1) or negative value (0)
2548 instr[29,21] = 1 1101 0010
2549 instr[20,16] = Rm or const
2550 instr[15,12] = cond
2551 instr[11] = compare reg (0) or const (1)
2552 instr[10] = 0
2553 instr[9,5] = Rn
2554 instr[4] = 0
2555 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2556 signed int negate;
2557 unsigned rm;
2558 unsigned rn;
2559
2560 NYI_assert (29, 21, 0x1d2);
2561 NYI_assert (10, 10, 0);
2562 NYI_assert (4, 4, 0);
2563
2564 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2565 if (! testConditionCode (cpu, INSTR (15, 12)))
2566 {
2567 aarch64_set_CPSR (cpu, INSTR (3, 0));
2568 return;
2569 }
2570
2571 negate = INSTR (30, 30) ? 1 : -1;
2572 rm = INSTR (20, 16);
2573 rn = INSTR ( 9, 5);
2574
2575 if (INSTR (31, 31))
2576 {
2577 if (INSTR (11, 11))
2578 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2579 negate * (uint64_t) rm);
2580 else
2581 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2582 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2583 }
2584 else
2585 {
2586 if (INSTR (11, 11))
2587 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2588 negate * rm);
2589 else
2590 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2591 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2592 }
2593 }
2594
2595 static void
2596 do_vec_MOV_whole_vector (sim_cpu *cpu)
2597 {
2598 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2599
2600 instr[31] = 0
2601 instr[30] = half(0)/full(1)
2602 instr[29,21] = 001110101
2603 instr[20,16] = Vs
2604 instr[15,10] = 000111
2605 instr[9,5] = Vs
2606 instr[4,0] = Vd */
2607
2608 unsigned vs = INSTR (9, 5);
2609 unsigned vd = INSTR (4, 0);
2610
2611 NYI_assert (29, 21, 0x075);
2612 NYI_assert (15, 10, 0x07);
2613
2614 if (INSTR (20, 16) != vs)
2615 HALT_NYI;
2616
2617 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2618 if (INSTR (30, 30))
2619 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2620
2621 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2622 }
2623
2624 static void
2625 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2626 {
2627 /* instr[31] = 0
2628 instr[30] = word(0)/long(1)
2629 instr[29,21] = 00 1110 000
2630 instr[20,16] = element size and index
2631 instr[15,10] = 00 0010 11
2632 instr[9,5] = V source
2633 instr[4,0] = R dest */
2634
2635 unsigned vs = INSTR (9, 5);
2636 unsigned rd = INSTR (4, 0);
2637 unsigned imm5 = INSTR (20, 16);
2638 unsigned full = INSTR (30, 30);
2639 int size, index;
2640
2641 NYI_assert (29, 21, 0x070);
2642 NYI_assert (15, 10, 0x0B);
2643
2644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2645
2646 if (imm5 & 0x1)
2647 {
2648 size = 0;
2649 index = (imm5 >> 1) & 0xF;
2650 }
2651 else if (imm5 & 0x2)
2652 {
2653 size = 1;
2654 index = (imm5 >> 2) & 0x7;
2655 }
2656 else if (full && (imm5 & 0x4))
2657 {
2658 size = 2;
2659 index = (imm5 >> 3) & 0x3;
2660 }
2661 else
2662 HALT_UNALLOC;
2663
2664 switch (size)
2665 {
2666 case 0:
2667 if (full)
2668 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2669 aarch64_get_vec_s8 (cpu, vs, index));
2670 else
2671 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2672 aarch64_get_vec_s8 (cpu, vs, index));
2673 break;
2674
2675 case 1:
2676 if (full)
2677 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2678 aarch64_get_vec_s16 (cpu, vs, index));
2679 else
2680 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2681 aarch64_get_vec_s16 (cpu, vs, index));
2682 break;
2683
2684 case 2:
2685 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2686 aarch64_get_vec_s32 (cpu, vs, index));
2687 break;
2688
2689 default:
2690 HALT_UNALLOC;
2691 }
2692 }
2693
2694 static void
2695 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2696 {
2697 /* instr[31] = 0
2698 instr[30] = word(0)/long(1)
2699 instr[29,21] = 00 1110 000
2700 instr[20,16] = element size and index
2701 instr[15,10] = 00 0011 11
2702 instr[9,5] = V source
2703 instr[4,0] = R dest */
2704
2705 unsigned vs = INSTR (9, 5);
2706 unsigned rd = INSTR (4, 0);
2707 unsigned imm5 = INSTR (20, 16);
2708 unsigned full = INSTR (30, 30);
2709 int size, index;
2710
2711 NYI_assert (29, 21, 0x070);
2712 NYI_assert (15, 10, 0x0F);
2713
2714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2715
2716 if (!full)
2717 {
2718 if (imm5 & 0x1)
2719 {
2720 size = 0;
2721 index = (imm5 >> 1) & 0xF;
2722 }
2723 else if (imm5 & 0x2)
2724 {
2725 size = 1;
2726 index = (imm5 >> 2) & 0x7;
2727 }
2728 else if (imm5 & 0x4)
2729 {
2730 size = 2;
2731 index = (imm5 >> 3) & 0x3;
2732 }
2733 else
2734 HALT_UNALLOC;
2735 }
2736 else if (imm5 & 0x8)
2737 {
2738 size = 3;
2739 index = (imm5 >> 4) & 0x1;
2740 }
2741 else
2742 HALT_UNALLOC;
2743
2744 switch (size)
2745 {
2746 case 0:
2747 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2748 aarch64_get_vec_u8 (cpu, vs, index));
2749 break;
2750
2751 case 1:
2752 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2753 aarch64_get_vec_u16 (cpu, vs, index));
2754 break;
2755
2756 case 2:
2757 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2758 aarch64_get_vec_u32 (cpu, vs, index));
2759 break;
2760
2761 case 3:
2762 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2763 aarch64_get_vec_u64 (cpu, vs, index));
2764 break;
2765
2766 default:
2767 HALT_UNALLOC;
2768 }
2769 }
2770
2771 static void
2772 do_vec_INS (sim_cpu *cpu)
2773 {
2774 /* instr[31,21] = 01001110000
2775 instr[20,16] = element size and index
2776 instr[15,10] = 000111
2777 instr[9,5] = W source
2778 instr[4,0] = V dest */
2779
2780 int index;
2781 unsigned rs = INSTR (9, 5);
2782 unsigned vd = INSTR (4, 0);
2783
2784 NYI_assert (31, 21, 0x270);
2785 NYI_assert (15, 10, 0x07);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 if (INSTR (16, 16))
2789 {
2790 index = INSTR (20, 17);
2791 aarch64_set_vec_u8 (cpu, vd, index,
2792 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2793 }
2794 else if (INSTR (17, 17))
2795 {
2796 index = INSTR (20, 18);
2797 aarch64_set_vec_u16 (cpu, vd, index,
2798 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2799 }
2800 else if (INSTR (18, 18))
2801 {
2802 index = INSTR (20, 19);
2803 aarch64_set_vec_u32 (cpu, vd, index,
2804 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2805 }
2806 else if (INSTR (19, 19))
2807 {
2808 index = INSTR (20, 20);
2809 aarch64_set_vec_u64 (cpu, vd, index,
2810 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2811 }
2812 else
2813 HALT_NYI;
2814 }
2815
2816 static void
2817 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2818 {
2819 /* instr[31] = 0
2820 instr[30] = half(0)/full(1)
2821 instr[29,21] = 00 1110 000
2822 instr[20,16] = element size and index
2823 instr[15,10] = 0000 01
2824 instr[9,5] = V source
2825 instr[4,0] = V dest. */
2826
2827 unsigned full = INSTR (30, 30);
2828 unsigned vs = INSTR (9, 5);
2829 unsigned vd = INSTR (4, 0);
2830 int i, index;
2831
2832 NYI_assert (29, 21, 0x070);
2833 NYI_assert (15, 10, 0x01);
2834
2835 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2836 if (INSTR (16, 16))
2837 {
2838 index = INSTR (20, 17);
2839
2840 for (i = 0; i < (full ? 16 : 8); i++)
2841 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2842 }
2843 else if (INSTR (17, 17))
2844 {
2845 index = INSTR (20, 18);
2846
2847 for (i = 0; i < (full ? 8 : 4); i++)
2848 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2849 }
2850 else if (INSTR (18, 18))
2851 {
2852 index = INSTR (20, 19);
2853
2854 for (i = 0; i < (full ? 4 : 2); i++)
2855 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2856 }
2857 else
2858 {
2859 if (INSTR (19, 19) == 0)
2860 HALT_UNALLOC;
2861
2862 if (! full)
2863 HALT_UNALLOC;
2864
2865 index = INSTR (20, 20);
2866
2867 for (i = 0; i < 2; i++)
2868 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2869 }
2870 }
2871
2872 static void
2873 do_vec_TBL (sim_cpu *cpu)
2874 {
2875 /* instr[31] = 0
2876 instr[30] = half(0)/full(1)
2877 instr[29,21] = 00 1110 000
2878 instr[20,16] = Vm
2879 instr[15] = 0
2880 instr[14,13] = vec length
2881 instr[12,10] = 000
2882 instr[9,5] = V start
2883 instr[4,0] = V dest */
2884
2885 int full = INSTR (30, 30);
2886 int len = INSTR (14, 13) + 1;
2887 unsigned vm = INSTR (20, 16);
2888 unsigned vn = INSTR (9, 5);
2889 unsigned vd = INSTR (4, 0);
2890 unsigned i;
2891
2892 NYI_assert (29, 21, 0x070);
2893 NYI_assert (12, 10, 0);
2894
2895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2896 for (i = 0; i < (full ? 16 : 8); i++)
2897 {
2898 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2899 uint8_t val;
2900
2901 if (selector < 16)
2902 val = aarch64_get_vec_u8 (cpu, vn, selector);
2903 else if (selector < 32)
2904 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2905 else if (selector < 48)
2906 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2907 else if (selector < 64)
2908 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2909 else
2910 val = 0;
2911
2912 aarch64_set_vec_u8 (cpu, vd, i, val);
2913 }
2914 }
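
/* A per-byte reference model of the lookup above, kept in a
   disabled block: each selector indexes the concatenation of LEN
   consecutive source registers, and anything past the end yields
   zero.  Like the loop above, it does not wrap register numbers at
   31.  */
#if 0
static uint8_t
tbl_lane_model (sim_cpu *cpu, unsigned vn, int len, uint8_t selector)
{
  if (selector >= len * 16)
    return 0;   /* Out of range selects zero.  */
  return aarch64_get_vec_u8 (cpu, vn + selector / 16, selector % 16);
}
#endif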
2915
2916 static void
2917 do_vec_TRN (sim_cpu *cpu)
2918 {
2919 /* instr[31] = 0
2920 instr[30] = half(0)/full(1)
2921 instr[29,24] = 00 1110
2922 instr[23,22] = size
2923 instr[21] = 0
2924 instr[20,16] = Vm
2925 instr[15] = 0
2926 instr[14] = TRN1 (0) / TRN2 (1)
2927 instr[13,10] = 1010
2928 instr[9,5] = V source
2929 instr[4,0] = V dest. */
2930
2931 int full = INSTR (30, 30);
2932 int second = INSTR (14, 14);
2933 unsigned vm = INSTR (20, 16);
2934 unsigned vn = INSTR (9, 5);
2935 unsigned vd = INSTR (4, 0);
2936 unsigned i;
2937
2938 NYI_assert (29, 24, 0x0E);
2939 NYI_assert (13, 10, 0xA);
2940
2941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2942 switch (INSTR (23, 22))
2943 {
2944 case 0:
2945 for (i = 0; i < (full ? 8 : 4); i++)
2946 {
2947 uint8_t a = aarch64_get_vec_u8 (cpu, vn, i * 2 + second);
2948 uint8_t b = aarch64_get_vec_u8 (cpu, vm, i * 2 + second);
2949 aarch64_set_vec_u8 (cpu, vd, i * 2, a);
2950 aarch64_set_vec_u8 (cpu, vd, i * 2 + 1, b);
2951 }
2952 break;
2953
2954 case 1:
2955 for (i = 0; i < (full ? 4 : 2); i++)
2956 {
2957 uint16_t a = aarch64_get_vec_u16 (cpu, vn, i * 2 + second);
2958 uint16_t b = aarch64_get_vec_u16 (cpu, vm, i * 2 + second);
2959 aarch64_set_vec_u16 (cpu, vd, i * 2, a);
2960 aarch64_set_vec_u16 (cpu, vd, i * 2 + 1, b);
2961 }
2962 break;
2963
2964 case 2:
2965 for (i = 0; i < (full ? 2 : 1); i++)
2966 {
2967 uint32_t a = aarch64_get_vec_u32 (cpu, vn, i * 2 + second);
2968 uint32_t b = aarch64_get_vec_u32 (cpu, vm, i * 2 + second);
2969 aarch64_set_vec_u32 (cpu, vd, i * 2, a);
2970 aarch64_set_vec_u32 (cpu, vd, i * 2 + 1, b);
2971 }
2972 break;
2973
2974 case 3:
2975 if (! full)
2976 HALT_UNALLOC;
2977 else
2978 {
2979 uint64_t a = aarch64_get_vec_u64 (cpu, vn, second);
2980 uint64_t b = aarch64_get_vec_u64 (cpu, vm, second);
2981 aarch64_set_vec_u64 (cpu, vd, 0, a);
2982 aarch64_set_vec_u64 (cpu, vd, 1, b);
2983 }
2984 break;
2988 }
2989 }
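
/* Reference semantics for the transposes above, kept in a disabled
   block: TRN1 gathers the even-indexed elements of Vn and Vm and
   TRN2 the odd-indexed ones, interleaving them pairwise, e.g. for
   bytes TRN1 {a0,a1,a2,...}, {b0,b1,b2,...} = {a0, b0, a2, b2, ...}.
   Buffering the reads keeps the model safe when Vd aliases a
   source.  */
#if 0
static void
trn_bytes_model (sim_cpu *cpu, unsigned vd, unsigned vn, unsigned vm,
                 int second, int pairs)
{
  uint8_t out[16];
  int p;

  for (p = 0; p < pairs; p++)
    {
      out[p * 2]     = aarch64_get_vec_u8 (cpu, vn, p * 2 + second);
      out[p * 2 + 1] = aarch64_get_vec_u8 (cpu, vm, p * 2 + second);
    }
  for (p = 0; p < pairs * 2; p++)
    aarch64_set_vec_u8 (cpu, vd, p, out[p]);
}
#endif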
2990
2991 static void
2992 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2993 {
2994 /* instr[31] = 0
2995 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2996 [must be 1 for 64-bit xfer]
2997 instr[29,20] = 00 1110 0000
2998 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2999 0100=> 32-bits, 1000=> 64-bits
3000 instr[15,10] = 0000 11
3001 instr[9,5] = W source
3002 instr[4,0] = V dest. */
3003
3004 unsigned i;
3005 unsigned Vd = INSTR (4, 0);
3006 unsigned Rs = INSTR (9, 5);
3007 int both = INSTR (30, 30);
3008
3009 NYI_assert (29, 20, 0x0E0);
3010 NYI_assert (15, 10, 0x03);
3011
3012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3013 switch (INSTR (19, 16))
3014 {
3015 case 1:
3016 for (i = 0; i < (both ? 16 : 8); i++)
3017 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3018 break;
3019
3020 case 2:
3021 for (i = 0; i < (both ? 8 : 4); i++)
3022 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3023 break;
3024
3025 case 4:
3026 for (i = 0; i < (both ? 4 : 2); i++)
3027 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3028 break;
3029
3030 case 8:
3031 if (!both)
3032 HALT_NYI;
3033 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3034 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3035 break;
3036
3037 default:
3038 HALT_NYI;
3039 }
3040 }
3041
3042 static void
3043 do_vec_UZP (sim_cpu *cpu)
3044 {
3045 /* instr[31] = 0
3046 instr[30] = half(0)/full(1)
3047 instr[29,24] = 00 1110
3048 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3049 instr[21] = 0
3050 instr[20,16] = Vm
3051 instr[15] = 0
3052 instr[14] = lower (0) / upper (1)
3053 instr[13,10] = 0110
3054 instr[9,5] = Vn
3055 instr[4,0] = Vd. */
3056
3057 int full = INSTR (30, 30);
3058 int upper = INSTR (14, 14);
3059
3060 unsigned vm = INSTR (20, 16);
3061 unsigned vn = INSTR (9, 5);
3062 unsigned vd = INSTR (4, 0);
3063
3064 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3065 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3066 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3067 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3068
3069 uint64_t val1;
3070 uint64_t val2;
3071
3072 uint64_t input2 = full ? val_n2 : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 6);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080 switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3085 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3086 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3087
3088 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3089 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3090 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3091 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3092
3093 if (full)
3094 {
3095 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3097 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3098 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3099
3100 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3101 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3102 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3103 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3104 }
3105 break;
3106
3107 case 1:
3108 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3109 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3110
3111 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3112 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3113
3114 if (full)
3115 {
3116 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3117 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3118
3119 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3120 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3121 }
3122 break;
3123
3124 case 2:
3125 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3126 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3127
3128 if (full)
3129 {
3130 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3131 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3132 }
3133 break;
3134
3135 case 3:
3136 if (! full)
3137 HALT_UNALLOC;
3138
3139 val1 = upper ? val_n2 : val_n1;
3140 val2 = upper ? val_m2 : val_m1;
3141 break;
3142 }
3143
3144 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3145 if (full)
3146 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3147 }
3148
3149 static void
3150 do_vec_ZIP (sim_cpu *cpu)
3151 {
3152 /* instr[31] = 0
3153 instr[30] = half(0)/full(1)
3154 instr[29,24] = 00 1110
3155 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3156 instr[21] = 0
3157 instr[20,16] = Vm
3158 instr[15] = 0
3159 instr[14] = lower (0) / upper (1)
3160 instr[13,10] = 1110
3161 instr[9,5] = Vn
3162 instr[4,0] = Vd. */
3163
3164 int full = INSTR (30, 30);
3165 int upper = INSTR (14, 14);
3166
3167 unsigned vm = INSTR (20, 16);
3168 unsigned vn = INSTR (9, 5);
3169 unsigned vd = INSTR (4, 0);
3170
3171 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3172 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3173 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3174 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3175
3176 uint64_t val1 = 0;
3177 uint64_t val2 = 0;
3178
3179 uint64_t input1 = upper ? val_n2 : val_n1;
3180 uint64_t input2 = upper ? val_m2 : val_m1;
3181
3182 NYI_assert (29, 24, 0x0E);
3183 NYI_assert (21, 21, 0);
3184 NYI_assert (15, 15, 0);
3185 NYI_assert (13, 10, 0xE);
3186
3187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3188 switch (INSTR (23, 22))
3189 {
3190 case 0:
3191 val1 =
3192 ((input1 << 0) & (0xFF << 0))
3193 | ((input2 << 8) & (0xFF << 8))
3194 | ((input1 << 8) & (0xFF << 16))
3195 | ((input2 << 16) & (0xFFULL << 24))
3196 | ((input1 << 16) & (0xFFULL << 32))
3197 | ((input2 << 24) & (0xFFULL << 40))
3198 | ((input1 << 24) & (0xFFULL << 48))
3199 | ((input2 << 32) & (0xFFULL << 56));
3200
3201 val2 =
3202 ((input1 >> 32) & (0xFF << 0))
3203 | ((input2 >> 24) & (0xFF << 8))
3204 | ((input1 >> 24) & (0xFF << 16))
3205 | ((input2 >> 16) & (0xFFULL << 24))
3206 | ((input1 >> 16) & (0xFFULL << 32))
3207 | ((input2 >> 8) & (0xFFULL << 40))
3208 | ((input1 >> 8) & (0xFFULL << 48))
3209 | ((input2 >> 0) & (0xFFULL << 56));
3210 break;
3211
3212 case 1:
3213 val1 =
3214 ((input1 << 0) & (0xFFFF << 0))
3215 | ((input2 << 16) & (0xFFFFULL << 16))
3216 | ((input1 << 16) & (0xFFFFULL << 32))
3217 | ((input2 << 32) & (0xFFFFULL << 48));
3218
3219 val2 =
3220 ((input1 >> 32) & (0xFFFF << 0))
3221 | ((input2 >> 16) & (0xFFFFULL << 16))
3222 | ((input1 >> 16) & (0xFFFFULL << 32))
3223 | ((input2 >> 0) & (0xFFFFULL << 48));
3224 break;
3225
3226 case 2:
3227 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3228 val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3229 break;
3230
3231 case 3:
3232 val1 = input1;
3233 val2 = input2;
3234 break;
3235 }
3236
3237 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3238 if (full)
3239 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3240 }
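
/* A per-byte reference model of the interleaves above, kept in a
   disabled block: ZIP1 (upper == 0) interleaves the low halves of
   Vn and Vm, ZIP2 the high halves, so for bytes
   ZIP1 {a0,a1,...}, {b0,b1,...} = {a0, b0, a1, b1, ...}.  */
#if 0
static void
zip_bytes_model (sim_cpu *cpu, unsigned vd, unsigned vn, unsigned vm,
                 int upper, int pairs)
{
  uint8_t out[16];
  int base = upper ? pairs : 0;
  int p;

  for (p = 0; p < pairs; p++)
    {
      out[p * 2]     = aarch64_get_vec_u8 (cpu, vn, base + p);
      out[p * 2 + 1] = aarch64_get_vec_u8 (cpu, vm, base + p);
    }
  for (p = 0; p < pairs * 2; p++)
    aarch64_set_vec_u8 (cpu, vd, p, out[p]);
}
#endif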
3241
3242 /* Floating point immediates are encoded in 8 bits.
3243 fpimm[7] = sign bit.
3244 fpimm[6:4] = signed exponent.
3245 fpimm[3:0] = fraction (assuming leading 1).
3246 i.e. F = s * 1.f * 2^(e - b). */
3247
3248 static float
3249 fp_immediate_for_encoding_32 (uint32_t imm8)
3250 {
3251 float u;
3252 uint32_t s, e, f, i;
3253
3254 s = (imm8 >> 7) & 0x1;
3255 e = (imm8 >> 4) & 0x7;
3256 f = imm8 & 0xf;
3257
3258 /* The fp value is s * n/16 * 2^e where n is 16+f. */
3259 u = (16.0 + f) / 16.0;
3260
3261 /* N.B. exponent is signed. */
3262 if (e < 4)
3263 {
3264 int epos = e;
3265
3266 for (i = 0; i <= epos; i++)
3267 u *= 2.0;
3268 }
3269 else
3270 {
3271 int eneg = 7 - e;
3272
3273 for (i = 0; i < eneg; i++)
3274 u /= 2.0;
3275 }
3276
3277 if (s)
3278 u = - u;
3279
3280 return u;
3281 }
3282
3283 static double
3284 fp_immediate_for_encoding_64 (uint32_t imm8)
3285 {
3286 double u;
3287 uint32_t s, e, f, i;
3288
3289 s = (imm8 >> 7) & 0x1;
3290 e = (imm8 >> 4) & 0x7;
3291 f = imm8 & 0xf;
3292
3293 /* The fp value is s * n/16 * 2^e where n is 16+f. */
3294 u = (16.0 + f) / 16.0;
3295
3296 /* N.B. exponent is signed. */
3297 if (e < 4)
3298 {
3299 int epos = e;
3300
3301 for (i = 0; i <= epos; i++)
3302 u *= 2.0;
3303 }
3304 else
3305 {
3306 int eneg = 7 - e;
3307
3308 for (i = 0; i < eneg; i++)
3309 u /= 2.0;
3310 }
3311
3312 if (s)
3313 u = - u;
3314
3315 return u;
3316 }
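
/* Worked decodings, kept in a disabled block: imm8 = 0x70 has
   s = 0, e = 7, f = 0 and decodes to 1.0; imm8 = 0x10 has e = 1 and
   decodes to 4.0; setting the sign bit (0xF0) negates.  These match
   the FMOV (immediate) encodings of those constants.  */
#if 0
static int
fp_immediate_examples (void)
{
  return fp_immediate_for_encoding_32 (0x70) == 1.0f
    && fp_immediate_for_encoding_32 (0x10) == 4.0f
    && fp_immediate_for_encoding_64 (0xF0) == -1.0;
}
#endif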
3317
3318 static void
3319 do_vec_MOV_immediate (sim_cpu *cpu)
3320 {
3321 /* instr[31] = 0
3322 instr[30] = full/half selector
3323 instr[29,19] = 00111100000
3324 instr[18,16] = high 3 bits of uimm8
3325 instr[15,12] = size & shift:
3326 0000 => 32-bit
3327 0010 => 32-bit + LSL#8
3328 0100 => 32-bit + LSL#16
3329 0110 => 32-bit + LSL#24
3330 1010 => 16-bit + LSL#8
3331 1000 => 16-bit
3332 1101 => 32-bit + MSL#16
3333 1100 => 32-bit + MSL#8
3334 1110 => 8-bit
3335 1111 => double
3336 instr[11,10] = 01
3337 instr[9,5] = low 5-bits of uimm8
3338 instr[4,0] = Vd. */
3339
3340 int full = INSTR (30, 30);
3341 unsigned vd = INSTR (4, 0);
3342 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3343 unsigned i;
3344
3345 NYI_assert (29, 19, 0x1E0);
3346 NYI_assert (11, 10, 1);
3347
3348 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3349 switch (INSTR (15, 12))
3350 {
3351 case 0x0: /* 32-bit, no shift. */
3352 case 0x2: /* 32-bit, shift by 8. */
3353 case 0x4: /* 32-bit, shift by 16. */
3354 case 0x6: /* 32-bit, shift by 24. */
3355 val <<= (8 * INSTR (14, 13));
3356 for (i = 0; i < (full ? 4 : 2); i++)
3357 aarch64_set_vec_u32 (cpu, vd, i, val);
3358 break;
3359
3360 case 0xa: /* 16-bit, shift by 8. */
3361 val <<= 8;
3362 /* Fall through. */
3363 case 0x8: /* 16-bit, no shift. */
3364 for (i = 0; i < (full ? 8 : 4); i++)
3365 aarch64_set_vec_u16 (cpu, vd, i, val);
3366 break;
3367
3368 case 0xd: /* 32-bit, mask shift by 16. */
3369 val <<= 8;
3370 val |= 0xFF;
3371 /* Fall through. */
3372 case 0xc: /* 32-bit, mask shift by 8. */
3373 val <<= 8;
3374 val |= 0xFF;
3375 for (i = 0; i < (full ? 4 : 2); i++)
3376 aarch64_set_vec_u32 (cpu, vd, i, val);
3377 break;
3378
3379 case 0xe: /* 8-bit, no shift. */
3380 for (i = 0; i < (full ? 16 : 8); i++)
3381 aarch64_set_vec_u8 (cpu, vd, i, val);
3382 break;
3383
3384 case 0xf: /* FMOV Vd.{2|4}S, #fpimm. */
3385 {
3386 float u = fp_immediate_for_encoding_32 (val);
3387 for (i = 0; i < (full ? 4 : 2); i++)
3388 aarch64_set_vec_float (cpu, vd, i, u);
3389 break;
3390 }
3391
3392 default:
3393 HALT_NYI;
3394 }
3395 }
3396
3397 static void
3398 do_vec_MVNI (sim_cpu *cpu)
3399 {
3400 /* instr[31] = 0
3401 instr[30] = full/half selector
3402 instr[29,19] = 10111100000
3403 instr[18,16] = high 3 bits of uimm8
3404 instr[15,12] = selector
3405 instr[11,10] = 01
3406 instr[9,5] = low 5-bits of uimm8
3407 instr[4,0] = Vd. */
3408
3409 int full = INSTR (30, 30);
3410 unsigned vd = INSTR (4, 0);
3411 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3412 unsigned i;
3413
3414 NYI_assert (29, 19, 0x5E0);
3415 NYI_assert (11, 10, 1);
3416
3417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3418 switch (INSTR (15, 12))
3419 {
3420 case 0x0: /* 32-bit, no shift. */
3421 case 0x2: /* 32-bit, shift by 8. */
3422 case 0x4: /* 32-bit, shift by 16. */
3423 case 0x6: /* 32-bit, shift by 24. */
3424 val <<= (8 * INSTR (14, 13));
3425 val = ~ val;
3426 for (i = 0; i < (full ? 4 : 2); i++)
3427 aarch64_set_vec_u32 (cpu, vd, i, val);
3428 return;
3429
3430 case 0xa: /* 16-bit, 8 bit shift. */
3431 val <<= 8; /* Fall through. */
3432 case 0x8: /* 16-bit, no shift. */
3433 val = ~ val;
3434 for (i = 0; i < (full ? 8 : 4); i++)
3435 aarch64_set_vec_u16 (cpu, vd, i, val);
3436 return;
3437
3438 case 0xd: /* 32-bit, mask shift by 16. */
3439 val <<= 8;
3440 val |= 0xFF; /* Fall through. */
3441 case 0xc: /* 32-bit, mask shift by 8. */
3442 val <<= 8;
3443 val |= 0xFF;
3444 val = ~ val;
3445 for (i = 0; i < (full ? 4 : 2); i++)
3446 aarch64_set_vec_u32 (cpu, vd, i, val);
3447 return;
3448
3449 case 0xE: /* MOVI Dn|Vd.2D, #mask64. */
3450 {
3451 uint64_t mask = 0;
3452
3453 for (i = 0; i < 8; i++)
3454 if (val & (1 << i))
3455 mask |= (0xFFULL << (i * 8)); /* ULL: the shift can reach 56. */
3456 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3457 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0); /* MOVI Dn zeroes the top half. */
3458 return;
3459 }
3460
3461 case 0xf: /* FMOV Vd.2D, #fpimm. */
3462 {
3463 double u = fp_immediate_for_encoding_64 (val);
3464
3465 if (! full)
3466 HALT_UNALLOC;
3467
3468 aarch64_set_vec_double (cpu, vd, 0, u);
3469 aarch64_set_vec_double (cpu, vd, 1, u);
3470 return;
3471 }
3472
3473 default:
3474 HALT_NYI;
3475 }
3476 }
3477
3478 #define ABS(A) ((A) < 0 ? - (A) : (A))
3479
3480 static void
3481 do_vec_ABS (sim_cpu *cpu)
3482 {
3483 /* instr[31] = 0
3484 instr[30] = half(0)/full(1)
3485 instr[29,24] = 00 1110
3486 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3487 instr[21,10] = 10 0000 1011 10
3488 instr[9,5] = Vn
3489 instr[4,0] = Vd. */
3490
3491 unsigned vn = INSTR (9, 5);
3492 unsigned vd = INSTR (4, 0);
3493 unsigned full = INSTR (30, 30);
3494 unsigned i;
3495
3496 NYI_assert (29, 24, 0x0E);
3497 NYI_assert (21, 10, 0x82E);
3498
3499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3500 switch (INSTR (23, 22))
3501 {
3502 case 0:
3503 for (i = 0; i < (full ? 16 : 8); i++)
3504 aarch64_set_vec_s8 (cpu, vd, i,
3505 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3506 break;
3507
3508 case 1:
3509 for (i = 0; i < (full ? 8 : 4); i++)
3510 aarch64_set_vec_s16 (cpu, vd, i,
3511 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3512 break;
3513
3514 case 2:
3515 for (i = 0; i < (full ? 4 : 2); i++)
3516 aarch64_set_vec_s32 (cpu, vd, i,
3517 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3518 break;
3519
3520 case 3:
3521 if (! full)
3522 HALT_UNALLOC;
3523 for (i = 0; i < 2; i++)
3524 aarch64_set_vec_s64 (cpu, vd, i,
3525 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3526 break;
3527 }
3528 }
3529
3530 static void
3531 do_vec_ADDV (sim_cpu *cpu)
3532 {
3533 /* instr[31] = 0
3534 instr[30] = full/half selector
3535 instr[29,24] = 00 1110
3536 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3537 instr[21,10] = 11 0001 1011 10
3538 instr[9,5] = Vm
3539 instr[4,0] = Rd. */
3540
3541 unsigned vm = INSTR (9, 5);
3542 unsigned rd = INSTR (4, 0);
3543 unsigned i;
3544 int full = INSTR (30, 30);
3545
3546 NYI_assert (29, 24, 0x0E);
3547 NYI_assert (21, 10, 0xC6E);
3548
3549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3550 switch (INSTR (23, 22))
3551 {
3552 case 0:
3553 {
3554 uint8_t val = 0;
3555 for (i = 0; i < (full ? 16 : 8); i++)
3556 val += aarch64_get_vec_u8 (cpu, vm, i);
3557 aarch64_set_vec_u64 (cpu, rd, 0, val);
3558 return;
3559 }
3560
3561 case 1:
3562 {
3563 uint16_t val = 0;
3564 for (i = 0; i < (full ? 8 : 4); i++)
3565 val += aarch64_get_vec_u16 (cpu, vm, i);
3566 aarch64_set_vec_u64 (cpu, rd, 0, val);
3567 return;
3568 }
3569
3570 case 2:
3571 {
3572 uint32_t val = 0;
3573 if (! full)
3574 HALT_UNALLOC;
3575 for (i = 0; i < 4; i++)
3576 val += aarch64_get_vec_u32 (cpu, vm, i);
3577 aarch64_set_vec_u64 (cpu, rd, 0, val);
3578 return;
3579 }
3580
3581 case 3:
3582 HALT_UNALLOC;
3583 }
3584 }
3585
3586 static void
3587 do_vec_ins_2 (sim_cpu *cpu)
3588 {
3589 /* instr[31,21] = 01001110000
3590 instr[20,18] = size & element selector
3591 instr[17,14] = 0000
3592 instr[13] = direction: to vec(0), from vec (1)
3593 instr[12,10] = 111
3594 instr[9,5] = Vm
3595 instr[4,0] = Vd. */
3596
3597 unsigned elem;
3598 unsigned vm = INSTR (9, 5);
3599 unsigned vd = INSTR (4, 0);
3600
3601 NYI_assert (31, 21, 0x270);
3602 NYI_assert (17, 14, 0);
3603 NYI_assert (12, 10, 7);
3604
3605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3606 if (INSTR (13, 13) == 1)
3607 {
3608 if (INSTR (18, 18) == 1)
3609 {
3610 /* 32-bit moves. */
3611 elem = INSTR (20, 19);
3612 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3613 aarch64_get_vec_u32 (cpu, vm, elem));
3614 }
3615 else
3616 {
3617 /* 64-bit moves. */
3618 if (INSTR (19, 19) != 1)
3619 HALT_NYI;
3620
3621 elem = INSTR (20, 20);
3622 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3623 aarch64_get_vec_u64 (cpu, vm, elem));
3624 }
3625 }
3626 else
3627 {
3628 if (INSTR (18, 18) == 1)
3629 {
3630 /* 32-bit moves. */
3631 elem = INSTR (20, 19);
3632 aarch64_set_vec_u32 (cpu, vd, elem,
3633 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3634 }
3635 else
3636 {
3637 /* 64-bit moves. */
3638 if (INSTR (19, 19) != 1)
3639 HALT_NYI;
3640
3641 elem = INSTR (20, 20);
3642 aarch64_set_vec_u64 (cpu, vd, elem,
3643 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3644 }
3645 }
3646 }
3647
3648 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3649 do \
3650 { \
3651 DST_TYPE a[N], b[N]; \
3652 \
3653 for (i = 0; i < (N); i++) \
3654 { \
3655 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3656 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3657 } \
3658 for (i = 0; i < (N); i++) \
3659 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3660 } \
3661 while (0)
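
/* What the macro expands to for DO_VEC_WIDENING_MUL (8, uint16_t,
   u8, u16), spelled out in a disabled block: both sources are read
   in full before any lane of vd is written, so the result stays
   correct when vd aliases vn or vm.  */
#if 0
static void
widening_mul_u8_expansion (sim_cpu *cpu, unsigned vd, unsigned vn,
                           unsigned vm, unsigned bias)
{
  uint16_t a[8], b[8];
  unsigned i;

  for (i = 0; i < 8; i++)
    {
      a[i] = aarch64_get_vec_u8 (cpu, vn, i + bias);
      b[i] = aarch64_get_vec_u8 (cpu, vm, i + bias);
    }
  for (i = 0; i < 8; i++)
    aarch64_set_vec_u16 (cpu, vd, i, a[i] * b[i]);
}
#endif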
3662
3663 static void
3664 do_vec_mull (sim_cpu *cpu)
3665 {
3666 /* instr[31] = 0
3667 instr[30] = lower(0)/upper(1) selector
3668 instr[29] = signed(0)/unsigned(1)
3669 instr[28,24] = 0 1110
3670 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3671 instr[21] = 1
3672 instr[20,16] = Vm
3673 instr[15,10] = 11 0000
3674 instr[9,5] = Vn
3675 instr[4,0] = Vd. */
3676
3677 int unsign = INSTR (29, 29);
3678 int bias = INSTR (30, 30);
3679 unsigned vm = INSTR (20, 16);
3680 unsigned vn = INSTR ( 9, 5);
3681 unsigned vd = INSTR ( 4, 0);
3682 unsigned i;
3683
3684 NYI_assert (28, 24, 0x0E);
3685 NYI_assert (15, 10, 0x30);
3686
3687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3688 /* NB: Read source values before writing results, in case
3689 the source and destination vectors are the same. */
3690 switch (INSTR (23, 22))
3691 {
3692 case 0:
3693 if (bias)
3694 bias = 8;
3695 if (unsign)
3696 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3697 else
3698 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3699 return;
3700
3701 case 1:
3702 if (bias)
3703 bias = 4;
3704 if (unsign)
3705 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3706 else
3707 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3708 return;
3709
3710 case 2:
3711 if (bias)
3712 bias = 2;
3713 if (unsign)
3714 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3715 else
3716 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3717 return;
3718
3719 case 3:
3720 HALT_NYI;
3721 }
3722 }
3723
3724 static void
3725 do_vec_fadd (sim_cpu *cpu)
3726 {
3727 /* instr[31] = 0
3728 instr[30] = half(0)/full(1)
3729 instr[29,24] = 001110
3730 instr[23] = FADD(0)/FSUB(1)
3731 instr[22] = float (0)/double(1)
3732 instr[21] = 1
3733 instr[20,16] = Vm
3734 instr[15,10] = 110101
3735 instr[9,5] = Vn
3736 instr[4,0] = Vd. */
3737
3738 unsigned vm = INSTR (20, 16);
3739 unsigned vn = INSTR (9, 5);
3740 unsigned vd = INSTR (4, 0);
3741 unsigned i;
3742 int full = INSTR (30, 30);
3743
3744 NYI_assert (29, 24, 0x0E);
3745 NYI_assert (21, 21, 1);
3746 NYI_assert (15, 10, 0x35);
3747
3748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3749 if (INSTR (23, 23))
3750 {
3751 if (INSTR (22, 22))
3752 {
3753 if (! full)
3754 HALT_NYI;
3755
3756 for (i = 0; i < 2; i++)
3757 aarch64_set_vec_double (cpu, vd, i,
3758 aarch64_get_vec_double (cpu, vn, i)
3759 - aarch64_get_vec_double (cpu, vm, i));
3760 }
3761 else
3762 {
3763 for (i = 0; i < (full ? 4 : 2); i++)
3764 aarch64_set_vec_float (cpu, vd, i,
3765 aarch64_get_vec_float (cpu, vn, i)
3766 - aarch64_get_vec_float (cpu, vm, i));
3767 }
3768 }
3769 else
3770 {
3771 if (INSTR (22, 22))
3772 {
3773 if (! full)
3774 HALT_NYI;
3775
3776 for (i = 0; i < 2; i++)
3777 aarch64_set_vec_double (cpu, vd, i,
3778 aarch64_get_vec_double (cpu, vm, i)
3779 + aarch64_get_vec_double (cpu, vn, i));
3780 }
3781 else
3782 {
3783 for (i = 0; i < (full ? 4 : 2); i++)
3784 aarch64_set_vec_float (cpu, vd, i,
3785 aarch64_get_vec_float (cpu, vm, i)
3786 + aarch64_get_vec_float (cpu, vn, i));
3787 }
3788 }
3789 }
3790
3791 static void
3792 do_vec_add (sim_cpu *cpu)
3793 {
3794 /* instr[31] = 0
3795 instr[30] = full/half selector
3796 instr[29,24] = 001110
3797 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3798 instr[21] = 1
3799 instr[20,16] = Vm
3800 instr[15,10] = 100001
3801 instr[9,5] = Vn
3802 instr[4,0] = Vd. */
3803
3804 unsigned vm = INSTR (20, 16);
3805 unsigned vn = INSTR (9, 5);
3806 unsigned vd = INSTR (4, 0);
3807 unsigned i;
3808 int full = INSTR (30, 30);
3809
3810 NYI_assert (29, 24, 0x0E);
3811 NYI_assert (21, 21, 1);
3812 NYI_assert (15, 10, 0x21);
3813
3814 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3815 switch (INSTR (23, 22))
3816 {
3817 case 0:
3818 for (i = 0; i < (full ? 16 : 8); i++)
3819 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3820 + aarch64_get_vec_u8 (cpu, vm, i));
3821 return;
3822
3823 case 1:
3824 for (i = 0; i < (full ? 8 : 4); i++)
3825 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3826 + aarch64_get_vec_u16 (cpu, vm, i));
3827 return;
3828
3829 case 2:
3830 for (i = 0; i < (full ? 4 : 2); i++)
3831 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3832 + aarch64_get_vec_u32 (cpu, vm, i));
3833 return;
3834
3835 case 3:
3836 if (! full)
3837 HALT_UNALLOC;
3838 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3839 + aarch64_get_vec_u64 (cpu, vm, 0));
3840 aarch64_set_vec_u64 (cpu, vd, 1,
3841 aarch64_get_vec_u64 (cpu, vn, 1)
3842 + aarch64_get_vec_u64 (cpu, vm, 1));
3843 return;
3844 }
3845 }
3846
3847 static void
3848 do_vec_mul (sim_cpu *cpu)
3849 {
3850 /* instr[31] = 0
3851 instr[30] = full/half selector
3852 instr[29,24] = 00 1110
3853 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3854 instr[21] = 1
3855 instr[20,16] = Vm
3856 instr[15,10] = 10 0111
3857 instr[9,5] = Vn
3858 instr[4,0] = Vd. */
3859
3860 unsigned vm = INSTR (20, 16);
3861 unsigned vn = INSTR (9, 5);
3862 unsigned vd = INSTR (4, 0);
3863 unsigned i;
3864 int full = INSTR (30, 30);
3865 int bias = 0;
3866
3867 NYI_assert (29, 24, 0x0E);
3868 NYI_assert (21, 21, 1);
3869 NYI_assert (15, 10, 0x27);
3870
3871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3872 switch (INSTR (23, 22))
3873 {
3874 case 0:
3875 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3876 return;
3877
3878 case 1:
3879 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3880 return;
3881
3882 case 2:
3883 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3884 return;
3885
3886 case 3:
3887 HALT_UNALLOC;
3888 }
3889 }
3890
3891 static void
3892 do_vec_MLA (sim_cpu *cpu)
3893 {
3894 /* instr[31] = 0
3895 instr[30] = full/half selector
3896 instr[29,24] = 00 1110
3897 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3898 instr[21] = 1
3899 instr[20,16] = Vm
3900 instr[15,10] = 1001 01
3901 instr[9,5] = Vn
3902 instr[4,0] = Vd. */
3903
3904 unsigned vm = INSTR (20, 16);
3905 unsigned vn = INSTR (9, 5);
3906 unsigned vd = INSTR (4, 0);
3907 unsigned i;
3908 int full = INSTR (30, 30);
3909
3910 NYI_assert (29, 24, 0x0E);
3911 NYI_assert (21, 21, 1);
3912 NYI_assert (15, 10, 0x25);
3913
3914 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3915 switch (INSTR (23, 22))
3916 {
3917 case 0:
3918 for (i = 0; i < (full ? 16 : 8); i++)
3919 aarch64_set_vec_u8 (cpu, vd, i,
3920 aarch64_get_vec_u8 (cpu, vd, i)
3921 + (aarch64_get_vec_u8 (cpu, vn, i)
3922 * aarch64_get_vec_u8 (cpu, vm, i)));
3923 return;
3924
3925 case 1:
3926 for (i = 0; i < (full ? 8 : 4); i++)
3927 aarch64_set_vec_u16 (cpu, vd, i,
3928 aarch64_get_vec_u16 (cpu, vd, i)
3929 + (aarch64_get_vec_u16 (cpu, vn, i)
3930 * aarch64_get_vec_u16 (cpu, vm, i)));
3931 return;
3932
3933 case 2:
3934 for (i = 0; i < (full ? 4 : 2); i++)
3935 aarch64_set_vec_u32 (cpu, vd, i,
3936 aarch64_get_vec_u32 (cpu, vd, i)
3937 + (aarch64_get_vec_u32 (cpu, vn, i)
3938 * aarch64_get_vec_u32 (cpu, vm, i)));
3939 return;
3940
3941 default:
3942 HALT_UNALLOC;
3943 }
3944 }
3945
3946 static float
3947 fmaxnm (float a, float b)
3948 {
3949 if (! isnan (a))
3950 {
3951 if (! isnan (b))
3952 return a > b ? a : b;
3953 return a;
3954 }
3955 else if (! isnan (b))
3956 return b;
3957 return a;
3958 }
3959
3960 static float
3961 fminnm (float a, float b)
3962 {
3963 if (! isnan (a))
3964 {
3965 if (! isnan (b))
3966 return a < b ? a : b;
3967 return a;
3968 }
3969 else if (! isnan (b))
3970 return b;
3971 return a;
3972 }
3973
3974 static double
3975 dmaxnm (double a, double b)
3976 {
3977 if (! isnan (a))
3978 {
3979 if (! isnan (b))
3980 return a > b ? a : b;
3981 return a;
3982 }
3983 else if (! isnan (b))
3984 return b;
3985 return a;
3986 }
3987
3988 static double
3989 dminnm (double a, double b)
3990 {
3991 if (! isnan (a))
3992 {
3993 if (! isnan (b))
3994 return a < b ? a : b;
3995 return a;
3996 }
3997 else if (! isnan (b))
3998 return b;
3999 return a;
4000 }
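
/* The four helpers above implement the IEEE 754-2008 maxNum/minNum
   behaviour required by the FMAXNM/FMINNM family: when exactly one
   operand is a NaN the other operand is returned, so for example
   fmaxnm (NAN, 2.0f) == 2.0f.  A NaN results only when both operands
   are NaNs.  */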
4001
4002 static void
4003 do_vec_FminmaxNMP (sim_cpu *cpu)
4004 {
4005 /* instr [31] = 0
4006 instr [30] = half (0)/full (1)
4007 instr [29,24] = 10 1110
4008 instr [23] = max(0)/min(1)
4009 instr [22] = float (0)/double (1)
4010 instr [21] = 1
4011      instr [20,16] = Vm
4012      instr [15,10] = 1100 01
4013      instr [9,5] = Vn
4014      instr [4,0] = Vd.  */
4015
4016 unsigned vm = INSTR (20, 16);
4017 unsigned vn = INSTR (9, 5);
4018 unsigned vd = INSTR (4, 0);
4019 int full = INSTR (30, 30);
4020
4021 NYI_assert (29, 24, 0x2E);
4022 NYI_assert (21, 21, 1);
4023 NYI_assert (15, 10, 0x31);
4024
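  /* Pairwise: the lower result elements reduce adjacent pairs from Vn,
     the upper elements adjacent pairs from Vm.  */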
4025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4026 if (INSTR (22, 22))
4027 {
4028 double (* fn)(double, double) = INSTR (23, 23)
4029 ? dminnm : dmaxnm;
4030
4031 if (! full)
4032 HALT_NYI;
4033 aarch64_set_vec_double (cpu, vd, 0,
4034 fn (aarch64_get_vec_double (cpu, vn, 0),
4035 aarch64_get_vec_double (cpu, vn, 1)));
4036       aarch64_set_vec_double (cpu, vd, 1,
4037 fn (aarch64_get_vec_double (cpu, vm, 0),
4038 aarch64_get_vec_double (cpu, vm, 1)));
4039 }
4040 else
4041 {
4042 float (* fn)(float, float) = INSTR (23, 23)
4043 ? fminnm : fmaxnm;
4044
4045 aarch64_set_vec_float (cpu, vd, 0,
4046 fn (aarch64_get_vec_float (cpu, vn, 0),
4047 aarch64_get_vec_float (cpu, vn, 1)));
4048 if (full)
4049 aarch64_set_vec_float (cpu, vd, 1,
4050 fn (aarch64_get_vec_float (cpu, vn, 2),
4051 aarch64_get_vec_float (cpu, vn, 3)));
4052
4053 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4054 fn (aarch64_get_vec_float (cpu, vm, 0),
4055 aarch64_get_vec_float (cpu, vm, 1)));
4056 if (full)
4057 aarch64_set_vec_float (cpu, vd, 3,
4058 fn (aarch64_get_vec_float (cpu, vm, 2),
4059 aarch64_get_vec_float (cpu, vm, 3)));
4060 }
4061 }
4062
4063 static void
4064 do_vec_AND (sim_cpu *cpu)
4065 {
4066 /* instr[31] = 0
4067 instr[30] = half (0)/full (1)
4068 instr[29,21] = 001110001
4069 instr[20,16] = Vm
4070 instr[15,10] = 000111
4071 instr[9,5] = Vn
4072      instr[4,0] = Vd.  */
4073
4074 unsigned vm = INSTR (20, 16);
4075 unsigned vn = INSTR (9, 5);
4076 unsigned vd = INSTR (4, 0);
4077 unsigned i;
4078 int full = INSTR (30, 30);
4079
4080 NYI_assert (29, 21, 0x071);
4081 NYI_assert (15, 10, 0x07);
4082
4083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4084 for (i = 0; i < (full ? 4 : 2); i++)
4085 aarch64_set_vec_u32 (cpu, vd, i,
4086 aarch64_get_vec_u32 (cpu, vn, i)
4087 & aarch64_get_vec_u32 (cpu, vm, i));
4088 }
4089
4090 static void
4091 do_vec_BSL (sim_cpu *cpu)
4092 {
4093 /* instr[31] = 0
4094 instr[30] = half (0)/full (1)
4095 instr[29,21] = 101110011
4096 instr[20,16] = Vm
4097 instr[15,10] = 000111
4098 instr[9,5] = Vn
4099      instr[4,0] = Vd.  */
4100
4101 unsigned vm = INSTR (20, 16);
4102 unsigned vn = INSTR (9, 5);
4103 unsigned vd = INSTR (4, 0);
4104 unsigned i;
4105 int full = INSTR (30, 30);
4106
4107 NYI_assert (29, 21, 0x173);
4108 NYI_assert (15, 10, 0x07);
4109
4110 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
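  /* Bitwise select: each result bit comes from Vn where the corresponding
     Vd (mask) bit is set, and from Vm where it is clear.  */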
4111 for (i = 0; i < (full ? 16 : 8); i++)
4112 aarch64_set_vec_u8 (cpu, vd, i,
4113 ( aarch64_get_vec_u8 (cpu, vd, i)
4114 & aarch64_get_vec_u8 (cpu, vn, i))
4115 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4116 & aarch64_get_vec_u8 (cpu, vm, i)));
4117 }
4118
4119 static void
4120 do_vec_EOR (sim_cpu *cpu)
4121 {
4122 /* instr[31] = 0
4123 instr[30] = half (0)/full (1)
4124 instr[29,21] = 10 1110 001
4125 instr[20,16] = Vm
4126 instr[15,10] = 000111
4127 instr[9,5] = Vn
4128      instr[4,0] = Vd.  */
4129
4130 unsigned vm = INSTR (20, 16);
4131 unsigned vn = INSTR (9, 5);
4132 unsigned vd = INSTR (4, 0);
4133 unsigned i;
4134 int full = INSTR (30, 30);
4135
4136 NYI_assert (29, 21, 0x171);
4137 NYI_assert (15, 10, 0x07);
4138
4139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4140 for (i = 0; i < (full ? 4 : 2); i++)
4141 aarch64_set_vec_u32 (cpu, vd, i,
4142 aarch64_get_vec_u32 (cpu, vn, i)
4143 ^ aarch64_get_vec_u32 (cpu, vm, i));
4144 }
4145
4146 static void
4147 do_vec_bit (sim_cpu *cpu)
4148 {
4149 /* instr[31] = 0
4150 instr[30] = half (0)/full (1)
4151 instr[29,23] = 10 1110 1
4152 instr[22] = BIT (0) / BIF (1)
4153 instr[21] = 1
4154 instr[20,16] = Vm
4155 instr[15,10] = 0001 11
4156 instr[9,5] = Vn
4157      instr[4,0] = Vd.  */
4158
4159 unsigned vm = INSTR (20, 16);
4160 unsigned vn = INSTR (9, 5);
4161 unsigned vd = INSTR (4, 0);
4162 unsigned full = INSTR (30, 30);
4163 unsigned test_false = INSTR (22, 22);
4164 unsigned i;
4165
4166 NYI_assert (29, 23, 0x5D);
4167 NYI_assert (21, 21, 1);
4168 NYI_assert (15, 10, 0x07);
4169
4170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4171 for (i = 0; i < (full ? 4 : 2); i++)
4172 {
4173 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4174 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4175 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4176 if (test_false)
4177 aarch64_set_vec_u32 (cpu, vd, i,
4178 (vd_val & vm_val) | (vn_val & ~vm_val));
4179 else
4180 aarch64_set_vec_u32 (cpu, vd, i,
4181 (vd_val & ~vm_val) | (vn_val & vm_val));
4182 }
4183 }
4184
4185 static void
4186 do_vec_ORN (sim_cpu *cpu)
4187 {
4188 /* instr[31] = 0
4189 instr[30] = half (0)/full (1)
4190 instr[29,21] = 00 1110 111
4191 instr[20,16] = Vm
4192 instr[15,10] = 00 0111
4193 instr[9,5] = Vn
4194      instr[4,0] = Vd.  */
4195
4196 unsigned vm = INSTR (20, 16);
4197 unsigned vn = INSTR (9, 5);
4198 unsigned vd = INSTR (4, 0);
4199 unsigned i;
4200 int full = INSTR (30, 30);
4201
4202 NYI_assert (29, 21, 0x077);
4203 NYI_assert (15, 10, 0x07);
4204
4205 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4206 for (i = 0; i < (full ? 16 : 8); i++)
4207 aarch64_set_vec_u8 (cpu, vd, i,
4208 aarch64_get_vec_u8 (cpu, vn, i)
4209 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4210 }
4211
4212 static void
4213 do_vec_ORR (sim_cpu *cpu)
4214 {
4215 /* instr[31] = 0
4216 instr[30] = half (0)/full (1)
4217 instr[29,21] = 00 1110 101
4218 instr[20,16] = Vm
4219 instr[15,10] = 0001 11
4220 instr[9,5] = Vn
4221      instr[4,0] = Vd.  */
4222
4223 unsigned vm = INSTR (20, 16);
4224 unsigned vn = INSTR (9, 5);
4225 unsigned vd = INSTR (4, 0);
4226 unsigned i;
4227 int full = INSTR (30, 30);
4228
4229 NYI_assert (29, 21, 0x075);
4230 NYI_assert (15, 10, 0x07);
4231
4232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4233 for (i = 0; i < (full ? 16 : 8); i++)
4234 aarch64_set_vec_u8 (cpu, vd, i,
4235 aarch64_get_vec_u8 (cpu, vn, i)
4236 | aarch64_get_vec_u8 (cpu, vm, i));
4237 }
4238
4239 static void
4240 do_vec_BIC (sim_cpu *cpu)
4241 {
4242 /* instr[31] = 0
4243 instr[30] = half (0)/full (1)
4244 instr[29,21] = 00 1110 011
4245 instr[20,16] = Vm
4246 instr[15,10] = 00 0111
4247 instr[9,5] = Vn
4248      instr[4,0] = Vd.  */
4249
4250 unsigned vm = INSTR (20, 16);
4251 unsigned vn = INSTR (9, 5);
4252 unsigned vd = INSTR (4, 0);
4253 unsigned i;
4254 int full = INSTR (30, 30);
4255
4256 NYI_assert (29, 21, 0x073);
4257 NYI_assert (15, 10, 0x07);
4258
4259 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4260 for (i = 0; i < (full ? 16 : 8); i++)
4261 aarch64_set_vec_u8 (cpu, vd, i,
4262 aarch64_get_vec_u8 (cpu, vn, i)
4263 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4264 }
4265
4266 static void
4267 do_vec_XTN (sim_cpu *cpu)
4268 {
4269 /* instr[31] = 0
4270 instr[30] = first part (0)/ second part (1)
4271 instr[29,24] = 00 1110
4272 instr[23,22] = size: byte(00), half(01), word (10)
4273 instr[21,10] = 1000 0100 1010
4274 instr[9,5] = Vs
4275 instr[4,0] = Vd. */
4276
4277 unsigned vs = INSTR (9, 5);
4278 unsigned vd = INSTR (4, 0);
4279 unsigned bias = INSTR (30, 30);
4280 unsigned i;
4281
4282 NYI_assert (29, 24, 0x0E);
4283 NYI_assert (21, 10, 0x84A);
4284
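  /* XTN (bias == 0) narrows into the lower half of Vd; XTN2 (bias == 1)
     narrows into the upper half, leaving the lower half intact.  */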
4285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4286 switch (INSTR (23, 22))
4287 {
4288 case 0:
4289 for (i = 0; i < 8; i++)
4290 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4291 aarch64_get_vec_u16 (cpu, vs, i));
4292 return;
4293
4294 case 1:
4295 for (i = 0; i < 4; i++)
4296 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4297 aarch64_get_vec_u32 (cpu, vs, i));
4298 return;
4299
4300 case 2:
4301 for (i = 0; i < 2; i++)
4302 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4303 aarch64_get_vec_u64 (cpu, vs, i));
4304 return;
4305 }
4306 }
4307
4308 /* Return the number of bits set in the input value. */
4309 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4310 # define popcount __builtin_popcount
4311 #else
4312 static int
4313 popcount (unsigned char x)
4314 {
4315 static const unsigned char popcnt[16] =
4316 {
4317 0, 1, 1, 2,
4318 1, 2, 2, 3,
4319 1, 2, 2, 3,
4320 2, 3, 3, 4
4321 };
4322
4323 /* Only counts the low 8 bits of the input as that is all we need. */
4324 return popcnt[x % 16] + popcnt[x / 16];
4325 }
4326 #endif
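
/* Example: popcount (0xA5) = popcnt[0x5] + popcnt[0xA] = 2 + 2 = 4.  */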
4327
4328 static void
4329 do_vec_CNT (sim_cpu *cpu)
4330 {
4331 /* instr[31] = 0
4332 instr[30] = half (0)/ full (1)
4333 instr[29,24] = 00 1110
4334 instr[23,22] = size: byte(00)
4335 instr[21,10] = 1000 0001 0110
4336 instr[9,5] = Vs
4337 instr[4,0] = Vd. */
4338
4339 unsigned vs = INSTR (9, 5);
4340 unsigned vd = INSTR (4, 0);
4341 int full = INSTR (30, 30);
4342 int size = INSTR (23, 22);
4343 int i;
4344
4345 NYI_assert (29, 24, 0x0E);
4346 NYI_assert (21, 10, 0x816);
4347
4348 if (size != 0)
4349 HALT_UNALLOC;
4350
4351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4352
4353 for (i = 0; i < (full ? 16 : 8); i++)
4354 aarch64_set_vec_u8 (cpu, vd, i,
4355 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4356 }
4357
4358 static void
4359 do_vec_maxv (sim_cpu *cpu)
4360 {
4361 /* instr[31] = 0
4362 instr[30] = half(0)/full(1)
4363 instr[29] = signed (0)/unsigned(1)
4364 instr[28,24] = 0 1110
4365 instr[23,22] = size: byte(00), half(01), word (10)
4366 instr[21] = 1
4367 instr[20,17] = 1 000
4368 instr[16] = max(0)/min(1)
4369 instr[15,10] = 1010 10
4370 instr[9,5] = V source
4371      instr[4,0] = R dest.  */
4372
4373 unsigned vs = INSTR (9, 5);
4374 unsigned rd = INSTR (4, 0);
4375 unsigned full = INSTR (30, 30);
4376 unsigned i;
4377
4378 NYI_assert (28, 24, 0x0E);
4379 NYI_assert (21, 21, 1);
4380 NYI_assert (20, 17, 8);
4381 NYI_assert (15, 10, 0x2A);
4382
4383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4384 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4385 {
4386 case 0: /* SMAXV. */
4387 {
4388 int64_t smax;
4389 switch (INSTR (23, 22))
4390 {
4391 case 0:
4392 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4393 for (i = 1; i < (full ? 16 : 8); i++)
4394 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4395 break;
4396 case 1:
4397 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4398 for (i = 1; i < (full ? 8 : 4); i++)
4399 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4400 break;
4401 case 2:
4402 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4403 for (i = 1; i < (full ? 4 : 2); i++)
4404 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4405 break;
4406 case 3:
4407 HALT_UNALLOC;
4408 }
4409 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4410 return;
4411 }
4412
4413 case 1: /* SMINV. */
4414 {
4415 int64_t smin;
4416 switch (INSTR (23, 22))
4417 {
4418 case 0:
4419 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4420 for (i = 1; i < (full ? 16 : 8); i++)
4421 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4422 break;
4423 case 1:
4424 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4425 for (i = 1; i < (full ? 8 : 4); i++)
4426 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4427 break;
4428 case 2:
4429 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4430 for (i = 1; i < (full ? 4 : 2); i++)
4431 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4432 break;
4433
4434 case 3:
4435 HALT_UNALLOC;
4436 }
4437 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4438 return;
4439 }
4440
4441 case 2: /* UMAXV. */
4442 {
4443 uint64_t umax;
4444 switch (INSTR (23, 22))
4445 {
4446 case 0:
4447 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4448 for (i = 1; i < (full ? 16 : 8); i++)
4449 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4450 break;
4451 case 1:
4452 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4453 for (i = 1; i < (full ? 8 : 4); i++)
4454 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4455 break;
4456 case 2:
4457 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4458 for (i = 1; i < (full ? 4 : 2); i++)
4459 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4460 break;
4461
4462 case 3:
4463 HALT_UNALLOC;
4464 }
4465 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4466 return;
4467 }
4468
4469 case 3: /* UMINV. */
4470 {
4471 uint64_t umin;
4472 switch (INSTR (23, 22))
4473 {
4474 case 0:
4475 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4476 for (i = 1; i < (full ? 16 : 8); i++)
4477 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4478 break;
4479 case 1:
4480 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4481 for (i = 1; i < (full ? 8 : 4); i++)
4482 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4483 break;
4484 case 2:
4485 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4486 for (i = 1; i < (full ? 4 : 2); i++)
4487 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4488 break;
4489
4490 case 3:
4491 HALT_UNALLOC;
4492 }
4493 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4494 return;
4495 }
4496 }
4497 }
4498
4499 static void
4500 do_vec_fminmaxV (sim_cpu *cpu)
4501 {
4502 /* instr[31,24] = 0110 1110
4503 instr[23] = max(0)/min(1)
4504 instr[22,14] = 011 0000 11
4505 instr[13,12] = nm(00)/normal(11)
4506 instr[11,10] = 10
4507 instr[9,5] = V source
4508      instr[4,0] = R dest.  */
4509
4510 unsigned vs = INSTR (9, 5);
4511 unsigned rd = INSTR (4, 0);
4512 unsigned i;
4513 float res = aarch64_get_vec_float (cpu, vs, 0);
4514
4515 NYI_assert (31, 24, 0x6E);
4516 NYI_assert (22, 14, 0x0C3);
4517 NYI_assert (11, 10, 2);
4518
4519 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4520 if (INSTR (23, 23))
4521 {
4522 switch (INSTR (13, 12))
4523 {
4524 	case 0: /* FMINNMV.  */
4525 for (i = 1; i < 4; i++)
4526 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4527 break;
4528
4529 case 3: /* FMINV. */
4530 for (i = 1; i < 4; i++)
4531 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4532 break;
4533
4534 default:
4535 HALT_NYI;
4536 }
4537 }
4538 else
4539 {
4540 switch (INSTR (13, 12))
4541 {
4542 	case 0: /* FMAXNMV.  */
4543 for (i = 1; i < 4; i++)
4544 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4545 break;
4546
4547 case 3: /* FMAXV. */
4548 for (i = 1; i < 4; i++)
4549 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4550 break;
4551
4552 default:
4553 HALT_NYI;
4554 }
4555 }
4556
4557 aarch64_set_FP_float (cpu, rd, res);
4558 }
4559
4560 static void
4561 do_vec_Fminmax (sim_cpu *cpu)
4562 {
4563 /* instr[31] = 0
4564 instr[30] = half(0)/full(1)
4565 instr[29,24] = 00 1110
4566 instr[23] = max(0)/min(1)
4567 instr[22] = float(0)/double(1)
4568 instr[21] = 1
4569 instr[20,16] = Vm
4570 instr[15,14] = 11
4571 instr[13,12] = nm(00)/normal(11)
4572 instr[11,10] = 01
4573 instr[9,5] = Vn
4574 instr[4,0] = Vd. */
4575
4576 unsigned vm = INSTR (20, 16);
4577 unsigned vn = INSTR (9, 5);
4578 unsigned vd = INSTR (4, 0);
4579 unsigned full = INSTR (30, 30);
4580 unsigned min = INSTR (23, 23);
4581 unsigned i;
4582
4583 NYI_assert (29, 24, 0x0E);
4584 NYI_assert (21, 21, 1);
4585 NYI_assert (15, 14, 3);
4586 NYI_assert (11, 10, 1);
4587
4588 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4589 if (INSTR (22, 22))
4590 {
4591 double (* func)(double, double);
4592
4593 if (! full)
4594 HALT_NYI;
4595
4596 if (INSTR (13, 12) == 0)
4597 func = min ? dminnm : dmaxnm;
4598 else if (INSTR (13, 12) == 3)
4599 func = min ? fmin : fmax;
4600 else
4601 HALT_NYI;
4602
4603 for (i = 0; i < 2; i++)
4604 aarch64_set_vec_double (cpu, vd, i,
4605 func (aarch64_get_vec_double (cpu, vn, i),
4606 aarch64_get_vec_double (cpu, vm, i)));
4607 }
4608 else
4609 {
4610 float (* func)(float, float);
4611
4612 if (INSTR (13, 12) == 0)
4613 func = min ? fminnm : fmaxnm;
4614 else if (INSTR (13, 12) == 3)
4615 func = min ? fminf : fmaxf;
4616 else
4617 HALT_NYI;
4618
4619 for (i = 0; i < (full ? 4 : 2); i++)
4620 aarch64_set_vec_float (cpu, vd, i,
4621 func (aarch64_get_vec_float (cpu, vn, i),
4622 aarch64_get_vec_float (cpu, vm, i)));
4623 }
4624 }
4625
4626 static void
4627 do_vec_SCVTF (sim_cpu *cpu)
4628 {
4629 /* instr[31] = 0
4630 instr[30] = Q
4631 instr[29,23] = 00 1110 0
4632 instr[22] = float(0)/double(1)
4633 instr[21,10] = 10 0001 1101 10
4634 instr[9,5] = Vn
4635 instr[4,0] = Vd. */
4636
4637 unsigned vn = INSTR (9, 5);
4638 unsigned vd = INSTR (4, 0);
4639 unsigned full = INSTR (30, 30);
4640 unsigned size = INSTR (22, 22);
4641 unsigned i;
4642
4643 NYI_assert (29, 23, 0x1C);
4644 NYI_assert (21, 10, 0x876);
4645
4646 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4647 if (size)
4648 {
4649 if (! full)
4650 HALT_UNALLOC;
4651
4652 for (i = 0; i < 2; i++)
4653 {
4654 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4655 aarch64_set_vec_double (cpu, vd, i, val);
4656 }
4657 }
4658 else
4659 {
4660 for (i = 0; i < (full ? 4 : 2); i++)
4661 {
4662 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4663 aarch64_set_vec_float (cpu, vd, i, val);
4664 }
4665 }
4666 }
4667
4668 #define VEC_CMP(SOURCE, CMP) \
4669 do \
4670 { \
4671 switch (size) \
4672 { \
4673 case 0: \
4674 for (i = 0; i < (full ? 16 : 8); i++) \
4675 aarch64_set_vec_u8 (cpu, vd, i, \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4677 CMP \
4678 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4679 ? -1 : 0); \
4680 return; \
4681 case 1: \
4682 for (i = 0; i < (full ? 8 : 4); i++) \
4683 aarch64_set_vec_u16 (cpu, vd, i, \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4685 CMP \
4686 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4687 ? -1 : 0); \
4688 return; \
4689 case 2: \
4690 for (i = 0; i < (full ? 4 : 2); i++) \
4691 aarch64_set_vec_u32 (cpu, vd, i, \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4693 CMP \
4694 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4695 ? -1 : 0); \
4696 return; \
4697 case 3: \
4698 if (! full) \
4699 HALT_UNALLOC; \
4700 for (i = 0; i < 2; i++) \
4701 aarch64_set_vec_u64 (cpu, vd, i, \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4703 CMP \
4704 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4705 ? -1ULL : 0); \
4706 return; \
4707 } \
4708 } \
4709 while (0)
4710
4711 #define VEC_CMP0(SOURCE, CMP) \
4712 do \
4713 { \
4714 switch (size) \
4715 { \
4716 case 0: \
4717 for (i = 0; i < (full ? 16 : 8); i++) \
4718 aarch64_set_vec_u8 (cpu, vd, i, \
4719 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4720 CMP 0 ? -1 : 0); \
4721 return; \
4722 case 1: \
4723 for (i = 0; i < (full ? 8 : 4); i++) \
4724 aarch64_set_vec_u16 (cpu, vd, i, \
4725 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4726 CMP 0 ? -1 : 0); \
4727 return; \
4728 case 2: \
4729 for (i = 0; i < (full ? 4 : 2); i++) \
4730 aarch64_set_vec_u32 (cpu, vd, i, \
4731 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4732 CMP 0 ? -1 : 0); \
4733 return; \
4734 case 3: \
4735 if (! full) \
4736 HALT_UNALLOC; \
4737 for (i = 0; i < 2; i++) \
4738 aarch64_set_vec_u64 (cpu, vd, i, \
4739 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4740 CMP 0 ? -1ULL : 0); \
4741 return; \
4742 } \
4743 } \
4744 while (0)
4745
4746 #define VEC_FCMP0(CMP) \
4747 do \
4748 { \
4749 if (vm != 0) \
4750 HALT_NYI; \
4751 if (INSTR (22, 22)) \
4752 { \
4753 if (! full) \
4754 HALT_NYI; \
4755 for (i = 0; i < 2; i++) \
4756 aarch64_set_vec_u64 (cpu, vd, i, \
4757 aarch64_get_vec_double (cpu, vn, i) \
4758 CMP 0.0 ? -1 : 0); \
4759 } \
4760 else \
4761 { \
4762 for (i = 0; i < (full ? 4 : 2); i++) \
4763 aarch64_set_vec_u32 (cpu, vd, i, \
4764 aarch64_get_vec_float (cpu, vn, i) \
4765 CMP 0.0 ? -1 : 0); \
4766 } \
4767 return; \
4768 } \
4769 while (0)
4770
4771 #define VEC_FCMP(CMP) \
4772 do \
4773 { \
4774 if (INSTR (22, 22)) \
4775 { \
4776 if (! full) \
4777 HALT_NYI; \
4778 for (i = 0; i < 2; i++) \
4779 aarch64_set_vec_u64 (cpu, vd, i, \
4780 aarch64_get_vec_double (cpu, vn, i) \
4781 CMP \
4782 aarch64_get_vec_double (cpu, vm, i) \
4783 ? -1 : 0); \
4784 } \
4785 else \
4786 { \
4787 for (i = 0; i < (full ? 4 : 2); i++) \
4788 aarch64_set_vec_u32 (cpu, vd, i, \
4789 aarch64_get_vec_float (cpu, vn, i) \
4790 CMP \
4791 aarch64_get_vec_float (cpu, vm, i) \
4792 ? -1 : 0); \
4793 } \
4794 return; \
4795 } \
4796 while (0)
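
/* The VEC_CMP, VEC_CMP0, VEC_FCMP0 and VEC_FCMP helpers above follow the
   SIMD compare convention: a lane is set to all ones when the comparison
   holds and to all zeros otherwise.  For example, a 32-bit signed GT
   compare of {1, 5} against {3, 2} produces {0x00000000, 0xffffffff}.  */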
4797
4798 static void
4799 do_vec_compare (sim_cpu *cpu)
4800 {
4801 /* instr[31] = 0
4802 instr[30] = half(0)/full(1)
4803 instr[29] = part-of-comparison-type
4804 instr[28,24] = 0 1110
4805 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4806 type of float compares: single (-0) / double (-1)
4807 instr[21] = 1
4808 instr[20,16] = Vm or 00000 (compare vs 0)
4809 instr[15,10] = part-of-comparison-type
4810 instr[9,5] = Vn
4811      instr[4,0] = Vd.  */
4812
4813 int full = INSTR (30, 30);
4814 int size = INSTR (23, 22);
4815 unsigned vm = INSTR (20, 16);
4816 unsigned vn = INSTR (9, 5);
4817 unsigned vd = INSTR (4, 0);
4818 unsigned i;
4819
4820 NYI_assert (28, 24, 0x0E);
4821 NYI_assert (21, 21, 1);
4822
4823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
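  /* Some non-compare instructions share this encoding space: when the Vm
     field of a would-be compare-against-zero is non-zero, redispatch to
     the appropriate handler.  */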
4824 if ((INSTR (11, 11)
4825 && INSTR (14, 14))
4826 || ((INSTR (11, 11) == 0
4827 && INSTR (10, 10) == 0)))
4828 {
4829 /* A compare vs 0. */
4830 if (vm != 0)
4831 {
4832 if (INSTR (15, 10) == 0x2A)
4833 do_vec_maxv (cpu);
4834 else if (INSTR (15, 10) == 0x32
4835 || INSTR (15, 10) == 0x3E)
4836 do_vec_fminmaxV (cpu);
4837 else if (INSTR (29, 23) == 0x1C
4838 && INSTR (21, 10) == 0x876)
4839 do_vec_SCVTF (cpu);
4840 else
4841 HALT_NYI;
4842 return;
4843 }
4844 }
4845
4846 if (INSTR (14, 14))
4847 {
4848 /* A floating point compare. */
4849 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4850 | INSTR (13, 10);
4851
4852 NYI_assert (15, 15, 1);
4853
4854 switch (decode)
4855 {
4856 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4857 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4858 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4859 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4860 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4861 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4862 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4863 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4864
4865 default:
4866 HALT_NYI;
4867 }
4868 }
4869 else
4870 {
4871 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4872
4873 switch (decode)
4874 {
4875 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4876 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4877 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4878 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4879 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4880 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4881 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4882 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4883 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4884 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4885 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4886 default:
4887 if (vm == 0)
4888 HALT_NYI;
4889 do_vec_maxv (cpu);
4890 }
4891 }
4892 }
4893
4894 static void
4895 do_vec_SSHL (sim_cpu *cpu)
4896 {
4897 /* instr[31] = 0
4898 instr[30] = first part (0)/ second part (1)
4899 instr[29,24] = 00 1110
4900 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4901 instr[21] = 1
4902 instr[20,16] = Vm
4903 instr[15,10] = 0100 01
4904 instr[9,5] = Vn
4905 instr[4,0] = Vd. */
4906
4907 unsigned full = INSTR (30, 30);
4908 unsigned vm = INSTR (20, 16);
4909 unsigned vn = INSTR (9, 5);
4910 unsigned vd = INSTR (4, 0);
4911 unsigned i;
4912 signed int shift;
4913
4914 NYI_assert (29, 24, 0x0E);
4915 NYI_assert (21, 21, 1);
4916 NYI_assert (15, 10, 0x11);
4917
4918   /* The shift count is the signed byte in the corresponding element of
4919      Vm: a non-negative count shifts left, a negative count shifts right.  */
4919
4920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4921 switch (INSTR (23, 22))
4922 {
4923 case 0:
4924 for (i = 0; i < (full ? 16 : 8); i++)
4925 {
4926 shift = aarch64_get_vec_s8 (cpu, vm, i);
4927 if (shift >= 0)
4928 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4929 << shift);
4930 else
4931 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4932 >> - shift);
4933 }
4934 return;
4935
4936 case 1:
4937 for (i = 0; i < (full ? 8 : 4); i++)
4938 {
4939 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4940 if (shift >= 0)
4941 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4942 << shift);
4943 else
4944 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4945 >> - shift);
4946 }
4947 return;
4948
4949 case 2:
4950 for (i = 0; i < (full ? 4 : 2); i++)
4951 {
4952 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4953 if (shift >= 0)
4954 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4955 << shift);
4956 else
4957 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4958 >> - shift);
4959 }
4960 return;
4961
4962 case 3:
4963 if (! full)
4964 HALT_UNALLOC;
4965 for (i = 0; i < 2; i++)
4966 {
4967 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4968 if (shift >= 0)
4969 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4970 << shift);
4971 else
4972 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4973 >> - shift);
4974 }
4975 return;
4976 }
4977 }
4978
4979 static void
4980 do_vec_USHL (sim_cpu *cpu)
4981 {
4982 /* instr[31] = 0
4983 instr[30] = first part (0)/ second part (1)
4984 instr[29,24] = 10 1110
4985 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4986 instr[21] = 1
4987 instr[20,16] = Vm
4988 instr[15,10] = 0100 01
4989 instr[9,5] = Vn
4990 instr[4,0] = Vd */
4991
4992 unsigned full = INSTR (30, 30);
4993 unsigned vm = INSTR (20, 16);
4994 unsigned vn = INSTR (9, 5);
4995 unsigned vd = INSTR (4, 0);
4996 unsigned i;
4997 signed int shift;
4998
4999 NYI_assert (29, 24, 0x2E);
5000 NYI_assert (15, 10, 0x11);
5001
5002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5003 switch (INSTR (23, 22))
5004 {
5005 case 0:
5006 for (i = 0; i < (full ? 16 : 8); i++)
5007 {
5008 shift = aarch64_get_vec_s8 (cpu, vm, i);
5009 if (shift >= 0)
5010 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5011 << shift);
5012 else
5013 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5014 >> - shift);
5015 }
5016 return;
5017
5018 case 1:
5019 for (i = 0; i < (full ? 8 : 4); i++)
5020 {
5021 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5022 if (shift >= 0)
5023 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5024 << shift);
5025 else
5026 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5027 >> - shift);
5028 }
5029 return;
5030
5031 case 2:
5032 for (i = 0; i < (full ? 4 : 2); i++)
5033 {
5034 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5035 if (shift >= 0)
5036 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5037 << shift);
5038 else
5039 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5040 >> - shift);
5041 }
5042 return;
5043
5044 case 3:
5045 if (! full)
5046 HALT_UNALLOC;
5047 for (i = 0; i < 2; i++)
5048 {
5049 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5050 if (shift >= 0)
5051 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5052 << shift);
5053 else
5054 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5055 >> - shift);
5056 }
5057 return;
5058 }
5059 }
5060
5061 static void
5062 do_vec_FMLA (sim_cpu *cpu)
5063 {
5064 /* instr[31] = 0
5065 instr[30] = full/half selector
5066 instr[29,23] = 0011100
5067 instr[22] = size: 0=>float, 1=>double
5068 instr[21] = 1
5069      instr[20,16] = Vm
5070      instr[15,10] = 1100 11
5071      instr[9,5] = Vn
5072      instr[4,0] = Vd.  */
5073
5074 unsigned vm = INSTR (20, 16);
5075 unsigned vn = INSTR (9, 5);
5076 unsigned vd = INSTR (4, 0);
5077 unsigned i;
5078 int full = INSTR (30, 30);
5079
5080 NYI_assert (29, 23, 0x1C);
5081 NYI_assert (21, 21, 1);
5082 NYI_assert (15, 10, 0x33);
5083
5084 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5085 if (INSTR (22, 22))
5086 {
5087 if (! full)
5088 HALT_UNALLOC;
5089 for (i = 0; i < 2; i++)
5090 aarch64_set_vec_double (cpu, vd, i,
5091 aarch64_get_vec_double (cpu, vn, i) *
5092 aarch64_get_vec_double (cpu, vm, i) +
5093 aarch64_get_vec_double (cpu, vd, i));
5094 }
5095 else
5096 {
5097 for (i = 0; i < (full ? 4 : 2); i++)
5098 aarch64_set_vec_float (cpu, vd, i,
5099 aarch64_get_vec_float (cpu, vn, i) *
5100 aarch64_get_vec_float (cpu, vm, i) +
5101 aarch64_get_vec_float (cpu, vd, i));
5102 }
5103 }
5104
5105 static void
5106 do_vec_max (sim_cpu *cpu)
5107 {
5108 /* instr[31] = 0
5109 instr[30] = full/half selector
5110 instr[29] = SMAX (0) / UMAX (1)
5111 instr[28,24] = 0 1110
5112 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5113 instr[21] = 1
5114      instr[20,16] = Vm
5115      instr[15,10] = 0110 01
5116      instr[9,5] = Vn
5117      instr[4,0] = Vd.  */
5118
5119 unsigned vm = INSTR (20, 16);
5120 unsigned vn = INSTR (9, 5);
5121 unsigned vd = INSTR (4, 0);
5122 unsigned i;
5123 int full = INSTR (30, 30);
5124
5125 NYI_assert (28, 24, 0x0E);
5126 NYI_assert (21, 21, 1);
5127 NYI_assert (15, 10, 0x19);
5128
5129 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5130 if (INSTR (29, 29))
5131 {
5132 switch (INSTR (23, 22))
5133 {
5134 case 0:
5135 for (i = 0; i < (full ? 16 : 8); i++)
5136 aarch64_set_vec_u8 (cpu, vd, i,
5137 aarch64_get_vec_u8 (cpu, vn, i)
5138 > aarch64_get_vec_u8 (cpu, vm, i)
5139 ? aarch64_get_vec_u8 (cpu, vn, i)
5140 : aarch64_get_vec_u8 (cpu, vm, i));
5141 return;
5142
5143 case 1:
5144 for (i = 0; i < (full ? 8 : 4); i++)
5145 aarch64_set_vec_u16 (cpu, vd, i,
5146 aarch64_get_vec_u16 (cpu, vn, i)
5147 > aarch64_get_vec_u16 (cpu, vm, i)
5148 ? aarch64_get_vec_u16 (cpu, vn, i)
5149 : aarch64_get_vec_u16 (cpu, vm, i));
5150 return;
5151
5152 case 2:
5153 for (i = 0; i < (full ? 4 : 2); i++)
5154 aarch64_set_vec_u32 (cpu, vd, i,
5155 aarch64_get_vec_u32 (cpu, vn, i)
5156 > aarch64_get_vec_u32 (cpu, vm, i)
5157 ? aarch64_get_vec_u32 (cpu, vn, i)
5158 : aarch64_get_vec_u32 (cpu, vm, i));
5159 return;
5160
5161 case 3:
5162 HALT_UNALLOC;
5163 }
5164 }
5165 else
5166 {
5167 switch (INSTR (23, 22))
5168 {
5169 case 0:
5170 for (i = 0; i < (full ? 16 : 8); i++)
5171 aarch64_set_vec_s8 (cpu, vd, i,
5172 aarch64_get_vec_s8 (cpu, vn, i)
5173 > aarch64_get_vec_s8 (cpu, vm, i)
5174 ? aarch64_get_vec_s8 (cpu, vn, i)
5175 : aarch64_get_vec_s8 (cpu, vm, i));
5176 return;
5177
5178 case 1:
5179 for (i = 0; i < (full ? 8 : 4); i++)
5180 aarch64_set_vec_s16 (cpu, vd, i,
5181 aarch64_get_vec_s16 (cpu, vn, i)
5182 > aarch64_get_vec_s16 (cpu, vm, i)
5183 ? aarch64_get_vec_s16 (cpu, vn, i)
5184 : aarch64_get_vec_s16 (cpu, vm, i));
5185 return;
5186
5187 case 2:
5188 for (i = 0; i < (full ? 4 : 2); i++)
5189 aarch64_set_vec_s32 (cpu, vd, i,
5190 aarch64_get_vec_s32 (cpu, vn, i)
5191 > aarch64_get_vec_s32 (cpu, vm, i)
5192 ? aarch64_get_vec_s32 (cpu, vn, i)
5193 : aarch64_get_vec_s32 (cpu, vm, i));
5194 return;
5195
5196 case 3:
5197 HALT_UNALLOC;
5198 }
5199 }
5200 }
5201
5202 static void
5203 do_vec_min (sim_cpu *cpu)
5204 {
5205 /* instr[31] = 0
5206 instr[30] = full/half selector
5207 instr[29] = SMIN (0) / UMIN (1)
5208 instr[28,24] = 0 1110
5209 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5210 instr[21] = 1
5211      instr[20,16] = Vm
5212      instr[15,10] = 0110 11
5213      instr[9,5] = Vn
5214      instr[4,0] = Vd.  */
5215
5216 unsigned vm = INSTR (20, 16);
5217 unsigned vn = INSTR (9, 5);
5218 unsigned vd = INSTR (4, 0);
5219 unsigned i;
5220 int full = INSTR (30, 30);
5221
5222 NYI_assert (28, 24, 0x0E);
5223 NYI_assert (21, 21, 1);
5224 NYI_assert (15, 10, 0x1B);
5225
5226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5227 if (INSTR (29, 29))
5228 {
5229 switch (INSTR (23, 22))
5230 {
5231 case 0:
5232 for (i = 0; i < (full ? 16 : 8); i++)
5233 aarch64_set_vec_u8 (cpu, vd, i,
5234 aarch64_get_vec_u8 (cpu, vn, i)
5235 < aarch64_get_vec_u8 (cpu, vm, i)
5236 ? aarch64_get_vec_u8 (cpu, vn, i)
5237 : aarch64_get_vec_u8 (cpu, vm, i));
5238 return;
5239
5240 case 1:
5241 for (i = 0; i < (full ? 8 : 4); i++)
5242 aarch64_set_vec_u16 (cpu, vd, i,
5243 aarch64_get_vec_u16 (cpu, vn, i)
5244 < aarch64_get_vec_u16 (cpu, vm, i)
5245 ? aarch64_get_vec_u16 (cpu, vn, i)
5246 : aarch64_get_vec_u16 (cpu, vm, i));
5247 return;
5248
5249 case 2:
5250 for (i = 0; i < (full ? 4 : 2); i++)
5251 aarch64_set_vec_u32 (cpu, vd, i,
5252 aarch64_get_vec_u32 (cpu, vn, i)
5253 < aarch64_get_vec_u32 (cpu, vm, i)
5254 ? aarch64_get_vec_u32 (cpu, vn, i)
5255 : aarch64_get_vec_u32 (cpu, vm, i));
5256 return;
5257
5258 case 3:
5259 HALT_UNALLOC;
5260 }
5261 }
5262 else
5263 {
5264 switch (INSTR (23, 22))
5265 {
5266 case 0:
5267 for (i = 0; i < (full ? 16 : 8); i++)
5268 aarch64_set_vec_s8 (cpu, vd, i,
5269 aarch64_get_vec_s8 (cpu, vn, i)
5270 < aarch64_get_vec_s8 (cpu, vm, i)
5271 ? aarch64_get_vec_s8 (cpu, vn, i)
5272 : aarch64_get_vec_s8 (cpu, vm, i));
5273 return;
5274
5275 case 1:
5276 for (i = 0; i < (full ? 8 : 4); i++)
5277 aarch64_set_vec_s16 (cpu, vd, i,
5278 aarch64_get_vec_s16 (cpu, vn, i)
5279 < aarch64_get_vec_s16 (cpu, vm, i)
5280 ? aarch64_get_vec_s16 (cpu, vn, i)
5281 : aarch64_get_vec_s16 (cpu, vm, i));
5282 return;
5283
5284 case 2:
5285 for (i = 0; i < (full ? 4 : 2); i++)
5286 aarch64_set_vec_s32 (cpu, vd, i,
5287 aarch64_get_vec_s32 (cpu, vn, i)
5288 < aarch64_get_vec_s32 (cpu, vm, i)
5289 ? aarch64_get_vec_s32 (cpu, vn, i)
5290 : aarch64_get_vec_s32 (cpu, vm, i));
5291 return;
5292
5293 case 3:
5294 HALT_UNALLOC;
5295 }
5296 }
5297 }
5298
5299 static void
5300 do_vec_sub_long (sim_cpu *cpu)
5301 {
5302 /* instr[31] = 0
5303 instr[30] = lower (0) / upper (1)
5304 instr[29] = signed (0) / unsigned (1)
5305 instr[28,24] = 0 1110
5306 instr[23,22] = size: bytes (00), half (01), word (10)
5307 instr[21] = 1
5308      instr[20,16] = Vm
5309 instr[15,10] = 0010 00
5310 instr[9,5] = Vn
5311 instr[4,0] = V dest. */
5312
5313 unsigned size = INSTR (23, 22);
5314 unsigned vm = INSTR (20, 16);
5315 unsigned vn = INSTR (9, 5);
5316 unsigned vd = INSTR (4, 0);
5317 unsigned bias = 0;
5318 unsigned i;
5319
5320 NYI_assert (28, 24, 0x0E);
5321 NYI_assert (21, 21, 1);
5322 NYI_assert (15, 10, 0x08);
5323
5324 if (size == 3)
5325 HALT_UNALLOC;
5326
5327 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5328 switch (INSTR (30, 29))
5329 {
5330 case 2: /* SSUBL2. */
5331 bias = 2;
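      /* Fall through.  */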
5332 case 0: /* SSUBL. */
5333 switch (size)
5334 {
5335 case 0:
5336 	    bias *= 4;	/* Byte elements: the "2" variant reads elements 8..15.  */
5337 for (i = 0; i < 8; i++)
5338 aarch64_set_vec_s16 (cpu, vd, i,
5339 aarch64_get_vec_s8 (cpu, vn, i + bias)
5340 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5341 break;
5342
5343 case 1:
5344 bias *= 2;
5345 for (i = 0; i < 4; i++)
5346 aarch64_set_vec_s32 (cpu, vd, i,
5347 aarch64_get_vec_s16 (cpu, vn, i + bias)
5348 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5349 break;
5350
5351 case 2:
5352 for (i = 0; i < 2; i++)
5353 aarch64_set_vec_s64 (cpu, vd, i,
5354 aarch64_get_vec_s32 (cpu, vn, i + bias)
5355 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5356 break;
5357
5358 default:
5359 HALT_UNALLOC;
5360 }
5361 break;
5362
5363 case 3: /* USUBL2. */
5364 bias = 2;
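      /* Fall through.  */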
5365 case 1: /* USUBL. */
5366 switch (size)
5367 {
5368 case 0:
5369 	    bias *= 4;	/* Byte elements: the "2" variant reads elements 8..15.  */
5370 for (i = 0; i < 8; i++)
5371 aarch64_set_vec_u16 (cpu, vd, i,
5372 aarch64_get_vec_u8 (cpu, vn, i + bias)
5373 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5374 break;
5375
5376 case 1:
5377 bias *= 2;
5378 for (i = 0; i < 4; i++)
5379 aarch64_set_vec_u32 (cpu, vd, i,
5380 aarch64_get_vec_u16 (cpu, vn, i + bias)
5381 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5382 break;
5383
5384 case 2:
5385 for (i = 0; i < 2; i++)
5386 aarch64_set_vec_u64 (cpu, vd, i,
5387 aarch64_get_vec_u32 (cpu, vn, i + bias)
5388 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5389 break;
5390
5391 default:
5392 HALT_UNALLOC;
5393 }
5394 break;
5395 }
5396 }
5397
5398 static void
5399 do_vec_ADDP (sim_cpu *cpu)
5400 {
5401 /* instr[31] = 0
5402 instr[30] = half(0)/full(1)
5403 instr[29,24] = 00 1110
5404 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5405 instr[21] = 1
5406      instr[20,16] = Vm
5407 instr[15,10] = 1011 11
5408 instr[9,5] = Vn
5409 instr[4,0] = V dest. */
5410
5411 FRegister copy_vn;
5412 FRegister copy_vm;
5413 unsigned full = INSTR (30, 30);
5414 unsigned size = INSTR (23, 22);
5415 unsigned vm = INSTR (20, 16);
5416 unsigned vn = INSTR (9, 5);
5417 unsigned vd = INSTR (4, 0);
5418 unsigned i, range;
5419
5420 NYI_assert (29, 24, 0x0E);
5421 NYI_assert (21, 21, 1);
5422 NYI_assert (15, 10, 0x2F);
5423
5424 /* Make copies of the source registers in case vd == vn/vm. */
5425 copy_vn = cpu->fr[vn];
5426 copy_vm = cpu->fr[vm];
5427
5428 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5429 switch (size)
5430 {
5431 case 0:
5432 range = full ? 8 : 4;
5433 for (i = 0; i < range; i++)
5434 {
5435 aarch64_set_vec_u8 (cpu, vd, i,
5436 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5437 aarch64_set_vec_u8 (cpu, vd, i + range,
5438 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5439 }
5440 return;
5441
5442 case 1:
5443 range = full ? 4 : 2;
5444 for (i = 0; i < range; i++)
5445 {
5446 aarch64_set_vec_u16 (cpu, vd, i,
5447 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5448 aarch64_set_vec_u16 (cpu, vd, i + range,
5449 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5450 }
5451 return;
5452
5453 case 2:
5454 range = full ? 2 : 1;
5455 for (i = 0; i < range; i++)
5456 {
5457 aarch64_set_vec_u32 (cpu, vd, i,
5458 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5459 aarch64_set_vec_u32 (cpu, vd, i + range,
5460 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5461 }
5462 return;
5463
5464 case 3:
5465 if (! full)
5466 HALT_UNALLOC;
5467 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5468 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5469 return;
5470 }
5471 }
5472
5473 /* Floating point vector convert to longer (higher precision).  */
5474 static void
5475 do_vec_FCVTL (sim_cpu *cpu)
5476 {
5477 /* instr[31] = 0
5478 instr[30] = half (0) / all (1)
5479 instr[29,23] = 00 1110 0
5480 instr[22] = single (0) / double (1)
5481 instr[21,10] = 10 0001 0111 10
5482 instr[9,5] = Rn
5483 instr[4,0] = Rd. */
5484
5485 unsigned rn = INSTR (9, 5);
5486 unsigned rd = INSTR (4, 0);
5487 unsigned full = INSTR (30, 30);
5488 unsigned i;
5489
5490 NYI_assert (31, 31, 0);
5491 NYI_assert (29, 23, 0x1C);
5492 NYI_assert (21, 10, 0x85E);
5493
5494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5495 if (INSTR (22, 22))
5496 {
5497 for (i = 0; i < 2; i++)
5498 aarch64_set_vec_double (cpu, rd, i,
5499 aarch64_get_vec_float (cpu, rn, i + 2*full));
5500 }
5501 else
5502 {
5503 HALT_NYI;
5504
5505 #if 0
5506 /* TODO: Implement missing half-float support. */
5507 for (i = 0; i < 4; i++)
5508 aarch64_set_vec_float (cpu, rd, i,
5509 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5510 #endif
5511 }
5512 }
5513
5514 static void
5515 do_vec_FABS (sim_cpu *cpu)
5516 {
5517 /* instr[31] = 0
5518 instr[30] = half(0)/full(1)
5519 instr[29,23] = 00 1110 1
5520 instr[22] = float(0)/double(1)
5521 instr[21,16] = 10 0000
5522 instr[15,10] = 1111 10
5523 instr[9,5] = Vn
5524 instr[4,0] = Vd. */
5525
5526 unsigned vn = INSTR (9, 5);
5527 unsigned vd = INSTR (4, 0);
5528 unsigned full = INSTR (30, 30);
5529 unsigned i;
5530
5531 NYI_assert (29, 23, 0x1D);
5532 NYI_assert (21, 10, 0x83E);
5533
5534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5535 if (INSTR (22, 22))
5536 {
5537 if (! full)
5538 HALT_NYI;
5539
5540 for (i = 0; i < 2; i++)
5541 aarch64_set_vec_double (cpu, vd, i,
5542 fabs (aarch64_get_vec_double (cpu, vn, i)));
5543 }
5544 else
5545 {
5546 for (i = 0; i < (full ? 4 : 2); i++)
5547 aarch64_set_vec_float (cpu, vd, i,
5548 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5549 }
5550 }
5551
5552 static void
5553 do_vec_FCVTZS (sim_cpu *cpu)
5554 {
5555 /* instr[31] = 0
5556 instr[30] = half (0) / all (1)
5557 instr[29,23] = 00 1110 1
5558 instr[22] = single (0) / double (1)
5559 instr[21,10] = 10 0001 1011 10
5560 instr[9,5] = Rn
5561 instr[4,0] = Rd. */
5562
5563 unsigned rn = INSTR (9, 5);
5564 unsigned rd = INSTR (4, 0);
5565 unsigned full = INSTR (30, 30);
5566 unsigned i;
5567
5568 NYI_assert (31, 31, 0);
5569 NYI_assert (29, 23, 0x1D);
5570 NYI_assert (21, 10, 0x86E);
5571
5572 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5573 if (INSTR (22, 22))
5574 {
5575 if (! full)
5576 HALT_UNALLOC;
5577
5578 for (i = 0; i < 2; i++)
5579 aarch64_set_vec_s64 (cpu, rd, i,
5580 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5581 }
5582 else
5583 for (i = 0; i < (full ? 4 : 2); i++)
5584 aarch64_set_vec_s32 (cpu, rd, i,
5585 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5586 }
5587
5588 static void
5589 do_vec_REV64 (sim_cpu *cpu)
5590 {
5591 /* instr[31] = 0
5592 instr[30] = full/half
5593 instr[29,24] = 00 1110
5594 instr[23,22] = size
5595 instr[21,10] = 10 0000 0000 10
5596 instr[9,5] = Rn
5597 instr[4,0] = Rd. */
5598
5599 unsigned rn = INSTR (9, 5);
5600 unsigned rd = INSTR (4, 0);
5601 unsigned size = INSTR (23, 22);
5602 unsigned full = INSTR (30, 30);
5603 unsigned i;
5604 FRegister val;
5605
5606 NYI_assert (29, 24, 0x0E);
5607 NYI_assert (21, 10, 0x802);
5608
5609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
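  /* XORing the element index with (elements-per-64-bit-group - 1) reverses
     the element order within each 64-bit group.  */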
5610 switch (size)
5611 {
5612 case 0:
5613 for (i = 0; i < (full ? 16 : 8); i++)
5614 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5615 break;
5616
5617 case 1:
5618 for (i = 0; i < (full ? 8 : 4); i++)
5619 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5620 break;
5621
5622 case 2:
5623 for (i = 0; i < (full ? 4 : 2); i++)
5624 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5625 break;
5626
5627 case 3:
5628 HALT_UNALLOC;
5629 }
5630
5631 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5632 if (full)
5633 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5634 }
5635
5636 static void
5637 do_vec_REV16 (sim_cpu *cpu)
5638 {
5639 /* instr[31] = 0
5640 instr[30] = full/half
5641 instr[29,24] = 00 1110
5642 instr[23,22] = size
5643 instr[21,10] = 10 0000 0001 10
5644 instr[9,5] = Rn
5645 instr[4,0] = Rd. */
5646
5647 unsigned rn = INSTR (9, 5);
5648 unsigned rd = INSTR (4, 0);
5649 unsigned size = INSTR (23, 22);
5650 unsigned full = INSTR (30, 30);
5651 unsigned i;
5652 FRegister val;
5653
5654 NYI_assert (29, 24, 0x0E);
5655 NYI_assert (21, 10, 0x806);
5656
5657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5658 switch (size)
5659 {
5660 case 0:
5661 for (i = 0; i < (full ? 16 : 8); i++)
5662 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5663 break;
5664
5665 default:
5666 HALT_UNALLOC;
5667 }
5668
5669 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5670 if (full)
5671 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5672 }
5673
5674 static void
5675 do_vec_op1 (sim_cpu *cpu)
5676 {
5677 /* instr[31] = 0
5678 instr[30] = half/full
5679 instr[29,24] = 00 1110
5680 instr[23,21] = ???
5681 instr[20,16] = Vm
5682 instr[15,10] = sub-opcode
5683 instr[9,5] = Vn
5684 instr[4,0] = Vd */
5685 NYI_assert (29, 24, 0x0E);
5686
5687 if (INSTR (21, 21) == 0)
5688 {
5689 if (INSTR (23, 22) == 0)
5690 {
5691 if (INSTR (30, 30) == 1
5692 && INSTR (17, 14) == 0
5693 && INSTR (12, 10) == 7)
5694 return do_vec_ins_2 (cpu);
5695
5696 switch (INSTR (15, 10))
5697 {
5698 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5699 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5700 case 0x07: do_vec_INS (cpu); return;
5701 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5702 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5703
5704 case 0x00:
5705 case 0x08:
5706 case 0x10:
5707 case 0x18:
5708 do_vec_TBL (cpu); return;
5709
5710 case 0x06:
5711 case 0x16:
5712 do_vec_UZP (cpu); return;
5713
5714 case 0x0A: do_vec_TRN (cpu); return;
5715
5716 case 0x0E:
5717 case 0x1E:
5718 do_vec_ZIP (cpu); return;
5719
5720 default:
5721 HALT_NYI;
5722 }
5723 }
5724
5725 switch (INSTR (13, 10))
5726 {
5727 case 0x6: do_vec_UZP (cpu); return;
5728 case 0xE: do_vec_ZIP (cpu); return;
5729 case 0xA: do_vec_TRN (cpu); return;
5730 default: HALT_NYI;
5731 }
5732 }
5733
5734 switch (INSTR (15, 10))
5735 {
5736 case 0x02: do_vec_REV64 (cpu); return;
5737 case 0x06: do_vec_REV16 (cpu); return;
5738
5739 case 0x07:
5740 switch (INSTR (23, 21))
5741 {
5742 case 1: do_vec_AND (cpu); return;
5743 case 3: do_vec_BIC (cpu); return;
5744 case 5: do_vec_ORR (cpu); return;
5745 case 7: do_vec_ORN (cpu); return;
5746 default: HALT_NYI;
5747 }
5748
5749 case 0x08: do_vec_sub_long (cpu); return;
5750 case 0x0a: do_vec_XTN (cpu); return;
5751 case 0x11: do_vec_SSHL (cpu); return;
5752 case 0x16: do_vec_CNT (cpu); return;
5753 case 0x19: do_vec_max (cpu); return;
5754 case 0x1B: do_vec_min (cpu); return;
5755 case 0x21: do_vec_add (cpu); return;
5756 case 0x25: do_vec_MLA (cpu); return;
5757 case 0x27: do_vec_mul (cpu); return;
5758 case 0x2F: do_vec_ADDP (cpu); return;
5759 case 0x30: do_vec_mull (cpu); return;
5760 case 0x33: do_vec_FMLA (cpu); return;
5761 case 0x35: do_vec_fadd (cpu); return;
5762
5763 case 0x1E:
5764 switch (INSTR (20, 16))
5765 {
5766 case 0x01: do_vec_FCVTL (cpu); return;
5767 default: HALT_NYI;
5768 }
5769
5770 case 0x2E:
5771 switch (INSTR (20, 16))
5772 {
5773 case 0x00: do_vec_ABS (cpu); return;
5774 case 0x01: do_vec_FCVTZS (cpu); return;
5775 case 0x11: do_vec_ADDV (cpu); return;
5776 default: HALT_NYI;
5777 }
5778
5779 case 0x31:
5780 case 0x3B:
5781 do_vec_Fminmax (cpu); return;
5782
5783 case 0x0D:
5784 case 0x0F:
5785 case 0x22:
5786 case 0x23:
5787 case 0x26:
5788 case 0x2A:
5789 case 0x32:
5790 case 0x36:
5791 case 0x39:
5792 case 0x3A:
5793 do_vec_compare (cpu); return;
5794
5795 case 0x3E:
5796 do_vec_FABS (cpu); return;
5797
5798 default:
5799 HALT_NYI;
5800 }
5801 }
5802
5803 static void
5804 do_vec_xtl (sim_cpu *cpu)
5805 {
5806 /* instr[31] = 0
5807 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5808 instr[28,22] = 0 1111 00
5809 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5810 instr[15,10] = 1010 01
5811 instr[9,5] = V source
5812 instr[4,0] = V dest. */
5813
5814 unsigned vs = INSTR (9, 5);
5815 unsigned vd = INSTR (4, 0);
5816 unsigned i, shift, bias = 0;
5817
5818 NYI_assert (28, 22, 0x3C);
5819 NYI_assert (15, 10, 0x29);
5820
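  /* SXTL/UXTL are SSHLL/USHLL with a zero shift count; the "2" variants
     read from the upper half of the source register, selected via bias.  */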
5821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5822 switch (INSTR (30, 29))
5823 {
5824 case 2: /* SXTL2, SSHLL2. */
5825 bias = 2;
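      /* Fall through.  */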
5826 case 0: /* SXTL, SSHLL. */
5827 if (INSTR (21, 21))
5828 {
5829 int64_t val1, val2;
5830
5831 shift = INSTR (20, 16);
5832 /* Get the source values before setting the destination values
5833 in case the source and destination are the same. */
5834 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5835 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5836 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5837 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5838 }
5839 else if (INSTR (20, 20))
5840 {
5841 	  int32_t v[4];
5843
5844 shift = INSTR (19, 16);
5845 bias *= 2;
5846 for (i = 0; i < 4; i++)
5847 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5848 for (i = 0; i < 4; i++)
5849 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5850 }
5851 else
5852 {
5853 int16_t v[8];
5854 NYI_assert (19, 19, 1);
5855
5856 shift = INSTR (18, 16);
5857 bias *= 4;
5858 for (i = 0; i < 8; i++)
5859 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5860 for (i = 0; i < 8; i++)
5861 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5862 }
5863 return;
5864
5865 case 3: /* UXTL2, USHLL2. */
5866 bias = 2;
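      /* Fall through.  */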
5867 case 1: /* UXTL, USHLL. */
5868 if (INSTR (21, 21))
5869 {
5870 uint64_t v1, v2;
5871 shift = INSTR (20, 16);
5872 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5873 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5874 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5875 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5876 }
5877 else if (INSTR (20, 20))
5878 {
5879 uint32_t v[4];
5880 shift = INSTR (19, 16);
5881 bias *= 2;
5882 for (i = 0; i < 4; i++)
5883 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5884 for (i = 0; i < 4; i++)
5885 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5886 }
5887 else
5888 {
5889 uint16_t v[8];
5890 NYI_assert (19, 19, 1);
5891
5892 shift = INSTR (18, 16);
5893 bias *= 4;
5894 for (i = 0; i < 8; i++)
5895 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5896 for (i = 0; i < 8; i++)
5897 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5898 }
5899 return;
5900 }
5901 }
5902
5903 static void
5904 do_vec_SHL (sim_cpu *cpu)
5905 {
5906 /* instr [31] = 0
5907 instr [30] = half(0)/full(1)
5908 instr [29,23] = 001 1110
5909 instr [22,16] = size and shift amount
5910 instr [15,10] = 01 0101
5911 instr [9, 5] = Vs
5912 instr [4, 0] = Vd. */
5913
5914 int shift;
5915 int full = INSTR (30, 30);
5916 unsigned vs = INSTR (9, 5);
5917 unsigned vd = INSTR (4, 0);
5918 unsigned i;
5919
5920 NYI_assert (29, 23, 0x1E);
5921 NYI_assert (15, 10, 0x15);
5922
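  /* The immh:immb field in instr[22,16] holds W + shift for an element of
     width W, so testing the leading size bit and keeping the low bits
     yields the shift amount directly.  */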
5923 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5924 if (INSTR (22, 22))
5925 {
5926 shift = INSTR (21, 16);
5927
5928 if (full == 0)
5929 HALT_UNALLOC;
5930
5931 for (i = 0; i < 2; i++)
5932 {
5933 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5934 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5935 }
5936
5937 return;
5938 }
5939
5940 if (INSTR (21, 21))
5941 {
5942 shift = INSTR (20, 16);
5943
5944 for (i = 0; i < (full ? 4 : 2); i++)
5945 {
5946 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5947 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5948 }
5949
5950 return;
5951 }
5952
5953 if (INSTR (20, 20))
5954 {
5955 shift = INSTR (19, 16);
5956
5957 for (i = 0; i < (full ? 8 : 4); i++)
5958 {
5959 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5960 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5961 }
5962
5963 return;
5964 }
5965
5966 if (INSTR (19, 19) == 0)
5967 HALT_UNALLOC;
5968
5969 shift = INSTR (18, 16);
5970
5971 for (i = 0; i < (full ? 16 : 8); i++)
5972 {
5973 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5974 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5975 }
5976 }
5977
5978 static void
5979 do_vec_SSHR_USHR (sim_cpu *cpu)
5980 {
5981 /* instr [31] = 0
5982 instr [30] = half(0)/full(1)
5983 instr [29] = signed(0)/unsigned(1)
5984 instr [28,23] = 0 1111 0
5985 instr [22,16] = size and shift amount
5986 instr [15,10] = 0000 01
5987 instr [9, 5] = Vs
5988 instr [4, 0] = Vd. */
5989
5990 int full = INSTR (30, 30);
5991 int sign = ! INSTR (29, 29);
5992 unsigned shift = INSTR (22, 16);
5993 unsigned vs = INSTR (9, 5);
5994 unsigned vd = INSTR (4, 0);
5995 unsigned i;
5996
5997 NYI_assert (28, 23, 0x1E);
5998 NYI_assert (15, 10, 0x01);
5999
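  /* The immh:immb field in instr[22,16] holds 2 * W - shift for an element
     of width W, so each case below recovers the shift by subtracting the
     encoded value from 2 * W.  */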
6000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6001 if (INSTR (22, 22))
6002 {
6003 shift = 128 - shift;
6004
6005 if (full == 0)
6006 HALT_UNALLOC;
6007
6008 if (sign)
6009 for (i = 0; i < 2; i++)
6010 {
6011 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6012 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6013 }
6014 else
6015 for (i = 0; i < 2; i++)
6016 {
6017 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6018 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6019 }
6020
6021 return;
6022 }
6023
6024 if (INSTR (21, 21))
6025 {
6026 shift = 64 - shift;
6027
6028 if (sign)
6029 for (i = 0; i < (full ? 4 : 2); i++)
6030 {
6031 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6032 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6033 }
6034 else
6035 for (i = 0; i < (full ? 4 : 2); i++)
6036 {
6037 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6038 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6039 }
6040
6041 return;
6042 }
6043
6044 if (INSTR (20, 20))
6045 {
6046 shift = 32 - shift;
6047
6048 if (sign)
6049 for (i = 0; i < (full ? 8 : 4); i++)
6050 {
6051 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6052 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6053 }
6054 else
6055 for (i = 0; i < (full ? 8 : 4); i++)
6056 {
6057 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6058 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6059 }
6060
6061 return;
6062 }
6063
6064 if (INSTR (19, 19) == 0)
6065 HALT_UNALLOC;
6066
6067 shift = 16 - shift;
6068
6069 if (sign)
6070 for (i = 0; i < (full ? 16 : 8); i++)
6071 {
6072 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6073 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6074 }
6075 else
6076 for (i = 0; i < (full ? 16 : 8); i++)
6077 {
6078 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6079 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6080 }
6081 }
6082
6083 static void
6084 do_vec_MUL_by_element (sim_cpu *cpu)
6085 {
6086 /* instr[31] = 0
6087 instr[30] = half/full
6088 instr[29,24] = 00 1111
6089 instr[23,22] = size
6090 instr[21] = L
6091 instr[20] = M
6092 instr[19,16] = m
6093 instr[15,12] = 1000
6094 instr[11] = H
6095 instr[10] = 0
6096 instr[9,5] = Vn
6097 instr[4,0] = Vd */
6098
6099 unsigned full = INSTR (30, 30);
6100 unsigned L = INSTR (21, 21);
6101 unsigned H = INSTR (11, 11);
6102 unsigned vn = INSTR (9, 5);
6103 unsigned vd = INSTR (4, 0);
6104 unsigned size = INSTR (23, 22);
6105 unsigned index;
6106 unsigned vm;
6107 unsigned e;
6108
6109 NYI_assert (29, 24, 0x0F);
6110 NYI_assert (15, 12, 0x8);
6111 NYI_assert (10, 10, 0);
6112
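  /* The multiplier element index is formed from H:L:M for 16-bit elements
     (restricting Vm to V0-V15) and from H:L for 32-bit elements.  */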
6113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6114 switch (size)
6115 {
6116 case 1:
6117 {
6118 /* 16 bit products. */
6119 uint16_t product;
6120 uint16_t element1;
6121 uint16_t element2;
6122
6123 index = (H << 2) | (L << 1) | INSTR (20, 20);
6124 vm = INSTR (19, 16);
6125 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6126
6127 for (e = 0; e < (full ? 8 : 4); e ++)
6128 {
6129 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6130 product = element1 * element2;
6131 aarch64_set_vec_u16 (cpu, vd, e, product);
6132 }
6133 }
6134 break;
6135
6136 case 2:
6137 {
6138 /* 32 bit products. */
6139 uint32_t product;
6140 uint32_t element1;
6141 uint32_t element2;
6142
6143 index = (H << 1) | L;
6144 vm = INSTR (20, 16);
6145 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6146
6147 for (e = 0; e < (full ? 4 : 2); e ++)
6148 {
6149 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6150 product = element1 * element2;
6151 aarch64_set_vec_u32 (cpu, vd, e, product);
6152 }
6153 }
6154 break;
6155
6156 default:
6157 HALT_UNALLOC;
6158 }
6159 }
6160
6161 static void
6162 do_FMLA_by_element (sim_cpu *cpu)
6163 {
6164 /* instr[31] = 0
6165 instr[30] = half/full
6166 instr[29,23] = 00 1111 1
6167 instr[22] = size
6168 instr[21] = L
6169 instr[20,16] = m
6170 instr[15,12] = 0001
6171 instr[11] = H
6172 instr[10] = 0
6173 instr[9,5] = Vn
6174 instr[4,0] = Vd */
6175
6176 unsigned full = INSTR (30, 30);
6177 unsigned size = INSTR (22, 22);
6178 unsigned L = INSTR (21, 21);
6179 unsigned vm = INSTR (20, 16);
6180 unsigned H = INSTR (11, 11);
6181 unsigned vn = INSTR (9, 5);
6182 unsigned vd = INSTR (4, 0);
6183 unsigned e;
6184
6185 NYI_assert (29, 23, 0x1F);
6186 NYI_assert (15, 12, 0x1);
6187 NYI_assert (10, 10, 0);
6188
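  /* A single element of Vm, selected by H for doubles or H:L for floats,
     is multiplied into every element of Vn and accumulated into Vd.  */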
6189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6190 if (size)
6191 {
6192 double element1, element2;
6193
6194 if (! full || L)
6195 HALT_UNALLOC;
6196
6197 element2 = aarch64_get_vec_double (cpu, vm, H);
6198
6199 for (e = 0; e < 2; e++)
6200 {
6201 element1 = aarch64_get_vec_double (cpu, vn, e);
6202 element1 *= element2;
6203 element1 += aarch64_get_vec_double (cpu, vd, e);
6204 aarch64_set_vec_double (cpu, vd, e, element1);
6205 }
6206 }
6207 else
6208 {
6209 float element1;
6210 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6211
6212 for (e = 0; e < (full ? 4 : 2); e++)
6213 {
6214 element1 = aarch64_get_vec_float (cpu, vn, e);
6215 element1 *= element2;
6216 element1 += aarch64_get_vec_float (cpu, vd, e);
6217 aarch64_set_vec_float (cpu, vd, e, element1);
6218 }
6219 }
6220 }
6221
6222 static void
6223 do_vec_op2 (sim_cpu *cpu)
6224 {
6225 /* instr[31] = 0
6226 instr[30] = half/full
6227 instr[29,24] = 00 1111
6228 instr[23] = ?
6229 instr[22,16] = element size & index
6230 instr[15,10] = sub-opcode
6231 instr[9,5] = Vm
6232 instr[4,0] = Vd */
6233
6234 NYI_assert (29, 24, 0x0F);
6235
6236 if (INSTR (23, 23) != 0)
6237 {
6238 switch (INSTR (15, 10))
6239 {
6240 case 0x04:
6241 case 0x06:
6242 do_FMLA_by_element (cpu);
6243 return;
6244
6245 case 0x20:
6246 case 0x22:
6247 do_vec_MUL_by_element (cpu);
6248 return;
6249
6250 default:
6251 HALT_NYI;
6252 }
6253 }
6254 else
6255 {
6256 switch (INSTR (15, 10))
6257 {
6258 case 0x01: do_vec_SSHR_USHR (cpu); return;
6259 case 0x15: do_vec_SHL (cpu); return;
6260 case 0x20:
6261 case 0x22: do_vec_MUL_by_element (cpu); return;
6262 case 0x29: do_vec_xtl (cpu); return;
6263 default: HALT_NYI;
6264 }
6265 }
6266 }
6267
6268 static void
6269 do_vec_neg (sim_cpu *cpu)
6270 {
6271 /* instr[31] = 0
6272 instr[30] = full(1)/half(0)
6273 instr[29,24] = 10 1110
6274 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6275 instr[21,10] = 1000 0010 1110
6276 instr[9,5] = Vs
6277 instr[4,0] = Vd */
6278
6279 int full = INSTR (30, 30);
6280 unsigned vs = INSTR (9, 5);
6281 unsigned vd = INSTR (4, 0);
6282 unsigned i;
6283
6284 NYI_assert (29, 24, 0x2E);
6285 NYI_assert (21, 10, 0x82E);
6286
6287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6288 switch (INSTR (23, 22))
6289 {
6290 case 0:
6291 for (i = 0; i < (full ? 16 : 8); i++)
6292 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6293 return;
6294
6295 case 1:
6296 for (i = 0; i < (full ? 8 : 4); i++)
6297 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6298 return;
6299
6300 case 2:
6301 for (i = 0; i < (full ? 4 : 2); i++)
6302 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6303 return;
6304
6305 case 3:
      if (! full)
	HALT_UNALLOC;
6308 for (i = 0; i < 2; i++)
6309 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6310 return;
6311 }
6312 }
6313
6314 static void
6315 do_vec_sqrt (sim_cpu *cpu)
6316 {
6317 /* instr[31] = 0
6318 instr[30] = full(1)/half(0)
6319 instr[29,23] = 101 1101
6320 instr[22] = single(0)/double(1)
6321 instr[21,10] = 1000 0111 1110
6322 instr[9,5] = Vs
6323 instr[4,0] = Vd. */
6324
6325 int full = INSTR (30, 30);
6326 unsigned vs = INSTR (9, 5);
6327 unsigned vd = INSTR (4, 0);
6328 unsigned i;
6329
6330 NYI_assert (29, 23, 0x5B);
6331 NYI_assert (21, 10, 0x87E);
6332
6333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6334 if (INSTR (22, 22) == 0)
6335 for (i = 0; i < (full ? 4 : 2); i++)
6336 aarch64_set_vec_float (cpu, vd, i,
6337 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6338 else
6339 for (i = 0; i < 2; i++)
6340 aarch64_set_vec_double (cpu, vd, i,
6341 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6342 }
6343
6344 static void
6345 do_vec_mls_indexed (sim_cpu *cpu)
6346 {
6347 /* instr[31] = 0
6348 instr[30] = half(0)/full(1)
6349 instr[29,24] = 10 1111
6350 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],instr[21],instr[20] = index H:L:M (if 16-bit)
     instr[11],instr[21] = index H:L (if 32-bit)
     instr[20,16] = Vm (instr[19,16] if 16-bit)
6354 instr[15,12] = 0100
6355 instr[11] = part of index
6356 instr[10] = 0
6357 instr[9,5] = Vs
6358 instr[4,0] = Vd. */
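  /* Multiply-subtract by element: each Vd lane is decremented by the
     product of the corresponding Vn lane and the one selected Vm lane,
     e.g. for 16-bit lanes Vd.8h -= Vn.8h * Vm.h[index].  */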
6359
6360 int full = INSTR (30, 30);
6361 unsigned vs = INSTR (9, 5);
6362 unsigned vd = INSTR (4, 0);
6363 unsigned vm = INSTR (20, 16);
6364 unsigned i;
6365
6366 NYI_assert (15, 12, 4);
6367 NYI_assert (10, 10, 0);
6368
6369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6370 switch (INSTR (23, 22))
6371 {
    case 1:
      {
	/* 16-bit lanes: Vm is named by instr[19,16] (V0-V15 only) and
	   the element index is H:L:M, as in do_vec_MUL_by_element.  */
	unsigned elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1)
	  | INSTR (20, 20);
	uint16_t val = aarch64_get_vec_u16 (cpu, INSTR (19, 16), elem);

	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i)
			       - (aarch64_get_vec_u16 (cpu, vs, i) * val));
	return;
      }

    case 2:
      {
	/* 32-bit lanes: the element index is H:L.  */
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i)
			       - (aarch64_get_vec_u32 (cpu, vs, i) * val));
	return;
      }
6401
6402 case 0:
6403 case 3:
6404 default:
6405 HALT_NYI;
6406 }
6407 }
6408
6409 static void
6410 do_vec_SUB (sim_cpu *cpu)
6411 {
6412 /* instr [31] = 0
6413 instr [30] = half(0)/full(1)
6414 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10), long(11)
6416 instr [21] = 1
6417 instr [20,16] = Vm
6418 instr [15,10] = 10 0001
6419 instr [9, 5] = Vn
6420 instr [4, 0] = Vd. */
6421
6422 unsigned full = INSTR (30, 30);
6423 unsigned vm = INSTR (20, 16);
6424 unsigned vn = INSTR (9, 5);
6425 unsigned vd = INSTR (4, 0);
6426 unsigned i;
6427
6428 NYI_assert (29, 24, 0x2E);
6429 NYI_assert (21, 21, 1);
6430 NYI_assert (15, 10, 0x21);
6431
6432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6433 switch (INSTR (23, 22))
6434 {
6435 case 0:
6436 for (i = 0; i < (full ? 16 : 8); i++)
6437 aarch64_set_vec_s8 (cpu, vd, i,
6438 aarch64_get_vec_s8 (cpu, vn, i)
6439 - aarch64_get_vec_s8 (cpu, vm, i));
6440 return;
6441
6442 case 1:
6443 for (i = 0; i < (full ? 8 : 4); i++)
6444 aarch64_set_vec_s16 (cpu, vd, i,
6445 aarch64_get_vec_s16 (cpu, vn, i)
6446 - aarch64_get_vec_s16 (cpu, vm, i));
6447 return;
6448
6449 case 2:
6450 for (i = 0; i < (full ? 4 : 2); i++)
6451 aarch64_set_vec_s32 (cpu, vd, i,
6452 aarch64_get_vec_s32 (cpu, vn, i)
6453 - aarch64_get_vec_s32 (cpu, vm, i));
6454 return;
6455
6456 case 3:
6457 if (full == 0)
6458 HALT_UNALLOC;
6459
6460 for (i = 0; i < 2; i++)
6461 aarch64_set_vec_s64 (cpu, vd, i,
6462 aarch64_get_vec_s64 (cpu, vn, i)
6463 - aarch64_get_vec_s64 (cpu, vm, i));
6464 return;
6465 }
6466 }
6467
6468 static void
6469 do_vec_MLS (sim_cpu *cpu)
6470 {
6471 /* instr [31] = 0
6472 instr [30] = half(0)/full(1)
6473 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10)
6475 instr [21] = 1
6476 instr [20,16] = Vm
6477 instr [15,10] = 10 0101
6478 instr [9, 5] = Vn
6479 instr [4, 0] = Vd. */
6480
6481 unsigned full = INSTR (30, 30);
6482 unsigned vm = INSTR (20, 16);
6483 unsigned vn = INSTR (9, 5);
6484 unsigned vd = INSTR (4, 0);
6485 unsigned i;
6486
6487 NYI_assert (29, 24, 0x2E);
6488 NYI_assert (21, 21, 1);
6489 NYI_assert (15, 10, 0x25);
6490
6491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6492 switch (INSTR (23, 22))
6493 {
6494 case 0:
6495 for (i = 0; i < (full ? 16 : 8); i++)
6496 aarch64_set_vec_u8 (cpu, vd, i,
6497 aarch64_get_vec_u8 (cpu, vd, i)
6498 - (aarch64_get_vec_u8 (cpu, vn, i)
6499 * aarch64_get_vec_u8 (cpu, vm, i)));
6500 return;
6501
6502 case 1:
6503 for (i = 0; i < (full ? 8 : 4); i++)
6504 aarch64_set_vec_u16 (cpu, vd, i,
6505 aarch64_get_vec_u16 (cpu, vd, i)
6506 - (aarch64_get_vec_u16 (cpu, vn, i)
6507 * aarch64_get_vec_u16 (cpu, vm, i)));
6508 return;
6509
6510 case 2:
6511 for (i = 0; i < (full ? 4 : 2); i++)
6512 aarch64_set_vec_u32 (cpu, vd, i,
6513 aarch64_get_vec_u32 (cpu, vd, i)
6514 - (aarch64_get_vec_u32 (cpu, vn, i)
6515 * aarch64_get_vec_u32 (cpu, vm, i)));
6516 return;
6517
6518 default:
6519 HALT_UNALLOC;
6520 }
6521 }
6522
6523 static void
6524 do_vec_FDIV (sim_cpu *cpu)
6525 {
6526 /* instr [31] = 0
6527 instr [30] = half(0)/full(1)
6528 instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
6530 instr [21] = 1
6531 instr [20,16] = Vm
6532 instr [15,10] = 1111 11
6533 instr [9, 5] = Vn
6534 instr [4, 0] = Vd. */
6535
6536 unsigned full = INSTR (30, 30);
6537 unsigned vm = INSTR (20, 16);
6538 unsigned vn = INSTR (9, 5);
6539 unsigned vd = INSTR (4, 0);
6540 unsigned i;
6541
6542 NYI_assert (29, 23, 0x5C);
6543 NYI_assert (21, 21, 1);
6544 NYI_assert (15, 10, 0x3F);
6545
6546 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6547 if (INSTR (22, 22))
6548 {
6549 if (! full)
6550 HALT_UNALLOC;
6551
6552 for (i = 0; i < 2; i++)
6553 aarch64_set_vec_double (cpu, vd, i,
6554 aarch64_get_vec_double (cpu, vn, i)
6555 / aarch64_get_vec_double (cpu, vm, i));
6556 }
6557 else
6558 for (i = 0; i < (full ? 4 : 2); i++)
6559 aarch64_set_vec_float (cpu, vd, i,
6560 aarch64_get_vec_float (cpu, vn, i)
6561 / aarch64_get_vec_float (cpu, vm, i));
6562 }
6563
6564 static void
6565 do_vec_FMUL (sim_cpu *cpu)
6566 {
6567 /* instr [31] = 0
6568 instr [30] = half(0)/full(1)
6569 instr [29,23] = 10 1110 0
6570 instr [22] = float(0)/double(1)
6571 instr [21] = 1
6572 instr [20,16] = Vm
6573 instr [15,10] = 1101 11
6574 instr [9, 5] = Vn
6575 instr [4, 0] = Vd. */
6576
6577 unsigned full = INSTR (30, 30);
6578 unsigned vm = INSTR (20, 16);
6579 unsigned vn = INSTR (9, 5);
6580 unsigned vd = INSTR (4, 0);
6581 unsigned i;
6582
6583 NYI_assert (29, 23, 0x5C);
6584 NYI_assert (21, 21, 1);
6585 NYI_assert (15, 10, 0x37);
6586
6587 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6588 if (INSTR (22, 22))
6589 {
6590 if (! full)
6591 HALT_UNALLOC;
6592
6593 for (i = 0; i < 2; i++)
6594 aarch64_set_vec_double (cpu, vd, i,
6595 aarch64_get_vec_double (cpu, vn, i)
6596 * aarch64_get_vec_double (cpu, vm, i));
6597 }
6598 else
6599 for (i = 0; i < (full ? 4 : 2); i++)
6600 aarch64_set_vec_float (cpu, vd, i,
6601 aarch64_get_vec_float (cpu, vn, i)
6602 * aarch64_get_vec_float (cpu, vm, i));
6603 }
6604
6605 static void
6606 do_vec_FADDP (sim_cpu *cpu)
6607 {
6608 /* instr [31] = 0
6609 instr [30] = half(0)/full(1)
6610 instr [29,23] = 10 1110 0
6611 instr [22] = float(0)/double(1)
6612 instr [21] = 1
6613 instr [20,16] = Vm
6614 instr [15,10] = 1101 01
6615 instr [9, 5] = Vn
6616 instr [4, 0] = Vd. */
6617
6618 unsigned full = INSTR (30, 30);
6619 unsigned vm = INSTR (20, 16);
6620 unsigned vn = INSTR (9, 5);
6621 unsigned vd = INSTR (4, 0);
6622
6623 NYI_assert (29, 23, 0x5C);
6624 NYI_assert (21, 21, 1);
6625 NYI_assert (15, 10, 0x35);
6626
6627 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6628 if (INSTR (22, 22))
6629 {
      /* Extract the values before adding them, in case vd == vn/vm.  */
6631 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6632 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6633 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6634 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6635
6636 if (! full)
6637 HALT_UNALLOC;
6638
6639 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6640 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6641 }
6642 else
6643 {
      /* Extract the values before adding them, in case vd == vn/vm.  */
6645 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6646 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6647 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6648 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6649
6650 if (full)
6651 {
6652 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6653 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6654 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6655 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6656
6657 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6658 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6659 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6660 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6661 }
6662 else
6663 {
6664 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6665 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6666 }
6667 }
6668 }
6669
6670 static void
6671 do_vec_FSQRT (sim_cpu *cpu)
6672 {
6673 /* instr[31] = 0
6674 instr[30] = half(0)/full(1)
6675 instr[29,23] = 10 1110 1
6676 instr[22] = single(0)/double(1)
6677 instr[21,10] = 10 0001 1111 10
6678 instr[9,5] = Vsrc
6679 instr[4,0] = Vdest. */
6680
6681 unsigned vn = INSTR (9, 5);
6682 unsigned vd = INSTR (4, 0);
6683 unsigned full = INSTR (30, 30);
6684 int i;
6685
6686 NYI_assert (29, 23, 0x5D);
6687 NYI_assert (21, 10, 0x87E);
6688
6689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6690 if (INSTR (22, 22))
6691 {
6692 if (! full)
6693 HALT_UNALLOC;
6694
6695 for (i = 0; i < 2; i++)
6696 aarch64_set_vec_double (cpu, vd, i,
6697 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6698 }
6699 else
6700 {
6701 for (i = 0; i < (full ? 4 : 2); i++)
6702 aarch64_set_vec_float (cpu, vd, i,
6703 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6704 }
6705 }
6706
6707 static void
6708 do_vec_FNEG (sim_cpu *cpu)
6709 {
6710 /* instr[31] = 0
6711 instr[30] = half (0)/full (1)
6712 instr[29,23] = 10 1110 1
6713 instr[22] = single (0)/double (1)
6714 instr[21,10] = 10 0000 1111 10
6715 instr[9,5] = Vsrc
6716 instr[4,0] = Vdest. */
6717
6718 unsigned vn = INSTR (9, 5);
6719 unsigned vd = INSTR (4, 0);
6720 unsigned full = INSTR (30, 30);
6721 int i;
6722
6723 NYI_assert (29, 23, 0x5D);
6724 NYI_assert (21, 10, 0x83E);
6725
6726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6727 if (INSTR (22, 22))
6728 {
6729 if (! full)
6730 HALT_UNALLOC;
6731
6732 for (i = 0; i < 2; i++)
6733 aarch64_set_vec_double (cpu, vd, i,
6734 - aarch64_get_vec_double (cpu, vn, i));
6735 }
6736 else
6737 {
6738 for (i = 0; i < (full ? 4 : 2); i++)
6739 aarch64_set_vec_float (cpu, vd, i,
6740 - aarch64_get_vec_float (cpu, vn, i));
6741 }
6742 }
6743
6744 static void
6745 do_vec_NOT (sim_cpu *cpu)
6746 {
6747 /* instr[31] = 0
6748 instr[30] = half (0)/full (1)
6749 instr[29,10] = 10 1110 0010 0000 0101 10
6750 instr[9,5] = Vn
     instr[4,0]  = Vd.  */
6752
6753 unsigned vn = INSTR (9, 5);
6754 unsigned vd = INSTR (4, 0);
6755 unsigned i;
6756 int full = INSTR (30, 30);
6757
6758 NYI_assert (29, 10, 0xB8816);
6759
6760 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6761 for (i = 0; i < (full ? 16 : 8); i++)
6762 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6763 }
6764
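/* Return the number of leading zero bits in the SIZE-bit value VAL,
   or SIZE if VAL is zero.  */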
6765 static unsigned int
6766 clz (uint64_t val, unsigned size)
6767 {
6768 uint64_t mask = 1;
6769 int count;
6770
6771 mask <<= (size - 1);
6772 count = 0;
6773 do
6774 {
6775 if (val & mask)
6776 break;
6777 mask >>= 1;
6778 count ++;
6779 }
6780 while (mask);
6781
6782 return count;
6783 }
6784
6785 static void
6786 do_vec_CLZ (sim_cpu *cpu)
6787 {
6788 /* instr[31] = 0
6789 instr[30] = half (0)/full (1)
6790 instr[29,24] = 10 1110
6791 instr[23,22] = size
6792 instr[21,10] = 10 0000 0100 10
6793 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6795
6796 unsigned vn = INSTR (9, 5);
6797 unsigned vd = INSTR (4, 0);
6798 unsigned i;
6799 int full = INSTR (30,30);
6800
6801 NYI_assert (29, 24, 0x2E);
6802 NYI_assert (21, 10, 0x812);
6803
6804 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6805 switch (INSTR (23, 22))
6806 {
6807 case 0:
6808 for (i = 0; i < (full ? 16 : 8); i++)
6809 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6810 break;
6811 case 1:
6812 for (i = 0; i < (full ? 8 : 4); i++)
6813 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6814 break;
6815 case 2:
6816 for (i = 0; i < (full ? 4 : 2); i++)
6817 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6818 break;
6819 case 3:
6820 if (! full)
6821 HALT_UNALLOC;
6822 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6823 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6824 break;
6825 }
6826 }
6827
6828 static void
6829 do_vec_MOV_element (sim_cpu *cpu)
6830 {
6831 /* instr[31,21] = 0110 1110 000
6832 instr[20,16] = size & dest index
6833 instr[15] = 0
6834 instr[14,11] = source index
6835 instr[10] = 1
6836 instr[9,5] = Vs
     instr[4,0]   = Vd.  */
6838
6839 unsigned vs = INSTR (9, 5);
6840 unsigned vd = INSTR (4, 0);
6841 unsigned src_index;
6842 unsigned dst_index;
6843
6844 NYI_assert (31, 21, 0x370);
6845 NYI_assert (15, 15, 0);
6846 NYI_assert (10, 10, 1);
6847
6848 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
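  /* The element size is given by the lowest set bit of imm5
     (instr[20,16]): bit 16 => byte, 17 => half, 18 => word, 19 => double.
     The imm5 bits above that bit form the destination index.  */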
6849 if (INSTR (16, 16))
6850 {
6851 /* Move a byte. */
6852 src_index = INSTR (14, 11);
6853 dst_index = INSTR (20, 17);
6854 aarch64_set_vec_u8 (cpu, vd, dst_index,
6855 aarch64_get_vec_u8 (cpu, vs, src_index));
6856 }
6857 else if (INSTR (17, 17))
6858 {
6859 /* Move 16-bits. */
6860 NYI_assert (11, 11, 0);
6861 src_index = INSTR (14, 12);
6862 dst_index = INSTR (20, 18);
6863 aarch64_set_vec_u16 (cpu, vd, dst_index,
6864 aarch64_get_vec_u16 (cpu, vs, src_index));
6865 }
6866 else if (INSTR (18, 18))
6867 {
6868 /* Move 32-bits. */
6869 NYI_assert (12, 11, 0);
6870 src_index = INSTR (14, 13);
6871 dst_index = INSTR (20, 19);
6872 aarch64_set_vec_u32 (cpu, vd, dst_index,
6873 aarch64_get_vec_u32 (cpu, vs, src_index));
6874 }
6875 else
6876 {
6877 NYI_assert (19, 19, 1);
6878 NYI_assert (13, 11, 0);
6879 src_index = INSTR (14, 14);
6880 dst_index = INSTR (20, 20);
6881 aarch64_set_vec_u64 (cpu, vd, dst_index,
6882 aarch64_get_vec_u64 (cpu, vs, src_index));
6883 }
6884 }
6885
6886 static void
6887 do_vec_REV32 (sim_cpu *cpu)
6888 {
6889 /* instr[31] = 0
6890 instr[30] = full/half
6891 instr[29,24] = 10 1110
6892 instr[23,22] = size
6893 instr[21,10] = 10 0000 0000 10
6894 instr[9,5] = Rn
6895 instr[4,0] = Rd. */
6896
6897 unsigned rn = INSTR (9, 5);
6898 unsigned rd = INSTR (4, 0);
6899 unsigned size = INSTR (23, 22);
6900 unsigned full = INSTR (30, 30);
6901 unsigned i;
6902 FRegister val;
6903
6904 NYI_assert (29, 24, 0x2E);
6905 NYI_assert (21, 10, 0x802);
6906
6907 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6908 switch (size)
6909 {
6910 case 0:
6911 for (i = 0; i < (full ? 16 : 8); i++)
6912 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6913 break;
6914
6915 case 1:
6916 for (i = 0; i < (full ? 8 : 4); i++)
6917 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6918 break;
6919
6920 default:
6921 HALT_UNALLOC;
6922 }
6923
6924 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6925 if (full)
6926 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6927 }
6928
6929 static void
6930 do_vec_EXT (sim_cpu *cpu)
6931 {
6932 /* instr[31] = 0
6933 instr[30] = full/half
6934 instr[29,21] = 10 1110 000
6935 instr[20,16] = Vm
6936 instr[15] = 0
6937 instr[14,11] = source index
6938 instr[10] = 0
6939 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6941
6942 unsigned vm = INSTR (20, 16);
6943 unsigned vn = INSTR (9, 5);
6944 unsigned vd = INSTR (4, 0);
6945 unsigned src_index = INSTR (14, 11);
6946 unsigned full = INSTR (30, 30);
6947 unsigned i;
6948 unsigned j;
6949 FRegister val;
6950
6951 NYI_assert (31, 21, 0x370);
6952 NYI_assert (15, 15, 0);
6953 NYI_assert (10, 10, 0);
6954
6955 if (!full && (src_index & 0x8))
6956 HALT_UNALLOC;
6957
6958 j = 0;
6959
6960 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
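  /* The result is bytes src_index..top of Vn followed by the low bytes
     of Vm, e.g. a full-width EXT with src_index == 3 yields
     Vd = Vn.b[3..15] : Vm.b[0..2].  */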
6961 for (i = src_index; i < (full ? 16 : 8); i++)
6962 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6963 for (i = 0; i < src_index; i++)
6964 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6965
6966 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6967 if (full)
6968 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6969 }
6970
6971 static void
6972 dexAdvSIMD0 (sim_cpu *cpu)
6973 {
6974 /* instr [28,25] = 0 111. */
  if (INSTR (15, 10) == 0x07
      && INSTR (9, 5) == INSTR (20, 16))
6978 {
6979 if (INSTR (31, 21) == 0x075
6980 || INSTR (31, 21) == 0x275)
6981 {
6982 do_vec_MOV_whole_vector (cpu);
6983 return;
6984 }
6985 }
6986
6987 if (INSTR (29, 19) == 0x1E0)
6988 {
6989 do_vec_MOV_immediate (cpu);
6990 return;
6991 }
6992
6993 if (INSTR (29, 19) == 0x5E0)
6994 {
6995 do_vec_MVNI (cpu);
6996 return;
6997 }
6998
6999 if (INSTR (29, 19) == 0x1C0
7000 || INSTR (29, 19) == 0x1C1)
7001 {
7002 if (INSTR (15, 10) == 0x03)
7003 {
7004 do_vec_DUP_scalar_into_vector (cpu);
7005 return;
7006 }
7007 }
7008
7009 switch (INSTR (29, 24))
7010 {
7011 case 0x0E: do_vec_op1 (cpu); return;
7012 case 0x0F: do_vec_op2 (cpu); return;
7013
7014 case 0x2E:
7015 if (INSTR (21, 21) == 1)
7016 {
7017 switch (INSTR (15, 10))
7018 {
7019 case 0x02:
7020 do_vec_REV32 (cpu);
7021 return;
7022
7023 case 0x07:
7024 switch (INSTR (23, 22))
7025 {
7026 case 0: do_vec_EOR (cpu); return;
7027 case 1: do_vec_BSL (cpu); return;
7028 case 2:
7029 case 3: do_vec_bit (cpu); return;
7030 }
7031 break;
7032
7033 case 0x08: do_vec_sub_long (cpu); return;
7034 case 0x11: do_vec_USHL (cpu); return;
7035 case 0x12: do_vec_CLZ (cpu); return;
7036 case 0x16: do_vec_NOT (cpu); return;
7037 case 0x19: do_vec_max (cpu); return;
7038 case 0x1B: do_vec_min (cpu); return;
7039 case 0x21: do_vec_SUB (cpu); return;
7040 case 0x25: do_vec_MLS (cpu); return;
7041 case 0x31: do_vec_FminmaxNMP (cpu); return;
7042 case 0x35: do_vec_FADDP (cpu); return;
7043 case 0x37: do_vec_FMUL (cpu); return;
7044 case 0x3F: do_vec_FDIV (cpu); return;
7045
7046 case 0x3E:
7047 switch (INSTR (20, 16))
7048 {
7049 case 0x00: do_vec_FNEG (cpu); return;
7050 case 0x01: do_vec_FSQRT (cpu); return;
7051 default: HALT_NYI;
7052 }
7053
7054 case 0x0D:
7055 case 0x0F:
7056 case 0x22:
7057 case 0x23:
7058 case 0x26:
7059 case 0x2A:
7060 case 0x32:
7061 case 0x36:
7062 case 0x39:
7063 case 0x3A:
7064 do_vec_compare (cpu); return;
7065
7066 default:
7067 break;
7068 }
7069 }
7070
7071 if (INSTR (31, 21) == 0x370)
7072 {
7073 if (INSTR (10, 10))
7074 do_vec_MOV_element (cpu);
7075 else
7076 do_vec_EXT (cpu);
7077 return;
7078 }
7079
7080 switch (INSTR (21, 10))
7081 {
7082 case 0x82E: do_vec_neg (cpu); return;
7083 case 0x87E: do_vec_sqrt (cpu); return;
7084 default:
7085 if (INSTR (15, 10) == 0x30)
7086 {
7087 do_vec_mull (cpu);
7088 return;
7089 }
7090 break;
7091 }
7092 break;
7093
    case 0x2F:
7095 switch (INSTR (15, 10))
7096 {
7097 case 0x01: do_vec_SSHR_USHR (cpu); return;
7098 case 0x10:
7099 case 0x12: do_vec_mls_indexed (cpu); return;
7100 case 0x29: do_vec_xtl (cpu); return;
7101 default:
7102 HALT_NYI;
7103 }
7104
7105 default:
7106 break;
7107 }
7108
7109 HALT_NYI;
7110 }
7111
7112 /* 3 sources. */
7113
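/* Note: the multiply-accumulates below are evaluated with ordinary C
   arithmetic, so whether the multiply and add are fused with a single
   rounding depends on the host compiler (FP_CONTRACT); the AArch64
   instructions themselves are fused.  */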
7114 /* Float multiply add. */
7115 static void
7116 fmadds (sim_cpu *cpu)
7117 {
7118 unsigned sa = INSTR (14, 10);
7119 unsigned sm = INSTR (20, 16);
7120 unsigned sn = INSTR ( 9, 5);
7121 unsigned sd = INSTR ( 4, 0);
7122
7123 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7124 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7125 + aarch64_get_FP_float (cpu, sn)
7126 * aarch64_get_FP_float (cpu, sm));
7127 }
7128
7129 /* Double multiply add. */
7130 static void
7131 fmaddd (sim_cpu *cpu)
7132 {
7133 unsigned sa = INSTR (14, 10);
7134 unsigned sm = INSTR (20, 16);
7135 unsigned sn = INSTR ( 9, 5);
7136 unsigned sd = INSTR ( 4, 0);
7137
7138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7139 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7140 + aarch64_get_FP_double (cpu, sn)
7141 * aarch64_get_FP_double (cpu, sm));
7142 }
7143
7144 /* Float multiply subtract. */
7145 static void
7146 fmsubs (sim_cpu *cpu)
7147 {
7148 unsigned sa = INSTR (14, 10);
7149 unsigned sm = INSTR (20, 16);
7150 unsigned sn = INSTR ( 9, 5);
7151 unsigned sd = INSTR ( 4, 0);
7152
7153 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7154 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7155 - aarch64_get_FP_float (cpu, sn)
7156 * aarch64_get_FP_float (cpu, sm));
7157 }
7158
7159 /* Double multiply subtract. */
7160 static void
7161 fmsubd (sim_cpu *cpu)
7162 {
7163 unsigned sa = INSTR (14, 10);
7164 unsigned sm = INSTR (20, 16);
7165 unsigned sn = INSTR ( 9, 5);
7166 unsigned sd = INSTR ( 4, 0);
7167
7168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7169 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7170 - aarch64_get_FP_double (cpu, sn)
7171 * aarch64_get_FP_double (cpu, sm));
7172 }
7173
7174 /* Float negative multiply add. */
7175 static void
7176 fnmadds (sim_cpu *cpu)
7177 {
7178 unsigned sa = INSTR (14, 10);
7179 unsigned sm = INSTR (20, 16);
7180 unsigned sn = INSTR ( 9, 5);
7181 unsigned sd = INSTR ( 4, 0);
7182
7183 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7184 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7185 + (- aarch64_get_FP_float (cpu, sn))
7186 * aarch64_get_FP_float (cpu, sm));
7187 }
7188
7189 /* Double negative multiply add. */
7190 static void
7191 fnmaddd (sim_cpu *cpu)
7192 {
7193 unsigned sa = INSTR (14, 10);
7194 unsigned sm = INSTR (20, 16);
7195 unsigned sn = INSTR ( 9, 5);
7196 unsigned sd = INSTR ( 4, 0);
7197
7198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7199 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7200 + (- aarch64_get_FP_double (cpu, sn))
7201 * aarch64_get_FP_double (cpu, sm));
7202 }
7203
7204 /* Float negative multiply subtract. */
7205 static void
7206 fnmsubs (sim_cpu *cpu)
7207 {
7208 unsigned sa = INSTR (14, 10);
7209 unsigned sm = INSTR (20, 16);
7210 unsigned sn = INSTR ( 9, 5);
7211 unsigned sd = INSTR ( 4, 0);
7212
7213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7214 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7215 + aarch64_get_FP_float (cpu, sn)
7216 * aarch64_get_FP_float (cpu, sm));
7217 }
7218
7219 /* Double negative multiply subtract. */
7220 static void
7221 fnmsubd (sim_cpu *cpu)
7222 {
7223 unsigned sa = INSTR (14, 10);
7224 unsigned sm = INSTR (20, 16);
7225 unsigned sn = INSTR ( 9, 5);
7226 unsigned sd = INSTR ( 4, 0);
7227
7228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7229 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7230 + aarch64_get_FP_double (cpu, sn)
7231 * aarch64_get_FP_double (cpu, sm));
7232 }
7233
7234 static void
7235 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7236 {
7237 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7238 instr[30] = 0
7239 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7240 instr[28,25] = 1111
7241 instr[24] = 1
7242 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7243 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7244 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7245
7246 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7247 /* dispatch on combined type:o1:o2. */
7248 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
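  /* dispatch values 0..3 select the single precision fmadds, fmsubs,
     fnmadds and fnmsubs; 4..7 the corresponding double precision ops.  */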
7249
7250 if (M_S != 0)
7251 HALT_UNALLOC;
7252
7253 switch (dispatch)
7254 {
7255 case 0: fmadds (cpu); return;
7256 case 1: fmsubs (cpu); return;
7257 case 2: fnmadds (cpu); return;
7258 case 3: fnmsubs (cpu); return;
7259 case 4: fmaddd (cpu); return;
7260 case 5: fmsubd (cpu); return;
7261 case 6: fnmaddd (cpu); return;
7262 case 7: fnmsubd (cpu); return;
7263 default:
7264 /* type > 1 is currently unallocated. */
7265 HALT_UNALLOC;
7266 }
7267 }
7268
7269 static void
7270 dexSimpleFPFixedConvert (sim_cpu *cpu)
7271 {
7272 HALT_NYI;
7273 }
7274
7275 static void
7276 dexSimpleFPCondCompare (sim_cpu *cpu)
7277 {
7278 /* instr [31,23] = 0001 1110 0
7279 instr [22] = type
7280 instr [21] = 1
7281 instr [20,16] = Rm
7282 instr [15,12] = condition
7283 instr [11,10] = 01
7284 instr [9,5] = Rn
7285 instr [4] = 0
7286 instr [3,0] = nzcv */
7287
7288 unsigned rm = INSTR (20, 16);
7289 unsigned rn = INSTR (9, 5);
7290
7291 NYI_assert (31, 23, 0x3C);
7292 NYI_assert (11, 10, 0x1);
7293 NYI_assert (4, 4, 0);
7294
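  /* When the condition holds, the flags are set as FCMP would set them:
     equal => Z|C, less => N, greater => C.  The unordered case (C|V)
     is not modelled; see the FIXMEs below.  */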
7295 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7296 if (! testConditionCode (cpu, INSTR (15, 12)))
7297 {
7298 aarch64_set_CPSR (cpu, INSTR (3, 0));
7299 return;
7300 }
7301
7302 if (INSTR (22, 22))
7303 {
7304 /* Double precision. */
7305 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7306 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7307
7308 /* FIXME: Check for NaNs. */
7309 if (val1 == val2)
7310 aarch64_set_CPSR (cpu, (Z | C));
7311 else if (val1 < val2)
7312 aarch64_set_CPSR (cpu, N);
7313 else /* val1 > val2 */
7314 aarch64_set_CPSR (cpu, C);
7315 }
7316 else
7317 {
7318 /* Single precision. */
7319 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7320 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7321
7322 /* FIXME: Check for NaNs. */
7323 if (val1 == val2)
7324 aarch64_set_CPSR (cpu, (Z | C));
7325 else if (val1 < val2)
7326 aarch64_set_CPSR (cpu, N);
7327 else /* val1 > val2 */
7328 aarch64_set_CPSR (cpu, C);
7329 }
7330 }
7331
7332 /* 2 sources. */
7333
7334 /* Float add. */
7335 static void
7336 fadds (sim_cpu *cpu)
7337 {
7338 unsigned sm = INSTR (20, 16);
7339 unsigned sn = INSTR ( 9, 5);
7340 unsigned sd = INSTR ( 4, 0);
7341
7342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7343 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7344 + aarch64_get_FP_float (cpu, sm));
7345 }
7346
7347 /* Double add. */
7348 static void
7349 faddd (sim_cpu *cpu)
7350 {
7351 unsigned sm = INSTR (20, 16);
7352 unsigned sn = INSTR ( 9, 5);
7353 unsigned sd = INSTR ( 4, 0);
7354
7355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7356 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7357 + aarch64_get_FP_double (cpu, sm));
7358 }
7359
7360 /* Float divide. */
7361 static void
7362 fdivs (sim_cpu *cpu)
7363 {
7364 unsigned sm = INSTR (20, 16);
7365 unsigned sn = INSTR ( 9, 5);
7366 unsigned sd = INSTR ( 4, 0);
7367
7368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7369 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7370 / aarch64_get_FP_float (cpu, sm));
7371 }
7372
7373 /* Double divide. */
7374 static void
7375 fdivd (sim_cpu *cpu)
7376 {
7377 unsigned sm = INSTR (20, 16);
7378 unsigned sn = INSTR ( 9, 5);
7379 unsigned sd = INSTR ( 4, 0);
7380
7381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7382 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7383 / aarch64_get_FP_double (cpu, sm));
7384 }
7385
7386 /* Float multiply. */
7387 static void
7388 fmuls (sim_cpu *cpu)
7389 {
7390 unsigned sm = INSTR (20, 16);
7391 unsigned sn = INSTR ( 9, 5);
7392 unsigned sd = INSTR ( 4, 0);
7393
7394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7395 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7396 * aarch64_get_FP_float (cpu, sm));
7397 }
7398
7399 /* Double multiply. */
7400 static void
7401 fmuld (sim_cpu *cpu)
7402 {
7403 unsigned sm = INSTR (20, 16);
7404 unsigned sn = INSTR ( 9, 5);
7405 unsigned sd = INSTR ( 4, 0);
7406
7407 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7408 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7409 * aarch64_get_FP_double (cpu, sm));
7410 }
7411
7412 /* Float negate and multiply. */
7413 static void
7414 fnmuls (sim_cpu *cpu)
7415 {
7416 unsigned sm = INSTR (20, 16);
7417 unsigned sn = INSTR ( 9, 5);
7418 unsigned sd = INSTR ( 4, 0);
7419
7420 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7421 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7422 * aarch64_get_FP_float (cpu, sm)));
7423 }
7424
7425 /* Double negate and multiply. */
7426 static void
7427 fnmuld (sim_cpu *cpu)
7428 {
7429 unsigned sm = INSTR (20, 16);
7430 unsigned sn = INSTR ( 9, 5);
7431 unsigned sd = INSTR ( 4, 0);
7432
7433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7434 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7435 * aarch64_get_FP_double (cpu, sm)));
7436 }
7437
7438 /* Float subtract. */
7439 static void
7440 fsubs (sim_cpu *cpu)
7441 {
7442 unsigned sm = INSTR (20, 16);
7443 unsigned sn = INSTR ( 9, 5);
7444 unsigned sd = INSTR ( 4, 0);
7445
7446 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7447 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7448 - aarch64_get_FP_float (cpu, sm));
7449 }
7450
7451 /* Double subtract. */
7452 static void
7453 fsubd (sim_cpu *cpu)
7454 {
7455 unsigned sm = INSTR (20, 16);
7456 unsigned sn = INSTR ( 9, 5);
7457 unsigned sd = INSTR ( 4, 0);
7458
7459 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7460 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7461 - aarch64_get_FP_double (cpu, sm));
7462 }
7463
7464 static void
7465 do_FMINNM (sim_cpu *cpu)
7466 {
7467 /* instr[31,23] = 0 0011 1100
7468 instr[22] = float(0)/double(1)
7469 instr[21] = 1
7470 instr[20,16] = Sm
7471 instr[15,10] = 01 1110
7472 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7474
7475 unsigned sm = INSTR (20, 16);
7476 unsigned sn = INSTR ( 9, 5);
7477 unsigned sd = INSTR ( 4, 0);
7478
7479 NYI_assert (31, 23, 0x03C);
7480 NYI_assert (15, 10, 0x1E);
7481
7482 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7483 if (INSTR (22, 22))
7484 aarch64_set_FP_double (cpu, sd,
7485 dminnm (aarch64_get_FP_double (cpu, sn),
7486 aarch64_get_FP_double (cpu, sm)));
7487 else
7488 aarch64_set_FP_float (cpu, sd,
7489 fminnm (aarch64_get_FP_float (cpu, sn),
7490 aarch64_get_FP_float (cpu, sm)));
7491 }
7492
7493 static void
7494 do_FMAXNM (sim_cpu *cpu)
7495 {
7496 /* instr[31,23] = 0 0011 1100
7497 instr[22] = float(0)/double(1)
7498 instr[21] = 1
7499 instr[20,16] = Sm
7500 instr[15,10] = 01 1010
7501 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7503
7504 unsigned sm = INSTR (20, 16);
7505 unsigned sn = INSTR ( 9, 5);
7506 unsigned sd = INSTR ( 4, 0);
7507
7508 NYI_assert (31, 23, 0x03C);
7509 NYI_assert (15, 10, 0x1A);
7510
7511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7512 if (INSTR (22, 22))
7513 aarch64_set_FP_double (cpu, sd,
7514 dmaxnm (aarch64_get_FP_double (cpu, sn),
7515 aarch64_get_FP_double (cpu, sm)));
7516 else
7517 aarch64_set_FP_float (cpu, sd,
7518 fmaxnm (aarch64_get_FP_float (cpu, sn),
7519 aarch64_get_FP_float (cpu, sm)));
7520 }
7521
7522 static void
7523 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7524 {
7525 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7526 instr[30] = 0
7527 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7528 instr[28,25] = 1111
7529 instr[24] = 0
7530 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7531 instr[21] = 1
7532 instr[20,16] = Vm
7533 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7534 0010 ==> FADD, 0011 ==> FSUB,
7535 0100 ==> FMAX, 0101 ==> FMIN
7536 0110 ==> FMAXNM, 0111 ==> FMINNM
7537 1000 ==> FNMUL, ow ==> UNALLOC
7538 instr[11,10] = 10
7539 instr[9,5] = Vn
7540 instr[4,0] = Vd */
7541
7542 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7543 uint32_t type = INSTR (23, 22);
7544 /* Dispatch on opcode. */
7545 uint32_t dispatch = INSTR (15, 12);
7546
7547 if (type > 1)
7548 HALT_UNALLOC;
7549
7550 if (M_S != 0)
7551 HALT_UNALLOC;
7552
7553 if (type)
7554 switch (dispatch)
7555 {
7556 case 0: fmuld (cpu); return;
7557 case 1: fdivd (cpu); return;
7558 case 2: faddd (cpu); return;
7559 case 3: fsubd (cpu); return;
7560 case 6: do_FMAXNM (cpu); return;
7561 case 7: do_FMINNM (cpu); return;
7562 case 8: fnmuld (cpu); return;
7563
7564 /* Have not yet implemented fmax and fmin. */
7565 case 4:
7566 case 5:
7567 HALT_NYI;
7568
7569 default:
7570 HALT_UNALLOC;
7571 }
7572 else /* type == 0 => floats. */
7573 switch (dispatch)
7574 {
7575 case 0: fmuls (cpu); return;
7576 case 1: fdivs (cpu); return;
7577 case 2: fadds (cpu); return;
7578 case 3: fsubs (cpu); return;
7579 case 6: do_FMAXNM (cpu); return;
7580 case 7: do_FMINNM (cpu); return;
7581 case 8: fnmuls (cpu); return;
7582
7583 case 4:
7584 case 5:
7585 HALT_NYI;
7586
7587 default:
7588 HALT_UNALLOC;
7589 }
7590 }
7591
7592 static void
7593 dexSimpleFPCondSelect (sim_cpu *cpu)
7594 {
7595 /* FCSEL
7596 instr[31,23] = 0 0011 1100
7597 instr[22] = 0=>single 1=>double
7598 instr[21] = 1
7599 instr[20,16] = Sm
7600 instr[15,12] = cond
7601 instr[11,10] = 11
7602 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7604 unsigned sm = INSTR (20, 16);
7605 unsigned sn = INSTR ( 9, 5);
7606 unsigned sd = INSTR ( 4, 0);
7607 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7608
7609 NYI_assert (31, 23, 0x03C);
7610 NYI_assert (11, 10, 0x3);
7611
7612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7613 if (INSTR (22, 22))
7614 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7615 : aarch64_get_FP_double (cpu, sm)));
7616 else
7617 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7618 : aarch64_get_FP_float (cpu, sm)));
7619 }
7620
7621 /* Store 32 bit unscaled signed 9 bit. */
7622 static void
7623 fsturs (sim_cpu *cpu, int32_t offset)
7624 {
7625 unsigned int rn = INSTR (9, 5);
7626 unsigned int st = INSTR (4, 0);
7627
7628 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7630 aarch64_get_vec_u32 (cpu, st, 0));
7631 }
7632
7633 /* Store 64 bit unscaled signed 9 bit. */
7634 static void
7635 fsturd (sim_cpu *cpu, int32_t offset)
7636 {
7637 unsigned int rn = INSTR (9, 5);
7638 unsigned int st = INSTR (4, 0);
7639
7640 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7642 aarch64_get_vec_u64 (cpu, st, 0));
7643 }
7644
7645 /* Store 128 bit unscaled signed 9 bit. */
7646 static void
7647 fsturq (sim_cpu *cpu, int32_t offset)
7648 {
7649 unsigned int rn = INSTR (9, 5);
7650 unsigned int st = INSTR (4, 0);
7651 FRegister a;
7652
7653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7654 aarch64_get_FP_long_double (cpu, st, & a);
7655 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7657 + offset, a);
7658 }
7659
7660 /* TODO FP move register. */
7661
7662 /* 32 bit fp to fp move register. */
7663 static void
7664 ffmovs (sim_cpu *cpu)
7665 {
7666 unsigned int rn = INSTR (9, 5);
7667 unsigned int st = INSTR (4, 0);
7668
7669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7670 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7671 }
7672
7673 /* 64 bit fp to fp move register. */
7674 static void
7675 ffmovd (sim_cpu *cpu)
7676 {
7677 unsigned int rn = INSTR (9, 5);
7678 unsigned int st = INSTR (4, 0);
7679
7680 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7681 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7682 }
7683
7684 /* 32 bit GReg to Vec move register. */
7685 static void
7686 fgmovs (sim_cpu *cpu)
7687 {
7688 unsigned int rn = INSTR (9, 5);
7689 unsigned int st = INSTR (4, 0);
7690
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7693 }
7694
7695 /* 64 bit g to fp move register. */
7696 static void
7697 fgmovd (sim_cpu *cpu)
7698 {
7699 unsigned int rn = INSTR (9, 5);
7700 unsigned int st = INSTR (4, 0);
7701
7702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7703 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7704 }
7705
7706 /* 32 bit fp to g move register. */
7707 static void
7708 gfmovs (sim_cpu *cpu)
7709 {
7710 unsigned int rn = INSTR (9, 5);
7711 unsigned int st = INSTR (4, 0);
7712
7713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7714 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7715 }
7716
7717 /* 64 bit fp to g move register. */
7718 static void
7719 gfmovd (sim_cpu *cpu)
7720 {
7721 unsigned int rn = INSTR (9, 5);
7722 unsigned int st = INSTR (4, 0);
7723
7724 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7725 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7726 }
7727
7728 /* FP move immediate
7729
7730 These install an immediate 8 bit value in the target register
7731 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7732 bit exponent. */
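/* For example, imm8 == 0x70 (sign 0, exponent 0b111, fraction 0b0000)
   encodes 1.0, and imm8 == 0xF0 encodes -1.0.  */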
7733
7734 static void
7735 fmovs (sim_cpu *cpu)
7736 {
7737 unsigned int sd = INSTR (4, 0);
7738 uint32_t imm = INSTR (20, 13);
7739 float f = fp_immediate_for_encoding_32 (imm);
7740
7741 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7742 aarch64_set_FP_float (cpu, sd, f);
7743 }
7744
7745 static void
7746 fmovd (sim_cpu *cpu)
7747 {
7748 unsigned int sd = INSTR (4, 0);
7749 uint32_t imm = INSTR (20, 13);
7750 double d = fp_immediate_for_encoding_64 (imm);
7751
7752 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7753 aarch64_set_FP_double (cpu, sd, d);
7754 }
7755
7756 static void
7757 dexSimpleFPImmediate (sim_cpu *cpu)
7758 {
7759 /* instr[31,23] == 00111100
7760 instr[22] == type : single(0)/double(1)
7761 instr[21] == 1
7762 instr[20,13] == imm8
7763 instr[12,10] == 100
     instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7765 instr[4,0] == Rd */
7766 uint32_t imm5 = INSTR (9, 5);
7767
7768 NYI_assert (31, 23, 0x3C);
7769
7770 if (imm5 != 0)
7771 HALT_UNALLOC;
7772
7773 if (INSTR (22, 22))
7774 fmovd (cpu);
7775 else
7776 fmovs (cpu);
7777 }
7778
7779 /* TODO specific decode and execute for group Load Store. */
7780
7781 /* TODO FP load/store single register (unscaled offset). */
7782
7783 /* TODO load 8 bit unscaled signed 9 bit. */
7784 /* TODO load 16 bit unscaled signed 9 bit. */
7785
7786 /* Load 32 bit unscaled signed 9 bit. */
7787 static void
7788 fldurs (sim_cpu *cpu, int32_t offset)
7789 {
7790 unsigned int rn = INSTR (9, 5);
7791 unsigned int st = INSTR (4, 0);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7795 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7796 }
7797
7798 /* Load 64 bit unscaled signed 9 bit. */
7799 static void
7800 fldurd (sim_cpu *cpu, int32_t offset)
7801 {
7802 unsigned int rn = INSTR (9, 5);
7803 unsigned int st = INSTR (4, 0);
7804
7805 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7806 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7807 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7808 }
7809
7810 /* Load 128 bit unscaled signed 9 bit. */
7811 static void
7812 fldurq (sim_cpu *cpu, int32_t offset)
7813 {
7814 unsigned int rn = INSTR (9, 5);
7815 unsigned int st = INSTR (4, 0);
7816 FRegister a;
7817 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7818
7819 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7820 aarch64_get_mem_long_double (cpu, addr, & a);
7821 aarch64_set_FP_long_double (cpu, st, a);
7822 }
7823
7824 /* TODO store 8 bit unscaled signed 9 bit. */
7825 /* TODO store 16 bit unscaled signed 9 bit. */
7826
7827
7828 /* 1 source. */
7829
7830 /* Float absolute value. */
7831 static void
7832 fabss (sim_cpu *cpu)
7833 {
7834 unsigned sn = INSTR (9, 5);
7835 unsigned sd = INSTR (4, 0);
7836 float value = aarch64_get_FP_float (cpu, sn);
7837
7838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7839 aarch64_set_FP_float (cpu, sd, fabsf (value));
7840 }
7841
7842 /* Double absolute value. */
7843 static void
7844 fabcpu (sim_cpu *cpu)
7845 {
7846 unsigned sn = INSTR (9, 5);
7847 unsigned sd = INSTR (4, 0);
7848 double value = aarch64_get_FP_double (cpu, sn);
7849
7850 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7851 aarch64_set_FP_double (cpu, sd, fabs (value));
7852 }
7853
7854 /* Float negative value. */
7855 static void
7856 fnegs (sim_cpu *cpu)
7857 {
7858 unsigned sn = INSTR (9, 5);
7859 unsigned sd = INSTR (4, 0);
7860
7861 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7862 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7863 }
7864
7865 /* Double negative value. */
7866 static void
7867 fnegd (sim_cpu *cpu)
7868 {
7869 unsigned sn = INSTR (9, 5);
7870 unsigned sd = INSTR (4, 0);
7871
7872 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7873 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7874 }
7875
7876 /* Float square root. */
7877 static void
7878 fsqrts (sim_cpu *cpu)
7879 {
7880 unsigned sn = INSTR (9, 5);
7881 unsigned sd = INSTR (4, 0);
7882
7883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7884 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7885 }
7886
7887 /* Double square root. */
7888 static void
7889 fsqrtd (sim_cpu *cpu)
7890 {
7891 unsigned sn = INSTR (9, 5);
7892 unsigned sd = INSTR (4, 0);
7893
7894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7895 aarch64_set_FP_double (cpu, sd,
7896 sqrt (aarch64_get_FP_double (cpu, sn)));
7897 }
7898
7899 /* Convert double to float. */
7900 static void
7901 fcvtds (sim_cpu *cpu)
7902 {
7903 unsigned sn = INSTR (9, 5);
7904 unsigned sd = INSTR (4, 0);
7905
7906 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7907 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7908 }
7909
7910 /* Convert float to double. */
7911 static void
7912 fcvtcpu (sim_cpu *cpu)
7913 {
7914 unsigned sn = INSTR (9, 5);
7915 unsigned sd = INSTR (4, 0);
7916
7917 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7918 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7919 }
7920
7921 static void
7922 do_FRINT (sim_cpu *cpu)
7923 {
7924 /* instr[31,23] = 0001 1110 0
7925 instr[22] = single(0)/double(1)
7926 instr[21,18] = 1001
7927 instr[17,15] = rounding mode
7928 instr[14,10] = 10000
7929 instr[9,5] = source
7930 instr[4,0] = dest */
7931
7932 float val;
7933 unsigned rs = INSTR (9, 5);
7934 unsigned rd = INSTR (4, 0);
7935 unsigned int rmode = INSTR (17, 15);
7936
7937 NYI_assert (31, 23, 0x03C);
7938 NYI_assert (21, 18, 0x9);
7939 NYI_assert (14, 10, 0x10);
7940
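  /* Architecturally the dynamic rounding mode is FPCR.RMode (bits
     [23,22]); this simulator keeps those bits in the value returned
     by aarch64_get_FPSR.  */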
7941 if (rmode == 6 || rmode == 7)
7942 /* FIXME: Add support for rmode == 6 exactness check. */
7943 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7944
7945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7946 if (INSTR (22, 22))
7947 {
7948 double val = aarch64_get_FP_double (cpu, rs);
7949
7950 switch (rmode)
7951 {
	case 0:	/* mode N: nearest, ties to even.  */
	  {
	    double rval = round (val);

	    if (fabs (val - rval) == 0.5
		&& fmod (rval, 2.0) != 0.0)
	      /* Halfway case rounded away from zero to an odd value;
		 the even choice lies one unit back towards VAL.  */
	      rval -= copysign (1.0, rval);

	    aarch64_set_FP_double (cpu, rd, rval);
	    return;
	  }

	case 1: /* mode P: towards +inf.  */
	  aarch64_set_FP_double (cpu, rd, ceil (val));
	  return;

	case 2: /* mode M: towards -inf.  */
	  aarch64_set_FP_double (cpu, rd, floor (val));
	  return;
7979
7980 case 3: /* mode Z: towards 0. */
7981 aarch64_set_FP_double (cpu, rd, trunc (val));
7982 return;
7983
7984 case 4: /* mode A: away from 0. */
7985 aarch64_set_FP_double (cpu, rd, round (val));
7986 return;
7987
7988 case 6: /* mode X: use FPCR with exactness check. */
7989 case 7: /* mode I: use FPCR mode. */
7990 HALT_NYI;
7991
7992 default:
7993 HALT_UNALLOC;
7994 }
7995 }
7996
7997 val = aarch64_get_FP_float (cpu, rs);
7998
7999 switch (rmode)
8000 {
    case 0:	/* mode N: nearest, ties to even.  */
      {
	float rval = roundf (val);

	if (fabsf (val - rval) == 0.5f
	    && fmodf (rval, 2.0f) != 0.0f)
	  /* Halfway case rounded away from zero to an odd value;
	     the even choice lies one unit back towards VAL.  */
	  rval -= copysignf (1.0f, rval);

	aarch64_set_FP_float (cpu, rd, rval);
	return;
      }

    case 1: /* mode P: towards +inf.  */
      aarch64_set_FP_float (cpu, rd, ceilf (val));
      return;

    case 2: /* mode M: towards -inf.  */
      aarch64_set_FP_float (cpu, rd, floorf (val));
      return;
8028
8029 case 3: /* mode Z: towards 0. */
8030 aarch64_set_FP_float (cpu, rd, truncf (val));
8031 return;
8032
8033 case 4: /* mode A: away from 0. */
8034 aarch64_set_FP_float (cpu, rd, roundf (val));
8035 return;
8036
8037 case 6: /* mode X: use FPCR with exactness check. */
8038 case 7: /* mode I: use FPCR mode. */
8039 HALT_NYI;
8040
8041 default:
8042 HALT_UNALLOC;
8043 }
8044 }
8045
8046 /* Convert half to float. */
8047 static void
8048 do_FCVT_half_to_single (sim_cpu *cpu)
8049 {
8050 unsigned rn = INSTR (9, 5);
8051 unsigned rd = INSTR (4, 0);
8052
8053 NYI_assert (31, 10, 0x7B890);
8054
8055 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8056 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8057 }
8058
8059 /* Convert half to double. */
8060 static void
8061 do_FCVT_half_to_double (sim_cpu *cpu)
8062 {
8063 unsigned rn = INSTR (9, 5);
8064 unsigned rd = INSTR (4, 0);
8065
8066 NYI_assert (31, 10, 0x7B8B0);
8067
8068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8069 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8070 }
8071
8072 static void
8073 do_FCVT_single_to_half (sim_cpu *cpu)
8074 {
8075 unsigned rn = INSTR (9, 5);
8076 unsigned rd = INSTR (4, 0);
8077
8078 NYI_assert (31, 10, 0x788F0);
8079
8080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8081 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8082 }
8083
8084 /* Convert double to half. */
8085 static void
8086 do_FCVT_double_to_half (sim_cpu *cpu)
8087 {
8088 unsigned rn = INSTR (9, 5);
8089 unsigned rd = INSTR (4, 0);
8090
8091 NYI_assert (31, 10, 0x798F0);
8092
8093 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8094 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8095 }
8096
8097 static void
8098 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8099 {
8100 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8101 instr[30] = 0
8102 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8103 instr[28,25] = 1111
8104 instr[24] = 0
8105 instr[23,22] ==> type : 00 ==> source is single,
8106 01 ==> source is double
8107 10 ==> UNALLOC
8108 11 ==> UNALLOC or source is half
8109 instr[21] = 1
8110 instr[20,15] ==> opcode : with type 00 or 01
8111 000000 ==> FMOV, 000001 ==> FABS,
8112 000010 ==> FNEG, 000011 ==> FSQRT,
8113 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8114 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8115 001000 ==> FRINTN, 001001 ==> FRINTP,
8116 001010 ==> FRINTM, 001011 ==> FRINTZ,
8117 001100 ==> FRINTA, 001101 ==> UNALLOC
8118 001110 ==> FRINTX, 001111 ==> FRINTI
8119 with type 11
8120 000100 ==> FCVT (half-to-single)
8121 000101 ==> FCVT (half-to-double)
8122 instr[14,10] = 10000. */
8123
8124 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8125 uint32_t type = INSTR (23, 22);
8126 uint32_t opcode = INSTR (20, 15);
8127
8128 if (M_S != 0)
8129 HALT_UNALLOC;
8130
8131 if (type == 3)
8132 {
8133 if (opcode == 4)
8134 do_FCVT_half_to_single (cpu);
8135 else if (opcode == 5)
8136 do_FCVT_half_to_double (cpu);
8137 else
8138 HALT_UNALLOC;
8139 return;
8140 }
8141
8142 if (type == 2)
8143 HALT_UNALLOC;
8144
8145 switch (opcode)
8146 {
8147 case 0:
8148 if (type)
8149 ffmovd (cpu);
8150 else
8151 ffmovs (cpu);
8152 return;
8153
8154 case 1:
8155 if (type)
8156 fabcpu (cpu);
8157 else
8158 fabss (cpu);
8159 return;
8160
8161 case 2:
8162 if (type)
8163 fnegd (cpu);
8164 else
8165 fnegs (cpu);
8166 return;
8167
8168 case 3:
8169 if (type)
8170 fsqrtd (cpu);
8171 else
8172 fsqrts (cpu);
8173 return;
8174
8175 case 4:
8176 if (type)
8177 fcvtds (cpu);
8178 else
8179 HALT_UNALLOC;
8180 return;
8181
8182 case 5:
8183 if (type)
8184 HALT_UNALLOC;
8185 fcvtcpu (cpu);
8186 return;
8187
8188 case 8: /* FRINTN etc. */
8189 case 9:
8190 case 10:
8191 case 11:
8192 case 12:
8193 case 14:
8194 case 15:
8195 do_FRINT (cpu);
8196 return;
8197
8198 case 7:
8199 if (INSTR (22, 22))
8200 do_FCVT_double_to_half (cpu);
8201 else
8202 do_FCVT_single_to_half (cpu);
8203 return;
8204
8205 case 13:
8206 HALT_NYI;
8207
8208 default:
8209 HALT_UNALLOC;
8210 }
8211 }
8212
8213 /* 32 bit signed int to float. */
8214 static void
8215 scvtf32 (sim_cpu *cpu)
8216 {
8217 unsigned rn = INSTR (9, 5);
8218 unsigned sd = INSTR (4, 0);
8219
8220 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8221 aarch64_set_FP_float
8222 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8223 }
8224
8225 /* signed int to float. */
8226 static void
8227 scvtf (sim_cpu *cpu)
8228 {
8229 unsigned rn = INSTR (9, 5);
8230 unsigned sd = INSTR (4, 0);
8231
8232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8233 aarch64_set_FP_float
8234 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8235 }
8236
8237 /* 32 bit signed int to double. */
8238 static void
8239 scvtd32 (sim_cpu *cpu)
8240 {
8241 unsigned rn = INSTR (9, 5);
8242 unsigned sd = INSTR (4, 0);
8243
8244 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8245 aarch64_set_FP_double
8246 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8247 }
8248
8249 /* signed int to double. */
8250 static void
8251 scvtd (sim_cpu *cpu)
8252 {
8253 unsigned rn = INSTR (9, 5);
8254 unsigned sd = INSTR (4, 0);
8255
8256 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8257 aarch64_set_FP_double
8258 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8259 }
8260
8261 static const float FLOAT_INT_MAX = (float) INT_MAX;
8262 static const float FLOAT_INT_MIN = (float) INT_MIN;
8263 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8264 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8265 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8266 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8267 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8268 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
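/* Note: LONG here is the host "long"; the bounds above assume an LP64
   host where long is 64 bits wide.  */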
8269
8270 #define UINT_MIN 0
8271 #define ULONG_MIN 0
8272 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8273 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8274 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8275 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8276 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8277 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8278 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8279 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8280
8281 /* Check for FP exception conditions:
8282 NaN raises IO
8283 Infinity raises IO
8284 Out of Range raises IO and IX and saturates value
8285 Denormal raises ID and IX and sets to zero. */
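/* F is the host floating-point input, VALUE an integer lvalue receiving
   the (possibly saturated) result; FTYPE (FLOAT or DOUBLE) and ITYPE
   (INT, UINT, LONG or ULONG) are token-pasted to select the bounds
   defined above.  */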
8286 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8287 do \
8288 { \
8289 switch (fpclassify (F)) \
8290 { \
8291 case FP_INFINITE: \
8292 case FP_NAN: \
8293 aarch64_set_FPSR (cpu, IO); \
	  if (signbit (F))					\
	    VALUE = ITYPE##_MIN;				\
	  else							\
	    VALUE = ITYPE##_MAX;				\
8298 break; \
8299 \
8300 case FP_NORMAL: \
8301 if (F >= FTYPE##_##ITYPE##_MAX) \
8302 { \
8303 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8304 VALUE = ITYPE##_MAX; \
8305 } \
8306 else if (F <= FTYPE##_##ITYPE##_MIN) \
8307 { \
8308 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8309 VALUE = ITYPE##_MIN; \
8310 } \
8311 break; \
8312 \
8313 case FP_SUBNORMAL: \
8314 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8315 VALUE = 0; \
8316 break; \
8317 \
8318 default: \
8319 case FP_ZERO: \
8320 VALUE = 0; \
8321 break; \
8322 } \
8323 } \
8324 while (0)
8325
8326 /* 32 bit convert float to signed int truncate towards zero. */
8327 static void
8328 fcvtszs32 (sim_cpu *cpu)
8329 {
8330 unsigned sn = INSTR (9, 5);
8331 unsigned rd = INSTR (4, 0);
8332 /* TODO : check that this rounds toward zero. */
8333 float f = aarch64_get_FP_float (cpu, sn);
8334 int32_t value = (int32_t) f;
8335
8336 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8337
8338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8339 /* Avoid sign extension to 64 bit. */
8340 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8341 }
8342
8343 /* 64 bit convert float to signed int truncate towards zero. */
8344 static void
8345 fcvtszs (sim_cpu *cpu)
8346 {
8347 unsigned sn = INSTR (9, 5);
8348 unsigned rd = INSTR (4, 0);
8349 float f = aarch64_get_FP_float (cpu, sn);
8350 int64_t value = (int64_t) f;
8351
8352 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8353
8354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8355 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8356 }
8357
8358 /* 32 bit convert double to signed int truncate towards zero. */
8359 static void
8360 fcvtszd32 (sim_cpu *cpu)
8361 {
8362 unsigned sn = INSTR (9, 5);
8363 unsigned rd = INSTR (4, 0);
8364 /* TODO : check that this rounds toward zero. */
8365 double d = aarch64_get_FP_double (cpu, sn);
8366 int32_t value = (int32_t) d;
8367
8368 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8369
8370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8371 /* Avoid sign extension to 64 bit. */
8372 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8373 }
8374
8375 /* 64 bit convert double to signed int truncate towards zero. */
8376 static void
8377 fcvtszd (sim_cpu *cpu)
8378 {
8379 unsigned sn = INSTR (9, 5);
8380 unsigned rd = INSTR (4, 0);
8381 /* TODO : check that this rounds toward zero. */
8382 double d = aarch64_get_FP_double (cpu, sn);
8383 int64_t value;
8384
8385 value = (int64_t) d;
8386
8387 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8388
8389 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8390 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8391 }
8392
8393 static void
8394 do_fcvtzu (sim_cpu *cpu)
8395 {
8396 /* instr[31] = size: 32-bit (0), 64-bit (1)
8397 instr[30,23] = 00111100
8398 instr[22] = type: single (0)/ double (1)
     instr[21]    = 0 ==> fixed-point (scale in [15,10]), 1 ==> integer
8400 instr[20,16] = 11001
8401 instr[15,10] = precision
8402 instr[9,5] = Rs
8403 instr[4,0] = Rd. */
8404
8405 unsigned rs = INSTR (9, 5);
8406 unsigned rd = INSTR (4, 0);
8407
8408 NYI_assert (30, 23, 0x3C);
8409 NYI_assert (20, 16, 0x19);
8410
8411 if (INSTR (21, 21) != 1)
8412 /* Convert to fixed point. */
8413 HALT_NYI;
8414
8415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8416 if (INSTR (31, 31))
8417 {
8418 /* Convert to unsigned 64-bit integer. */
8419 if (INSTR (22, 22))
8420 {
8421 double d = aarch64_get_FP_double (cpu, rs);
8422 uint64_t value = (uint64_t) d;
8423
8424 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1ULL << 63 (the x86 behaviour this check relies on). */
8425 if (value != (1ULL << 63))
8426 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8427
8428 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8429 }
8430 else
8431 {
8432 float f = aarch64_get_FP_float (cpu, rs);
8433 uint64_t value = (uint64_t) f;
8434
8435 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1ULL << 63 (the x86 behaviour this check relies on). */
8436 if (value != (1ULL << 63))
8437 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8438
8439 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8440 }
8441 }
8442 else
8443 {
8444 uint32_t value;
8445
8446 /* Convert to unsigned 32-bit integer. */
8447 if (INSTR (22, 22))
8448 {
8449 double d = aarch64_get_FP_double (cpu, rs);
8450
8451 value = (uint32_t) d;
8452 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1UL << 31 (the x86 behaviour this check relies on). */
8453 if (value != (1UL << 31))
8454 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8455 }
8456 else
8457 {
8458 float f = aarch64_get_FP_float (cpu, rs);
8459
8460 value = (uint32_t) f;
8461 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1UL << 31 (the x86 behaviour this check relies on). */
8462 if (value != (1UL << 31))
8463 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8464 }
8465
8466 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8467 }
8468 }
8469
8470 static void
8471 do_UCVTF (sim_cpu *cpu)
8472 {
8473 /* instr[31] = size: 32-bit (0), 64-bit (1)
8474 instr[30,23] = 001 1110 0
8475 instr[22] = type: single (0)/ double (1)
8476 instr[21] = 1 ==> integer conversion, 0 ==> fixed-point (NYI)
8477 instr[20,16] = 0 0011
8478 instr[15,10] = precision
8479 instr[9,5] = Rs
8480 instr[4,0] = Rd. */
8481
8482 unsigned rs = INSTR (9, 5);
8483 unsigned rd = INSTR (4, 0);
8484
8485 NYI_assert (30, 23, 0x3C);
8486 NYI_assert (20, 16, 0x03);
8487
8488 if (INSTR (21, 21) != 1)
8489 HALT_NYI;
8490
8491 /* FIXME: Add exception raising. */
8492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8493 if (INSTR (31, 31))
8494 {
8495 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8496
8497 if (INSTR (22, 22))
8498 aarch64_set_FP_double (cpu, rd, (double) value);
8499 else
8500 aarch64_set_FP_float (cpu, rd, (float) value);
8501 }
8502 else
8503 {
8504 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8505
8506 if (INSTR (22, 22))
8507 aarch64_set_FP_double (cpu, rd, (double) value);
8508 else
8509 aarch64_set_FP_float (cpu, rd, (float) value);
8510 }
8511 }
8512
8513 static void
8514 float_vector_move (sim_cpu *cpu)
8515 {
8516 /* instr[31,17] == 100 1111 0101 0111
8517 instr[16] ==> direction 0=> to GR, 1=> from GR
8518 instr[15,10] ==> 00 0000 ==> OK, ow ==> UNALLOC
8519 instr[9,5] ==> source
8520 instr[4,0] ==> dest. */
8521
8522 unsigned rn = INSTR (9, 5);
8523 unsigned rd = INSTR (4, 0);
8524
8525 NYI_assert (31, 17, 0x4F57);
8526
8527 if (INSTR (15, 10) != 0)
8528 HALT_UNALLOC;
8529
8530 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8531 if (INSTR (16, 16))
8532 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8533 else
8534 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8535 }
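
/* In assembler terms the two directions above correspond to the moves
   between a general register and the upper half of a vector register
   (mnemonics illustrative):
     FMOV Vd.D[1], Xn    ; instr[16] == 1
     FMOV Xd, Vn.D[1]    ; instr[16] == 0  */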
8536
8537 static void
8538 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8539 {
8540 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8541 instr[30] = 0
8542 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8543 instr[28,25] = 1111
8544 instr[24] = 0
8545 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8546 instr[21] = 1
8547 instr[20,19] = rmode
8548 instr[18,16] = opcode
8549 instr[15,10] = 00 0000 */
8550
8551 uint32_t rmode_opcode;
8552 uint32_t size_type;
8553 uint32_t type;
8554 uint32_t size;
8555 uint32_t S;
8556
8557 if (INSTR (31, 17) == 0x4F57)
8558 {
8559 float_vector_move (cpu);
8560 return;
8561 }
8562
8563 size = INSTR (31, 31);
8564 S = INSTR (29, 29);
8565 if (S != 0)
8566 HALT_UNALLOC;
8567
8568 type = INSTR (23, 22);
8569 if (type > 1)
8570 HALT_UNALLOC;
8571
8572 rmode_opcode = INSTR (20, 16);
8573 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8574
8575 switch (rmode_opcode)
8576 {
8577 case 2: /* SCVTF. */
8578 switch (size_type)
8579 {
8580 case 0: scvtf32 (cpu); return;
8581 case 1: scvtd32 (cpu); return;
8582 case 2: scvtf (cpu); return;
8583 case 3: scvtd (cpu); return;
8584 }
8585
8586 case 6: /* FMOV GR, Vec. */
8587 switch (size_type)
8588 {
8589 case 0: gfmovs (cpu); return;
8590 case 3: gfmovd (cpu); return;
8591 default: HALT_UNALLOC;
8592 }
8593
8594 case 7: /* FMOV vec, GR. */
8595 switch (size_type)
8596 {
8597 case 0: fgmovs (cpu); return;
8598 case 3: fgmovd (cpu); return;
8599 default: HALT_UNALLOC;
8600 }
8601
8602 case 24: /* FCVTZS. */
8603 switch (size_type)
8604 {
8605 case 0: fcvtszs32 (cpu); return;
8606 case 1: fcvtszd32 (cpu); return;
8607 case 2: fcvtszs (cpu); return;
8608 case 3: fcvtszd (cpu); return;
8609 }
8610
8611 case 25: do_fcvtzu (cpu); return;
8612 case 3: do_UCVTF (cpu); return;
8613
8614 case 0: /* FCVTNS. */
8615 case 1: /* FCVTNU. */
8616 case 4: /* FCVTAS. */
8617 case 5: /* FCVTAU. */
8618 case 8: /* FCVTPS. */
8619 case 9: /* FCVTPU. */
8620 case 16: /* FCVTMS. */
8621 case 17: /* FCVTMU. */
8622 default:
8623 HALT_NYI;
8624 }
8625 }
8626
8627 static void
8628 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8629 {
8630 uint32_t flags;
8631
8632 /* FIXME: Add exception raising. */
8633 if (isnan (fvalue1) || isnan (fvalue2))
8634 flags = C|V;
8635 else if (isinf (fvalue1) && isinf (fvalue2))
8636 {
8637 /* Subtracting two infinities may give a NaN. We only need to compare
8638 the signs, which we can get from isinf. */
8639 int result = isinf (fvalue1) - isinf (fvalue2);
8640
8641 if (result == 0)
8642 flags = Z|C;
8643 else if (result < 0)
8644 flags = N;
8645 else /* (result > 0). */
8646 flags = C;
8647 }
8648 else
8649 {
8650 float result = fvalue1 - fvalue2;
8651
8652 if (result == 0.0)
8653 flags = Z|C;
8654 else if (result < 0)
8655 flags = N;
8656 else /* (result > 0). */
8657 flags = C;
8658 }
8659
8660 aarch64_set_CPSR (cpu, flags);
8661 }
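
/* The NZCV settings above mirror the architected FCMP results:
     equal        -> Z|C (0110)
     less than    -> N   (1000)
     greater than -> C   (0010)
     unordered    -> C|V (0011)  */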
8662
8663 static void
8664 fcmps (sim_cpu *cpu)
8665 {
8666 unsigned sm = INSTR (20, 16);
8667 unsigned sn = INSTR ( 9, 5);
8668
8669 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8670 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8671
8672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8673 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8674 }
8675
8676 /* Float compare to zero -- Invalid Operation exception
8677 only on signaling NaNs. */
8678 static void
8679 fcmpzs (sim_cpu *cpu)
8680 {
8681 unsigned sn = INSTR ( 9, 5);
8682 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8683
8684 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8685 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8686 }
8687
8688 /* Float compare -- Invalid Operation exception on all NaNs. */
8689 static void
8690 fcmpes (sim_cpu *cpu)
8691 {
8692 unsigned sm = INSTR (20, 16);
8693 unsigned sn = INSTR ( 9, 5);
8694
8695 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8696 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8697
8698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8699 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8700 }
8701
8702 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8703 static void
8704 fcmpzes (sim_cpu *cpu)
8705 {
8706 unsigned sn = INSTR ( 9, 5);
8707 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8708
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8711 }
8712
8713 static void
8714 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8715 {
8716 uint32_t flags;
8717
8718 /* FIXME: Add exception raising. */
8719 if (isnan (dval1) || isnan (dval2))
8720 flags = C|V;
8721 else if (isinf (dval1) && isinf (dval2))
8722 {
8723 /* Subtracting two infinities may give a NaN. We only need to compare
8724 the signs, which we can get from isinf. */
8725 int result = isinf (dval1) - isinf (dval2);
8726
8727 if (result == 0)
8728 flags = Z|C;
8729 else if (result < 0)
8730 flags = N;
8731 else /* (result > 0). */
8732 flags = C;
8733 }
8734 else
8735 {
8736 double result = dval1 - dval2;
8737
8738 if (result == 0.0)
8739 flags = Z|C;
8740 else if (result < 0)
8741 flags = N;
8742 else /* (result > 0). */
8743 flags = C;
8744 }
8745
8746 aarch64_set_CPSR (cpu, flags);
8747 }
8748
8749 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8750 static void
8751 fcmpd (sim_cpu *cpu)
8752 {
8753 unsigned sm = INSTR (20, 16);
8754 unsigned sn = INSTR ( 9, 5);
8755
8756 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8757 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8758
8759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8760 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8761 }
8762
8763 /* Double compare to zero -- Invalid Operation exception
8764 only on signaling NaNs. */
8765 static void
8766 fcmpzd (sim_cpu *cpu)
8767 {
8768 unsigned sn = INSTR ( 9, 5);
8769 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8770
8771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8772 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8773 }
8774
8775 /* Double compare -- Invalid Operation exception on all NaNs. */
8776 static void
8777 fcmped (sim_cpu *cpu)
8778 {
8779 unsigned sm = INSTR (20, 16);
8780 unsigned sn = INSTR ( 9, 5);
8781
8782 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8783 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8784
8785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8786 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8787 }
8788
8789 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8790 static void
8791 fcmpzed (sim_cpu *cpu)
8792 {
8793 unsigned sn = INSTR ( 9, 5);
8794 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8795
8796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8797 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8798 }
8799
8800 static void
8801 dexSimpleFPCompare (sim_cpu *cpu)
8802 {
8803 /* assert instr[28,25] == 1111
8804 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8805 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8806 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8807 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8808 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8809 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8810 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8811 ow ==> UNALLOC */
8812 uint32_t dispatch;
8813 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8814 uint32_t type = INSTR (23, 22);
8815 uint32_t op = INSTR (15, 14);
8816 uint32_t op2_2_0 = INSTR (2, 0);
8817
8818 if (op2_2_0 != 0)
8819 HALT_UNALLOC;
8820
8821 if (M_S != 0)
8822 HALT_UNALLOC;
8823
8824 if (type > 1)
8825 HALT_UNALLOC;
8826
8827 if (op != 0)
8828 HALT_UNALLOC;
8829
8830 /* dispatch on type and top 2 bits of opcode. */
8831 dispatch = (type << 2) | INSTR (4, 3);
8832
8833 switch (dispatch)
8834 {
8835 case 0: fcmps (cpu); return;
8836 case 1: fcmpzs (cpu); return;
8837 case 2: fcmpes (cpu); return;
8838 case 3: fcmpzes (cpu); return;
8839 case 4: fcmpd (cpu); return;
8840 case 5: fcmpzd (cpu); return;
8841 case 6: fcmped (cpu); return;
8842 case 7: fcmpzed (cpu); return;
8843 }
8844 }
8845
8846 static void
8847 do_scalar_FADDP (sim_cpu *cpu)
8848 {
8849 /* instr [31,23] = 0111 1110 0
8850 instr [22] = single(0)/double(1)
8851 instr [21,10] = 11 0000 1101 10
8852 instr [9,5] = Fn
8853 instr [4,0] = Fd. */
8854
8855 unsigned Fn = INSTR (9, 5);
8856 unsigned Fd = INSTR (4, 0);
8857
8858 NYI_assert (31, 23, 0x0FC);
8859 NYI_assert (21, 10, 0xC36);
8860
8861 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8862 if (INSTR (22, 22))
8863 {
8864 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8865 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8866
8867 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8868 }
8869 else
8870 {
8871 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8872 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8873
8874 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8875 }
8876 }
8877
8878 /* Floating point absolute difference. */
8879
8880 static void
8881 do_scalar_FABD (sim_cpu *cpu)
8882 {
8883 /* instr [31,23] = 0111 1110 1
8884 instr [22] = float(0)/double(1)
8885 instr [21] = 1
8886 instr [20,16] = Rm
8887 instr [15,10] = 1101 01
8888 instr [9, 5] = Rn
8889 instr [4, 0] = Rd. */
8890
8891 unsigned rm = INSTR (20, 16);
8892 unsigned rn = INSTR (9, 5);
8893 unsigned rd = INSTR (4, 0);
8894
8895 NYI_assert (31, 23, 0x0FD);
8896 NYI_assert (21, 21, 1);
8897 NYI_assert (15, 10, 0x35);
8898
8899 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8900 if (INSTR (22, 22))
8901 aarch64_set_FP_double (cpu, rd,
8902 fabs (aarch64_get_FP_double (cpu, rn)
8903 - aarch64_get_FP_double (cpu, rm)));
8904 else
8905 aarch64_set_FP_float (cpu, rd,
8906 fabsf (aarch64_get_FP_float (cpu, rn)
8907 - aarch64_get_FP_float (cpu, rm)));
8908 }
8909
8910 static void
8911 do_scalar_CMGT (sim_cpu *cpu)
8912 {
8913 /* instr [31,21] = 0101 1110 111
8914 instr [20,16] = Rm
8915 instr [15,10] = 00 1101
8916 instr [9, 5] = Rn
8917 instr [4, 0] = Rd. */
8918
8919 unsigned rm = INSTR (20, 16);
8920 unsigned rn = INSTR (9, 5);
8921 unsigned rd = INSTR (4, 0);
8922
8923 NYI_assert (31, 21, 0x2F7);
8924 NYI_assert (15, 10, 0x0D);
8925
8926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8927 aarch64_set_vec_u64 (cpu, rd, 0,
8928 aarch64_get_vec_u64 (cpu, rn, 0) >
8929 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8930 }
8931
8932 static void
8933 do_scalar_USHR (sim_cpu *cpu)
8934 {
8935 /* instr [31,23] = 0111 1111 0
8936 instr [22,16] = shift amount
8937 instr [15,10] = 0000 01
8938 instr [9, 5] = Rn
8939 instr [4, 0] = Rd. */
8940
8941 unsigned amount = 128 - INSTR (22, 16);
8942 unsigned rn = INSTR (9, 5);
8943 unsigned rd = INSTR (4, 0);
8944
8945 NYI_assert (31, 23, 0x0FE);
8946 NYI_assert (15, 10, 0x01);
8947
8948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8949 aarch64_set_vec_u64 (cpu, rd, 0,
8950 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8951 }
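
/* The shift immediate holds 128 - shift for this form.  E.g. an
   assembler "USHR D0, D1, #8" (illustrative) has instr[22,16] = 120,
   and the code above recovers amount = 128 - 120 = 8.  */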
8952
8953 static void
8954 do_scalar_SSHL (sim_cpu *cpu)
8955 {
8956 /* instr [31,21] = 0101 1110 111
8957 instr [20,16] = Rm
8958 instr [15,10] = 0100 01
8959 instr [9, 5] = Rn
8960 instr [4, 0] = Rd. */
8961
8962 unsigned rm = INSTR (20, 16);
8963 unsigned rn = INSTR (9, 5);
8964 unsigned rd = INSTR (4, 0);
8965 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8966
8967 NYI_assert (31, 21, 0x2F7);
8968 NYI_assert (15, 10, 0x11);
8969
8970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8971 if (shift >= 0)
8972 aarch64_set_vec_s64 (cpu, rd, 0,
8973 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8974 else
8975 aarch64_set_vec_s64 (cpu, rd, 0,
8976 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8977 }
8978
8979 /* Floating point scalar compare greater than or equal to 0. */
8980 static void
8981 do_scalar_FCMGE_zero (sim_cpu *cpu)
8982 {
8983 /* instr [31,23] = 0111 1110 1
8984 instr [22,22] = size
8985 instr [21,16] = 1000 00
8986 instr [15,10] = 1100 10
8987 instr [9, 5] = Rn
8988 instr [4, 0] = Rd. */
8989
8990 unsigned size = INSTR (22, 22);
8991 unsigned rn = INSTR (9, 5);
8992 unsigned rd = INSTR (4, 0);
8993
8994 NYI_assert (31, 23, 0x0FD);
8995 NYI_assert (21, 16, 0x20);
8996 NYI_assert (15, 10, 0x32);
8997
8998 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8999 if (size)
9000 aarch64_set_vec_u64 (cpu, rd, 0,
9001 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9002 else
9003 aarch64_set_vec_u32 (cpu, rd, 0,
9004 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9005 }
9006
9007 /* Floating point scalar compare less than or equal to 0. */
9008 static void
9009 do_scalar_FCMLE_zero (sim_cpu *cpu)
9010 {
9011 /* instr [31,23] = 0111 1110 1
9012 instr [22,22] = size
9013 instr [21,16] = 1000 00
9014 instr [15,10] = 1101 10
9015 instr [9, 5] = Rn
9016 instr [4, 0] = Rd. */
9017
9018 unsigned size = INSTR (22, 22);
9019 unsigned rn = INSTR (9, 5);
9020 unsigned rd = INSTR (4, 0);
9021
9022 NYI_assert (31, 23, 0x0FD);
9023 NYI_assert (21, 16, 0x20);
9024 NYI_assert (15, 10, 0x36);
9025
9026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9027 if (size)
9028 aarch64_set_vec_u64 (cpu, rd, 0,
9029 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9030 else
9031 aarch64_set_vec_u32 (cpu, rd, 0,
9032 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9033 }
9034
9035 /* Floating point scalar compare greater than 0. */
9036 static void
9037 do_scalar_FCMGT_zero (sim_cpu *cpu)
9038 {
9039 /* instr [31,23] = 0101 1110 1
9040 instr [22,22] = size
9041 instr [21,16] = 1000 00
9042 instr [15,10] = 1100 10
9043 instr [9, 5] = Rn
9044 instr [4, 0] = Rd. */
9045
9046 unsigned size = INSTR (22, 22);
9047 unsigned rn = INSTR (9, 5);
9048 unsigned rd = INSTR (4, 0);
9049
9050 NYI_assert (31, 23, 0x0BD);
9051 NYI_assert (21, 16, 0x20);
9052 NYI_assert (15, 10, 0x32);
9053
9054 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9055 if (size)
9056 aarch64_set_vec_u64 (cpu, rd, 0,
9057 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9058 else
9059 aarch64_set_vec_u32 (cpu, rd, 0,
9060 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9061 }
9062
9063 /* Floating point scalar compare equal to 0. */
9064 static void
9065 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9066 {
9067 /* instr [31,23] = 0101 1110 1
9068 instr [22,22] = size
9069 instr [21,16] = 1000 00
9070 instr [15,10] = 1101 10
9071 instr [9, 5] = Rn
9072 instr [4, 0] = Rd. */
9073
9074 unsigned size = INSTR (22, 22);
9075 unsigned rn = INSTR (9, 5);
9076 unsigned rd = INSTR (4, 0);
9077
9078 NYI_assert (31, 23, 0x0BD);
9079 NYI_assert (21, 16, 0x20);
9080 NYI_assert (15, 10, 0x36);
9081
9082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9083 if (size)
9084 aarch64_set_vec_u64 (cpu, rd, 0,
9085 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9086 else
9087 aarch64_set_vec_u32 (cpu, rd, 0,
9088 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9089 }
9090
9091 /* Floating point scalar compare less than 0. */
9092 static void
9093 do_scalar_FCMLT_zero (sim_cpu *cpu)
9094 {
9095 /* instr [31,23] = 0101 1110 1
9096 instr [22,22] = size
9097 instr [21,16] = 1000 00
9098 instr [15,10] = 1110 10
9099 instr [9, 5] = Rn
9100 instr [4, 0] = Rd. */
9101
9102 unsigned size = INSTR (22, 22);
9103 unsigned rn = INSTR (9, 5);
9104 unsigned rd = INSTR (4, 0);
9105
9106 NYI_assert (31, 23, 0x0BD);
9107 NYI_assert (21, 16, 0x20);
9108 NYI_assert (15, 10, 0x3A);
9109
9110 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9111 if (size)
9112 aarch64_set_vec_u64 (cpu, rd, 0,
9113 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9114 else
9115 aarch64_set_vec_u32 (cpu, rd, 0,
9116 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9117 }
9118
9119 static void
9120 do_scalar_shift (sim_cpu *cpu)
9121 {
9122 /* instr [31,23] = 0101 1111 0
9123 instr [22,16] = shift amount
9124 instr [15,10] = 0101 01 [SHL]
9125 instr [15,10] = 0000 01 [SSHR]
9126 instr [9, 5] = Rn
9127 instr [4, 0] = Rd. */
9128
9129 unsigned rn = INSTR (9, 5);
9130 unsigned rd = INSTR (4, 0);
9131 unsigned amount;
9132
9133 NYI_assert (31, 23, 0x0BE);
9134
9135 if (INSTR (22, 22) == 0)
9136 HALT_UNALLOC;
9137
9138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9139 switch (INSTR (15, 10))
9140 {
9141 case 0x01: /* SSHR */
9142 amount = 128 - INSTR (22, 16);
9143 aarch64_set_vec_s64 (cpu, rd, 0,
9144 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9145 return;
9146 case 0x15: /* SHL */
9147 amount = INSTR (22, 16) - 64;
9148 aarch64_set_vec_u64 (cpu, rd, 0,
9149 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9150 return;
9151 default:
9152 HALT_NYI;
9153 }
9154 }
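
/* The two immediates decode differently: for SSHR the field holds
   128 - shift, while for SHL it holds 64 + shift.  E.g. an assembler
   "SHL D0, D1, #3" (illustrative) has instr[22,16] = 67, giving
   amount = 67 - 64 = 3.  */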
9155
9156 /* FCMEQ FCMGT FCMGE. */
9157 static void
9158 do_scalar_FCM (sim_cpu *cpu)
9159 {
9160 /* instr [31,30] = 01
9161 instr [29] = U
9162 instr [28,24] = 1 1110
9163 instr [23] = E
9164 instr [22] = size
9165 instr [21] = 1
9166 instr [20,16] = Rm
9167 instr [15,12] = 1110
9168 instr [11] = AC
9169 instr [10] = 1
9170 instr [9, 5] = Rn
9171 instr [4, 0] = Rd. */
9172
9173 unsigned rm = INSTR (20, 16);
9174 unsigned rn = INSTR (9, 5);
9175 unsigned rd = INSTR (4, 0);
9176 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9177 unsigned result;
9178 float val1;
9179 float val2;
9180
9181 NYI_assert (31, 30, 1);
9182 NYI_assert (28, 24, 0x1E);
9183 NYI_assert (21, 21, 1);
9184 NYI_assert (15, 12, 0xE);
9185 NYI_assert (10, 10, 1);
9186
9187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9188 if (INSTR (22, 22))
9189 {
9190 double val1 = aarch64_get_FP_double (cpu, rn);
9191 double val2 = aarch64_get_FP_double (cpu, rm);
9192
9193 switch (EUac)
9194 {
9195 case 0: /* 000 */
9196 result = val1 == val2;
9197 break;
9198
9199 case 3: /* 011 */
9200 val1 = fabs (val1);
9201 val2 = fabs (val2);
9202 /* Fall through. */
9203 case 2: /* 010 */
9204 result = val1 >= val2;
9205 break;
9206
9207 case 7: /* 111 */
9208 val1 = fabs (val1);
9209 val2 = fabs (val2);
9210 /* Fall through. */
9211 case 6: /* 110 */
9212 result = val1 > val2;
9213 break;
9214
9215 default:
9216 HALT_UNALLOC;
9217 }
9218
9219 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9220 return;
9221 }
9222
9223 val1 = aarch64_get_FP_float (cpu, rn);
9224 val2 = aarch64_get_FP_float (cpu, rm);
9225
9226 switch (EUac)
9227 {
9228 case 0: /* 000 */
9229 result = val1 == val2;
9230 break;
9231
9232 case 3: /* 011 */
9233 val1 = fabsf (val1);
9234 val2 = fabsf (val2);
9235 /* Fall through. */
9236 case 2: /* 010 */
9237 result = val1 >= val2;
9238 break;
9239
9240 case 7: /* 111 */
9241 val1 = fabsf (val1);
9242 val2 = fabsf (val2);
9243 /* Fall through. */
9244 case 6: /* 110 */
9245 result = val1 > val2;
9246 break;
9247
9248 default:
9249 HALT_UNALLOC;
9250 }
9251
9252 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9253 }
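
/* Summarising the EUac dispatch above (E = instr[23], U = instr[29],
   ac = instr[11]):
     000 ==> FCMEQ (==)
     010 ==> FCMGE (>=)      011 ==> FACGE (absolute >=)
     110 ==> FCMGT (>)       111 ==> FACGT (absolute >)  */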
9254
9255 /* An alias of DUP. */
9256 static void
9257 do_scalar_MOV (sim_cpu *cpu)
9258 {
9259 /* instr [31,21] = 0101 1110 000
9260 instr [20,16] = imm5
9261 instr [15,10] = 0000 01
9262 instr [9, 5] = Rn
9263 instr [4, 0] = Rd. */
9264
9265 unsigned rn = INSTR (9, 5);
9266 unsigned rd = INSTR (4, 0);
9267 unsigned index;
9268
9269 NYI_assert (31, 21, 0x2F0);
9270 NYI_assert (15, 10, 0x01);
9271
9272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9273 if (INSTR (16, 16))
9274 {
9275 /* 8-bit. */
9276 index = INSTR (20, 17);
9277 aarch64_set_vec_u8
9278 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9279 }
9280 else if (INSTR (17, 17))
9281 {
9282 /* 16-bit. */
9283 index = INSTR (20, 18);
9284 aarch64_set_vec_u16
9285 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9286 }
9287 else if (INSTR (18, 18))
9288 {
9289 /* 32-bit. */
9290 index = INSTR (20, 19);
9291 aarch64_set_vec_u32
9292 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9293 }
9294 else if (INSTR (19, 19))
9295 {
9296 /* 64-bit. */
9297 index = INSTR (20, 20);
9298 aarch64_set_vec_u64
9299 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9300 }
9301 else
9302 HALT_UNALLOC;
9303 }
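
/* The imm5 field encodes both element size and index: the position of
   the lowest set bit selects the size and the bits above it give the
   index.  E.g. (illustrative) imm5 = 10110 has instr[17] as its lowest
   set bit, so this is a 16-bit move with index = instr[20,18] = 101 = 5.  */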
9304
9305 static void
9306 do_scalar_NEG (sim_cpu *cpu)
9307 {
9308 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9309 instr [9, 5] = Rn
9310 instr [4, 0] = Rd. */
9311
9312 unsigned rn = INSTR (9, 5);
9313 unsigned rd = INSTR (4, 0);
9314
9315 NYI_assert (31, 10, 0x1FB82E);
9316
9317 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9318 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9319 }
9320
9321 static void
9322 do_scalar_USHL (sim_cpu *cpu)
9323 {
9324 /* instr [31,21] = 0111 1110 111
9325 instr [20,16] = Rm
9326 instr [15,10] = 0100 01
9327 instr [9, 5] = Rn
9328 instr [4, 0] = Rd. */
9329
9330 unsigned rm = INSTR (20, 16);
9331 unsigned rn = INSTR (9, 5);
9332 unsigned rd = INSTR (4, 0);
9333 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9334
9335 NYI_assert (31, 21, 0x3F7);
9336 NYI_assert (15, 10, 0x11);
9337
9338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9339 if (shift >= 0)
9340 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9341 else
9342 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9343 }
9344
9345 static void
9346 do_double_add (sim_cpu *cpu)
9347 {
9348 /* instr [31,21] = 0101 1110 111
9349 instr [20,16] = Fn
9350 instr [15,10] = 1000 01
9351 instr [9,5] = Fm
9352 instr [4,0] = Fd. */
9353 unsigned Fd;
9354 unsigned Fm;
9355 unsigned Fn;
9356 double val1;
9357 double val2;
9358
9359 NYI_assert (31, 21, 0x2F7);
9360 NYI_assert (15, 10, 0x21);
9361
9362 Fd = INSTR (4, 0);
9363 Fm = INSTR (9, 5);
9364 Fn = INSTR (20, 16);
9365
9366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9367 val1 = aarch64_get_FP_double (cpu, Fm);
9368 val2 = aarch64_get_FP_double (cpu, Fn);
9369
9370 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9371 }
9372
9373 static void
9374 do_scalar_UCVTF (sim_cpu *cpu)
9375 {
9376 /* instr [31,23] = 0111 1110 0
9377 instr [22] = single(0)/double(1)
9378 instr [21,10] = 10 0001 1101 10
9379 instr [9,5] = rn
9380 instr [4,0] = rd. */
9381
9382 unsigned rn = INSTR (9, 5);
9383 unsigned rd = INSTR (4, 0);
9384
9385 NYI_assert (31, 23, 0x0FC);
9386 NYI_assert (21, 10, 0x876);
9387
9388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9389 if (INSTR (22, 22))
9390 {
9391 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9392
9393 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9394 }
9395 else
9396 {
9397 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9398
9399 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9400 }
9401 }
9402
9403 static void
9404 do_scalar_vec (sim_cpu *cpu)
9405 {
9406 /* instr [30] = 1. */
9407 /* instr [28,25] = 1111. */
9408 switch (INSTR (31, 23))
9409 {
9410 case 0xBC:
9411 switch (INSTR (15, 10))
9412 {
9413 case 0x01: do_scalar_MOV (cpu); return;
9414 case 0x39: do_scalar_FCM (cpu); return;
9415 case 0x3B: do_scalar_FCM (cpu); return;
9416 }
9417 break;
9418
9419 case 0xBE: do_scalar_shift (cpu); return;
9420
9421 case 0xFC:
9422 switch (INSTR (15, 10))
9423 {
9424 case 0x36:
9425 switch (INSTR (21, 16))
9426 {
9427 case 0x30: do_scalar_FADDP (cpu); return;
9428 case 0x21: do_scalar_UCVTF (cpu); return;
9429 }
9430 HALT_NYI;
9431 case 0x39: do_scalar_FCM (cpu); return;
9432 case 0x3B: do_scalar_FCM (cpu); return;
9433 }
9434 break;
9435
9436 case 0xFD:
9437 switch (INSTR (15, 10))
9438 {
9439 case 0x0D: do_scalar_CMGT (cpu); return;
9440 case 0x11: do_scalar_USHL (cpu); return;
9441 case 0x2E: do_scalar_NEG (cpu); return;
9442 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9443 case 0x35: do_scalar_FABD (cpu); return;
9444 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9445 case 0x39: do_scalar_FCM (cpu); return;
9446 case 0x3B: do_scalar_FCM (cpu); return;
9447 default:
9448 HALT_NYI;
9449 }
9450
9451 case 0xFE: do_scalar_USHR (cpu); return;
9452
9453 case 0xBD:
9454 switch (INSTR (15, 10))
9455 {
9456 case 0x21: do_double_add (cpu); return;
9457 case 0x11: do_scalar_SSHL (cpu); return;
9458 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9459 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9460 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9461 default:
9462 HALT_NYI;
9463 }
9464
9465 default:
9466 HALT_NYI;
9467 }
9468 }
9469
9470 static void
9471 dexAdvSIMD1 (sim_cpu *cpu)
9472 {
9473 /* instr [28,25] = 1 111. */
9474
9475 /* We are currently only interested in the basic
9476 scalar fp routines which all have bit 30 = 0. */
9477 if (INSTR (30, 30))
9478 do_scalar_vec (cpu);
9479
9480 /* instr[24] is set for FP data processing 3-source and clear for
9481 all other basic scalar fp instruction groups. */
9482 else if (INSTR (24, 24))
9483 dexSimpleFPDataProc3Source (cpu);
9484
9485 /* instr[21] is clear for floating <-> fixed conversions and set for
9486 all other basic scalar fp instruction groups. */
9487 else if (!INSTR (21, 21))
9488 dexSimpleFPFixedConvert (cpu);
9489
9490 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9491 11 ==> cond select, 00 ==> other. */
9492 else
9493 switch (INSTR (11, 10))
9494 {
9495 case 1: dexSimpleFPCondCompare (cpu); return;
9496 case 2: dexSimpleFPDataProc2Source (cpu); return;
9497 case 3: dexSimpleFPCondSelect (cpu); return;
9498
9499 default:
9500 /* Now an ordered cascade of tests.
9501 FP immediate has instr [12] == 1.
9502 FP compare has instr [13] == 1.
9503 FP Data Proc 1 Source has instr [14] == 1.
9504 FP floating <--> integer conversions has instr [15] == 0. */
9505 if (INSTR (12, 12))
9506 dexSimpleFPImmediate (cpu);
9507
9508 else if (INSTR (13, 13))
9509 dexSimpleFPCompare (cpu);
9510
9511 else if (INSTR (14, 14))
9512 dexSimpleFPDataProc1Source (cpu);
9513
9514 else if (!INSTR (15, 15))
9515 dexSimpleFPIntegerConvert (cpu);
9516
9517 else
9518 /* If we get here then instr[15] == 1 which means UNALLOC. */
9519 HALT_UNALLOC;
9520 }
9521 }
9522
9523 /* PC relative addressing. */
9524
9525 static void
9526 pcadr (sim_cpu *cpu)
9527 {
9528 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9529 instr[30,29] = immlo
9530 instr[23,5] = immhi. */
9531 uint64_t address;
9532 unsigned rd = INSTR (4, 0);
9533 uint32_t isPage = INSTR (31, 31);
9534 union { uint64_t u64; int64_t s64; } imm;
9535 uint64_t offset;
9536
9537 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9538 offset = imm.u64;
9539 offset = (offset << 2) | INSTR (30, 29);
9540
9541 address = aarch64_get_PC (cpu);
9542
9543 if (isPage)
9544 {
9545 offset <<= 12;
9546 address &= ~0xfff;
9547 }
9548
9549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9550 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9551 }
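
/* A worked ADRP example (illustrative): with PC = 0x400123 and a
   combined immediate of 1, the code above computes
   (0x400123 & ~0xfff) + (1 << 12) = 0x401000, the base of the next
   4KB page.  */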
9552
9553 /* Specific decode and execute for group Data Processing Immediate. */
9554
9555 static void
9556 dexPCRelAddressing (sim_cpu *cpu)
9557 {
9558 /* assert instr[28,24] = 10000. */
9559 pcadr (cpu);
9560 }
9561
9562 /* Immediate logical.
9563 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9564 16, 32 or 64 bit sequence pulled out at decode and possibly
9565 inverting it.
9566
9567 N.B. the output register (dest) can normally be Xn or SP;
9568 the exception occurs for flag setting instructions which may
9569 only use Xn for the output (dest). The input register can
9570 never be SP. */
9571
9572 /* 32 bit and immediate. */
9573 static void
9574 and32 (sim_cpu *cpu, uint32_t bimm)
9575 {
9576 unsigned rn = INSTR (9, 5);
9577 unsigned rd = INSTR (4, 0);
9578
9579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9580 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9581 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9582 }
9583
9584 /* 64 bit and immediate. */
9585 static void
9586 and64 (sim_cpu *cpu, uint64_t bimm)
9587 {
9588 unsigned rn = INSTR (9, 5);
9589 unsigned rd = INSTR (4, 0);
9590
9591 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9592 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9593 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9594 }
9595
9596 /* 32 bit and immediate set flags. */
9597 static void
9598 ands32 (sim_cpu *cpu, uint32_t bimm)
9599 {
9600 unsigned rn = INSTR (9, 5);
9601 unsigned rd = INSTR (4, 0);
9602
9603 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9604 uint32_t value2 = bimm;
9605
9606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9607 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9608 set_flags_for_binop32 (cpu, value1 & value2);
9609 }
9610
9611 /* 64 bit and immediate set flags. */
9612 static void
9613 ands64 (sim_cpu *cpu, uint64_t bimm)
9614 {
9615 unsigned rn = INSTR (9, 5);
9616 unsigned rd = INSTR (4, 0);
9617
9618 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9619 uint64_t value2 = bimm;
9620
9621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9622 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9623 set_flags_for_binop64 (cpu, value1 & value2);
9624 }
9625
9626 /* 32 bit exclusive or immediate. */
9627 static void
9628 eor32 (sim_cpu *cpu, uint32_t bimm)
9629 {
9630 unsigned rn = INSTR (9, 5);
9631 unsigned rd = INSTR (4, 0);
9632
9633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9635 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9636 }
9637
9638 /* 64 bit exclusive or immediate. */
9639 static void
9640 eor64 (sim_cpu *cpu, uint64_t bimm)
9641 {
9642 unsigned rn = INSTR (9, 5);
9643 unsigned rd = INSTR (4, 0);
9644
9645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9646 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9647 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9648 }
9649
9650 /* 32 bit or immediate. */
9651 static void
9652 orr32 (sim_cpu *cpu, uint32_t bimm)
9653 {
9654 unsigned rn = INSTR (9, 5);
9655 unsigned rd = INSTR (4, 0);
9656
9657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9658 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9659 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9660 }
9661
9662 /* 64 bit or immediate. */
9663 static void
9664 orr64 (sim_cpu *cpu, uint64_t bimm)
9665 {
9666 unsigned rn = INSTR (9, 5);
9667 unsigned rd = INSTR (4, 0);
9668
9669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9670 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9671 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9672 }
9673
9674 /* Logical shifted register.
9675 These allow an optional LSL, ASR, LSR or ROR to the second source
9676 register with a count up to the register bit count.
9677 N.B. register args may not be SP. */
9678
9679 /* 32 bit AND shifted register. */
9680 static void
9681 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9682 {
9683 unsigned rm = INSTR (20, 16);
9684 unsigned rn = INSTR (9, 5);
9685 unsigned rd = INSTR (4, 0);
9686
9687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9688 aarch64_set_reg_u64
9689 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9690 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9691 }
9692
9693 /* 64 bit AND shifted register. */
9694 static void
9695 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9696 {
9697 unsigned rm = INSTR (20, 16);
9698 unsigned rn = INSTR (9, 5);
9699 unsigned rd = INSTR (4, 0);
9700
9701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9702 aarch64_set_reg_u64
9703 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9704 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9705 }
9706
9707 /* 32 bit AND shifted register setting flags. */
9708 static void
9709 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9710 {
9711 unsigned rm = INSTR (20, 16);
9712 unsigned rn = INSTR (9, 5);
9713 unsigned rd = INSTR (4, 0);
9714
9715 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9716 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9717 shift, count);
9718
9719 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9720 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9721 set_flags_for_binop32 (cpu, value1 & value2);
9722 }
9723
9724 /* 64 bit AND shifted register setting flags. */
9725 static void
9726 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9727 {
9728 unsigned rm = INSTR (20, 16);
9729 unsigned rn = INSTR (9, 5);
9730 unsigned rd = INSTR (4, 0);
9731
9732 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9733 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9734 shift, count);
9735
9736 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9737 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9738 set_flags_for_binop64 (cpu, value1 & value2);
9739 }
9740
9741 /* 32 bit BIC shifted register. */
9742 static void
9743 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9744 {
9745 unsigned rm = INSTR (20, 16);
9746 unsigned rn = INSTR (9, 5);
9747 unsigned rd = INSTR (4, 0);
9748
9749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9750 aarch64_set_reg_u64
9751 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9752 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9753 }
9754
9755 /* 64 bit BIC shifted register. */
9756 static void
9757 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9758 {
9759 unsigned rm = INSTR (20, 16);
9760 unsigned rn = INSTR (9, 5);
9761 unsigned rd = INSTR (4, 0);
9762
9763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9764 aarch64_set_reg_u64
9765 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9766 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9767 }
9768
9769 /* 32 bit BIC shifted register setting flags. */
9770 static void
9771 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9772 {
9773 unsigned rm = INSTR (20, 16);
9774 unsigned rn = INSTR (9, 5);
9775 unsigned rd = INSTR (4, 0);
9776
9777 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9778 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9779 shift, count);
9780
9781 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9782 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9783 set_flags_for_binop32 (cpu, value1 & value2);
9784 }
9785
9786 /* 64 bit BIC shifted register setting flags. */
9787 static void
9788 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9789 {
9790 unsigned rm = INSTR (20, 16);
9791 unsigned rn = INSTR (9, 5);
9792 unsigned rd = INSTR (4, 0);
9793
9794 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9795 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9796 shift, count);
9797
9798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9799 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9800 set_flags_for_binop64 (cpu, value1 & value2);
9801 }
9802
9803 /* 32 bit EON shifted register. */
9804 static void
9805 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9806 {
9807 unsigned rm = INSTR (20, 16);
9808 unsigned rn = INSTR (9, 5);
9809 unsigned rd = INSTR (4, 0);
9810
9811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9812 aarch64_set_reg_u64
9813 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9814 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9815 }
9816
9817 /* 64 bit EON shifted register. */
9818 static void
9819 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9820 {
9821 unsigned rm = INSTR (20, 16);
9822 unsigned rn = INSTR (9, 5);
9823 unsigned rd = INSTR (4, 0);
9824
9825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9826 aarch64_set_reg_u64
9827 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9828 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9829 }
9830
9831 /* 32 bit EOR shifted register. */
9832 static void
9833 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9834 {
9835 unsigned rm = INSTR (20, 16);
9836 unsigned rn = INSTR (9, 5);
9837 unsigned rd = INSTR (4, 0);
9838
9839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9840 aarch64_set_reg_u64
9841 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9842 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9843 }
9844
9845 /* 64 bit EOR shifted register. */
9846 static void
9847 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9848 {
9849 unsigned rm = INSTR (20, 16);
9850 unsigned rn = INSTR (9, 5);
9851 unsigned rd = INSTR (4, 0);
9852
9853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9854 aarch64_set_reg_u64
9855 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9856 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9857 }
9858
9859 /* 32 bit ORR shifted register. */
9860 static void
9861 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9862 {
9863 unsigned rm = INSTR (20, 16);
9864 unsigned rn = INSTR (9, 5);
9865 unsigned rd = INSTR (4, 0);
9866
9867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9868 aarch64_set_reg_u64
9869 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9870 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9871 }
9872
9873 /* 64 bit ORR shifted register. */
9874 static void
9875 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9876 {
9877 unsigned rm = INSTR (20, 16);
9878 unsigned rn = INSTR (9, 5);
9879 unsigned rd = INSTR (4, 0);
9880
9881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9882 aarch64_set_reg_u64
9883 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9884 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9885 }
9886
9887 /* 32 bit ORN shifted register. */
9888 static void
9889 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9890 {
9891 unsigned rm = INSTR (20, 16);
9892 unsigned rn = INSTR (9, 5);
9893 unsigned rd = INSTR (4, 0);
9894
9895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9896 aarch64_set_reg_u64
9897 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9898 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9899 }
9900
9901 /* 64 bit ORN shifted register. */
9902 static void
9903 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9904 {
9905 unsigned rm = INSTR (20, 16);
9906 unsigned rn = INSTR (9, 5);
9907 unsigned rd = INSTR (4, 0);
9908
9909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9910 aarch64_set_reg_u64
9911 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9912 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9913 }
9914
9915 static void
9916 dexLogicalImmediate (sim_cpu *cpu)
9917 {
9918 /* assert instr[28,23] = 100100
9919 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9920 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9921 instr[22] = N : used to construct immediate mask
9922 instr[21,16] = immr
9923 instr[15,10] = imms
9924 instr[9,5] = Rn
9925 instr[4,0] = Rd */
9926
9927 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9928 uint32_t size = INSTR (31, 31);
9929 uint32_t N = INSTR (22, 22);
9930 /* uint32_t immr = INSTR (21, 16); */
9931 /* uint32_t imms = INSTR (15, 10); */
9932 uint32_t index = INSTR (22, 10);
9933 uint64_t bimm64 = LITable [index];
9934 uint32_t dispatch = INSTR (30, 29);
9935
9936 if (~size & N)
9937 HALT_UNALLOC;
9938
9939 if (!bimm64)
9940 HALT_UNALLOC;
9941
9942 if (size == 0)
9943 {
9944 uint32_t bimm = (uint32_t) bimm64;
9945
9946 switch (dispatch)
9947 {
9948 case 0: and32 (cpu, bimm); return;
9949 case 1: orr32 (cpu, bimm); return;
9950 case 2: eor32 (cpu, bimm); return;
9951 case 3: ands32 (cpu, bimm); return;
9952 }
9953 }
9954 else
9955 {
9956 switch (dispatch)
9957 {
9958 case 0: and64 (cpu, bimm64); return;
9959 case 1: orr64 (cpu, bimm64); return;
9960 case 2: eor64 (cpu, bimm64); return;
9961 case 3: ands64 (cpu, bimm64); return;
9962 }
9963 }
9964 HALT_UNALLOC;
9965 }
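
/* A worked example (illustrative): "AND W0, W1, #0xff" encodes N = 0,
   immr = 0, imms = 7.  The decode-time expansion replicates the eight
   set bits across each 32-bit element, so LITable[index] holds
   0x000000ff000000ff and and32 receives bimm = 0x000000ff.  */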
9966
9967 /* Immediate move.
9968 The uimm argument is a 16 bit value to be inserted into the
9969 target register; the pos argument locates the 16 bit word in the
9970 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9971 3} for 64 bit.
9972 N.B. the register arg may not be SP, so it should be
9973 accessed using the setGZRegisterXXX accessors. */
9974
9975 /* 32 bit move 16 bit immediate zero remaining shorts. */
9976 static void
9977 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9978 {
9979 unsigned rd = INSTR (4, 0);
9980
9981 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9982 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9983 }
9984
9985 /* 64 bit move 16 bit immediate zero remaining shorts. */
9986 static void
9987 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9988 {
9989 unsigned rd = INSTR (4, 0);
9990
9991 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9992 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9993 }
9994
9995 /* 32 bit move 16 bit immediate negated. */
9996 static void
9997 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9998 {
9999 unsigned rd = INSTR (4, 0);
10000
10001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10002 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
10003 }
10004
10005 /* 64 bit move 16 bit immediate negated. */
10006 static void
10007 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10008 {
10009 unsigned rd = INSTR (4, 0);
10010
10011 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10012 aarch64_set_reg_u64
10013 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10014 ^ 0xffffffffffffffffULL));
10015 }
10016
10017 /* 32 bit move 16 bit immediate keep remaining shorts. */
10018 static void
10019 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10020 {
10021 unsigned rd = INSTR (4, 0);
10022 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10023 uint32_t value = val << (pos * 16);
10024 uint32_t mask = ~(0xffffU << (pos * 16));
10025
10026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10027 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10028 }
10029
10030 /* 64 bit move 16 bit immediate keep remaining shorts. */
10031 static void
10032 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10033 {
10034 unsigned rd = INSTR (4, 0);
10035 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10036 uint64_t value = (uint64_t) val << (pos * 16);
10037 uint64_t mask = ~(0xffffULL << (pos * 16));
10038
10039 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10040 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10041 }
10042
10043 static void
10044 dexMoveWideImmediate (sim_cpu *cpu)
10045 {
10046 /* assert instr[28:23] = 100101
10047 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10048 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10049 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10050 instr[20,5] = uimm16
10051 instr[4,0] = Rd */
10052
10053 /* N.B. the (multiple of 16) shift is applied by the called routine;
10054 we just pass the multiplier. */
10055
10056 uint32_t imm;
10057 uint32_t size = INSTR (31, 31);
10058 uint32_t op = INSTR (30, 29);
10059 uint32_t shift = INSTR (22, 21);
10060
10061 /* 32 bit can only shift by 0 or 16;
10062 anything else is an unallocated instruction. */
10063 if (size == 0 && (shift > 1))
10064 HALT_UNALLOC;
10065
10066 if (op == 1)
10067 HALT_UNALLOC;
10068
10069 imm = INSTR (20, 5);
10070
10071 if (size == 0)
10072 {
10073 if (op == 0)
10074 movn32 (cpu, imm, shift);
10075 else if (op == 2)
10076 movz32 (cpu, imm, shift);
10077 else
10078 movk32 (cpu, imm, shift);
10079 }
10080 else
10081 {
10082 if (op == 0)
10083 movn64 (cpu, imm, shift);
10084 else if (op == 2)
10085 movz64 (cpu, imm, shift);
10086 else
10087 movk64 (cpu, imm, shift);
10088 }
10089 }
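
/* A typical use (illustrative): loading the 64-bit constant
   0x1234000000005678 takes two instructions,
     MOVZ X0, #0x5678             ; movz64 (cpu, 0x5678, 0)
     MOVK X0, #0x1234, LSL #48    ; movk64 (cpu, 0x1234, 3)
   the first zeroes the other three shorts, the second patches one in.  */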
10090
10091 /* Bitfield operations.
10092 These take a pair of bit positions r and s which are in {0..31}
10093 or {0..63} depending on the instruction word size.
10094 N.B register args may not be SP. */
10095
10096 /* OK, we start with ubfm which just needs to pick
10097 some bits out of the source, zero the rest and write
10098 the result to dest. Just two logical shifts are needed. */
10099
10100 /* 32 bit bitfield move, left and right of affected zeroed
10101 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10102 static void
10103 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10104 {
10105 unsigned rd;
10106 unsigned rn = INSTR (9, 5);
10107 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10108
10109 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10110 if (r <= s)
10111 {
10112 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10113 We want only bits s:xxx:r at the bottom of the word
10114 so we LSL bit s up to bit 31 i.e. by 31 - s
10115 and then we LSR to bring bit 31 down to bit s - r
10116 i.e. by 31 + r - s. */
10117 value <<= 31 - s;
10118 value >>= 31 + r - s;
10119 }
10120 else
10121 {
10122 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10123 We want only bits s:xxx:0 starting at bit 31-(r-1)
10124 so we LSL bit s up to bit 31 i.e. by 31 - s
10125 and then we LSR to bring bit 31 down to 31-(r-1)+s
10126 i.e. by r - (s + 1). */
10127 value <<= 31 - s;
10128 value >>= r - (s + 1);
10129 }
10130
10131 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10132 rd = INSTR (4, 0);
10133 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10134 }
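
/* A worked example (illustrative): "UBFX W0, W1, #8, #8" is UBFM with
   r = 8, s = 15, so the r <= s path above computes
   value = (Wn << 16) >> 24, leaving bits 15:8 of Wn in bits 7:0.  */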
10135
10136 /* 64 bit bitfield move, left and right of affected zeroed
10137 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10138 static void
10139 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10140 {
10141 unsigned rd;
10142 unsigned rn = INSTR (9, 5);
10143 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10144
10145 if (r <= s)
10146 {
10147 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10148 We want only bits s:xxx:r at the bottom of the word.
10149 So we LSL bit s up to bit 63 i.e. by 63 - s
10150 and then we LSR to bring bit 63 down to bit s - r
10151 i.e. by 63 + r - s. */
10152 value <<= 63 - s;
10153 value >>= 63 + r - s;
10154 }
10155 else
10156 {
10157 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10158 We want only bits s:xxx:0 starting at bit 63-(r-1).
10159 So we LSL bit s up to bit 63 i.e. by 63 - s
10160 and then we LSR to bring bit 63 down to 63-(r-1)+s
10161 i.e. by r - (s + 1). */
10162 value <<= 63 - s;
10163 value >>= r - (s + 1);
10164 }
10165
10166 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10167 rd = INSTR (4, 0);
10168 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10169 }
10170
10171 /* The signed versions need to insert sign bits
10172 on the left of the inserted bit field, so we do
10173 much the same as the unsigned version except we
10174 use an arithmetic shift right -- this just means
10175 we need to operate on signed values. */
10176
10177 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10178 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10179 static void
10180 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10181 {
10182 unsigned rd;
10183 unsigned rn = INSTR (9, 5);
10184 /* As per ubfm32 but use an ASR instead of an LSR. */
10185 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10186
10187 if (r <= s)
10188 {
10189 value <<= 31 - s;
10190 value >>= 31 + r - s;
10191 }
10192 else
10193 {
10194 value <<= 31 - s;
10195 value >>= r - (s + 1);
10196 }
10197
10198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10199 rd = INSTR (4, 0);
10200 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10201 }
10202
10203 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10204 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10205 static void
10206 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10207 {
10208 unsigned rd;
10209 unsigned rn = INSTR (9, 5);
10210 /* As per ubfm but use an ASR instead of an LSR. */
10211 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10212
10213 if (r <= s)
10214 {
10215 value <<= 63 - s;
10216 value >>= 63 + r - s;
10217 }
10218 else
10219 {
10220 value <<= 63 - s;
10221 value >>= r - (s + 1);
10222 }
10223
10224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10225 rd = INSTR (4, 0);
10226 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10227 }
10228
10229 /* Finally, these versions leave non-affected bits
10230 as is, so we need to generate the bits as per
10231 ubfm and also generate a mask to pick the
10232 bits from the original and computed values. */
10233
10234 /* 32 bit bitfield move, non-affected bits left as is.
10235 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10236 static void
10237 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10238 {
10239 unsigned rn = INSTR (9, 5);
10240 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10241 uint32_t mask = -1;
10242 unsigned rd;
10243 uint32_t value2;
10244
10245 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10246 if (r <= s)
10247 {
10248 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10249 We want only bits s:xxx:r at the bottom of the word
10250 so we LSL bit s up to bit 31 i.e. by 31 - s
10251 and then we LSR to bring bit 31 down to bit s - r
10252 i.e. by 31 + r - s. */
10253 value <<= 31 - s;
10254 value >>= 31 + r - s;
10255 /* the mask must include the same bits. */
10256 mask <<= 31 - s;
10257 mask >>= 31 + r - s;
10258 }
10259 else
10260 {
10261 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10262 We want only bits s:xxx:0 starting at bit 31-(r-1)
10263 so we LSL bit s up to bit 31 i.e. by 31 - s
10264 and then we LSR to bring bit 31 down to 31-(r-1)+s
10265 i.e. by r - (s + 1). */
10266 value <<= 31 - s;
10267 value >>= r - (s + 1);
10268 /* The mask must include the same bits. */
10269 mask <<= 31 - s;
10270 mask >>= r - (s + 1);
10271 }
10272
10273 rd = INSTR (4, 0);
10274 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10275
10276 value2 &= ~mask;
10277 value2 |= value;
10278
10279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10280 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10282 }
10283
10284 /* 64 bit bitfield move, non-affected bits left as is.
10285 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10286 static void
10287 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10288 {
10289 unsigned rd;
10290 unsigned rn = INSTR (9, 5);
10291 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10292 uint64_t mask = 0xffffffffffffffffULL;
10293
10294 if (r <= s)
10295 {
10296 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10297 We want only bits s:xxx:r at the bottom of the word
10298 so we LSL bit s up to bit 63 i.e. by 63 - s
10299 and then we LSR to bring bit 63 down to bit s - r
10300 i.e. by 63 + r - s. */
10301 value <<= 63 - s;
10302 value >>= 63 + r - s;
10303 /* The mask must include the same bits. */
10304 mask <<= 63 - s;
10305 mask >>= 63 + r - s;
10306 }
10307 else
10308 {
10309 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10310 We want only bits s:xxx:0 starting at bit 63-(r-1)
10311 so we LSL bit s up to bit 63 i.e. by 63 - s
10312 and then we LSR to bring bit 63 down to 63-(r-1)+s
10313 i.e. by r - (s + 1). */
10314 value <<= 63 - s;
10315 value >>= r - (s + 1);
10316 /* The mask must include the same bits. */
10317 mask <<= 63 - s;
10318 mask >>= r - (s + 1);
10319 }
10320
10321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10322 rd = INSTR (4, 0);
10323 aarch64_set_reg_u64
10324 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10325 }
10326
10327 static void
10328 dexBitfieldImmediate (sim_cpu *cpu)
10329 {
10330 /* assert instr[28:23] = 100110
10331 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10332 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10333 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10334 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10335 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10336 instr[9,5] = Rn
10337 instr[4,0] = Rd */
10338
10339 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10340 uint32_t dispatch;
10341 uint32_t imms;
10342 uint32_t size = INSTR (31, 31);
10343 uint32_t N = INSTR (22, 22);
10344 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10345 /* or else we have an UNALLOC. */
10346 uint32_t immr = INSTR (21, 16);
10347
10348 if (~size & N)
10349 HALT_UNALLOC;
10350
10351 if (!size && uimm (immr, 5, 5))
10352 HALT_UNALLOC;
10353
10354 imms = INSTR (15, 10);
10355 if (!size && uimm (imms, 5, 5))
10356 HALT_UNALLOC;
10357
10358 /* Switch on combined size and op. */
10359 dispatch = INSTR (31, 29);
10360 switch (dispatch)
10361 {
10362 case 0: sbfm32 (cpu, immr, imms); return;
10363 case 1: bfm32 (cpu, immr, imms); return;
10364 case 2: ubfm32 (cpu, immr, imms); return;
10365 case 4: sbfm (cpu, immr, imms); return;
10366 case 5: bfm (cpu, immr, imms); return;
10367 case 6: ubfm (cpu, immr, imms); return;
10368 default: HALT_UNALLOC;
10369 }
10370 }
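
/* The familiar shift and extend mnemonics are aliases of these three:
   e.g. (illustrative) "LSR W0, W1, #4" is UBFM with immr = 4, imms = 31,
   and "SXTB X0, W1" is SBFM with immr = 0, imms = 7.  */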
10371
10372 static void
10373 do_EXTR_32 (sim_cpu *cpu)
10374 {
10375 /* instr[31:21] = 00010011100
10376 instr[20,16] = Rm
10377 instr[15,10] = imms : 0xxxxx for 32 bit
10378 instr[9,5] = Rn
10379 instr[4,0] = Rd */
10380 unsigned rm = INSTR (20, 16);
10381 unsigned imms = INSTR (15, 10) & 31;
10382 unsigned rn = INSTR ( 9, 5);
10383 unsigned rd = INSTR ( 4, 0);
10384 uint64_t val1;
10385 uint64_t val2;
10386
10387 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10388 val1 >>= imms;
10389 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10390 /* A zero extract amount yields Rm; the guard also avoids a shift by 32. */
10391 val2 = imms ? (val2 << (32 - imms)) : 0;
10392 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10393 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10394 }
10395
10396 static void
10397 do_EXTR_64 (sim_cpu *cpu)
10398 {
10399 /* instr[31:21] = 10010011100
10400 instr[20,16] = Rm
10401 instr[15,10] = imms
10402 instr[9,5] = Rn
10403 instr[4,0] = Rd */
10404 unsigned rm = INSTR (20, 16);
10405 unsigned imms = INSTR (15, 10) & 63;
10406 unsigned rn = INSTR ( 9, 5);
10407 unsigned rd = INSTR ( 4, 0);
10408 uint64_t val;
10409
10410 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10411 val >>= imms;
10412 if (imms) /* A zero amount yields Rm; also avoids an undefined shift by 64. */
10413 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10414 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10415 }
10416
10417 static void
10418 dexExtractImmediate (sim_cpu *cpu)
10419 {
10420 /* assert instr[28:23] = 100111
10421 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10422 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10423 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit, otherwise UNALLOC
10424 instr[21] = op0 : must be 0 or UNALLOC
10425 instr[20,16] = Rm
10426 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10427 instr[9,5] = Rn
10428 instr[4,0] = Rd */
10429
10430 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10431 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10432 uint32_t dispatch;
10433 uint32_t size = INSTR (31, 31);
10434 uint32_t N = INSTR (22, 22);
10435 /* 32 bit operations must have imms[5] = 0
10436 or else we have an UNALLOC. */
10437 uint32_t imms = INSTR (15, 10);
10438
10439 if (size ^ N)
10440 HALT_UNALLOC;
10441
10442 if (!size && uimm (imms, 5, 5))
10443 HALT_UNALLOC;
10444
10445 /* Switch on combined size and op. */
10446 dispatch = INSTR (31, 29);
10447
10448 if (dispatch == 0)
10449 do_EXTR_32 (cpu);
10450
10451 else if (dispatch == 4)
10452 do_EXTR_64 (cpu);
10453
10454 else if (dispatch == 1)
10455 HALT_NYI;
10456 else
10457 HALT_UNALLOC;
10458 }
10459
10460 static void
10461 dexDPImm (sim_cpu *cpu)
10462 {
10463 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10464 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10465 bits [25,23] of a DPImm are the secondary dispatch vector. */
10466 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10467
10468 switch (group2)
10469 {
10470 case DPIMM_PCADR_000:
10471 case DPIMM_PCADR_001:
10472 dexPCRelAddressing (cpu);
10473 return;
10474
10475 case DPIMM_ADDSUB_010:
10476 case DPIMM_ADDSUB_011:
10477 dexAddSubtractImmediate (cpu);
10478 return;
10479
10480 case DPIMM_LOG_100:
10481 dexLogicalImmediate (cpu);
10482 return;
10483
10484 case DPIMM_MOV_101:
10485 dexMoveWideImmediate (cpu);
10486 return;
10487
10488 case DPIMM_BITF_110:
10489 dexBitfieldImmediate (cpu);
10490 return;
10491
10492 case DPIMM_EXTR_111:
10493 dexExtractImmediate (cpu);
10494 return;
10495
10496 default:
10497 /* Should never reach here. */
10498 HALT_NYI;
10499 }
10500 }
10501
10502 static void
10503 dexLoadUnscaledImmediate (sim_cpu *cpu)
10504 {
10505 /* instr[29,24] == 111_00
10506 instr[21] == 0
10507 instr[11,10] == 00
10508 instr[31,30] = size
10509 instr[26] = V
10510 instr[23,22] = opc
10511 instr[20,12] = simm9
10512 instr[9,5] = rn may be SP. */
10513 /* unsigned rt = INSTR (4, 0); */
10514 uint32_t V = INSTR (26, 26);
10515 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10516 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10517
10518 if (!V)
10519 {
10520 /* GReg operations. */
10521 switch (dispatch)
10522 {
10523 case 0: sturb (cpu, imm); return;
10524 case 1: ldurb32 (cpu, imm); return;
10525 case 2: ldursb64 (cpu, imm); return;
10526 case 3: ldursb32 (cpu, imm); return;
10527 case 4: sturh (cpu, imm); return;
10528 case 5: ldurh32 (cpu, imm); return;
10529 case 6: ldursh64 (cpu, imm); return;
10530 case 7: ldursh32 (cpu, imm); return;
10531 case 8: stur32 (cpu, imm); return;
10532 case 9: ldur32 (cpu, imm); return;
10533 case 10: ldursw (cpu, imm); return;
10534 case 12: stur64 (cpu, imm); return;
10535 case 13: ldur64 (cpu, imm); return;
10536
10537 case 14:
10538 /* PRFUM NYI. */
10539 HALT_NYI;
10540
10541 default:
10542 case 11:
10543 case 15:
10544 HALT_UNALLOC;
10545 }
10546 }
10547
10548 /* FReg operations. */
10549 switch (dispatch)
10550 {
10551 case 2: fsturq (cpu, imm); return;
10552 case 3: fldurq (cpu, imm); return;
10553 case 8: fsturs (cpu, imm); return;
10554 case 9: fldurs (cpu, imm); return;
10555 case 12: fsturd (cpu, imm); return;
10556 case 13: fldurd (cpu, imm); return;
10557
10558 case 0: /* STUR 8 bit FP. */
10559 case 1: /* LDUR 8 bit FP. */
10560 case 4: /* STUR 16 bit FP. */
10561 case 5: /* LDUR 16 bit FP. */
10562 HALT_NYI;
10563
10564 default:
10565 case 6:
10566 case 7:
10567 case 10:
10568 case 11:
10569 case 14:
10570 case 15:
10571 HALT_UNALLOC;
10572 }
10573 }
10574
10575 /* N.B. A preliminary note regarding all the ldrs<x>32
10576 instructions
10577
10578 The signed value loaded by these instructions is cast to unsigned
10579 before being passed to aarch64_set_reg_u64 (cpu, N, ...), i.e. before
10580 being assigned to the 64 bit element of the GReg union. This performs
10581 a 32 bit sign extension (as required) but avoids 64 bit sign
10582 extension, thus ensuring that the top half of the register word is
10583 zero. This is what the spec demands when a 32 bit load occurs. */
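/* As a concrete illustration: loading the byte 0x80 yields the int8_t
   value -128; converting it to uint32_t gives 0xFFFFFF80 (the 32 bit
   sign extension), and widening that uint32_t to the uint64_t register
   value zero-extends, leaving 0x00000000FFFFFF80 in the register.  */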
10584
10585 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10586 static void
10587 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10588 {
10589 unsigned int rn = INSTR (9, 5);
10590 unsigned int rt = INSTR (4, 0);
10591
10592 /* The target register may not be SP but the source may be.
10593 There is no scaling required for a byte load. */
10594 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10595 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10596 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10597 }
10598
10599 /* 32 bit load sign-extended byte scaled or unscaled zero-
10600 or sign-extended 32-bit register offset. */
10601 static void
10602 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10603 {
10604 unsigned int rm = INSTR (20, 16);
10605 unsigned int rn = INSTR (9, 5);
10606 unsigned int rt = INSTR (4, 0);
10607
10608 /* rn may reference SP, rm and rt must reference ZR. */
10609
10610 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10611 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10612 extension);
10613
10614 /* There is no scaling required for a byte load. */
10615 aarch64_set_reg_u64
10616 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10617 + displacement));
10618 }
10619
10620 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10621 pre- or post-writeback. */
10622 static void
10623 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10624 {
10625 uint64_t address;
10626 unsigned int rn = INSTR (9, 5);
10627 unsigned int rt = INSTR (4, 0);
10628
10629 if (rn == rt && wb != NoWriteBack)
10630 HALT_UNALLOC;
10631
10632 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10633
10634 if (wb == Pre)
10635 address += offset;
10636
10637 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10638 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10639
10640 if (wb == Post)
10641 address += offset;
10642
10643 if (wb != NoWriteBack)
10644 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10645 }
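/* All of the *_wb handlers follow this pattern: with Pre indexing the
   offset is applied before the access and the updated address is
   written back; with Post the access uses the original address and the
   offset is added only for the write back; with NoWriteBack the base
   register is left untouched.  E.g. with Rn = 0x1000 and offset = 8,
   Pre accesses 0x1008 and leaves Rn = 0x1008, while Post accesses
   0x1000 and also leaves Rn = 0x1008.  */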
10646
10647 /* 8 bit store scaled. */
10648 static void
10649 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10650 {
10651 unsigned st = INSTR (4, 0);
10652 unsigned rn = INSTR (9, 5);
10653
10654 aarch64_set_mem_u8 (cpu,
10655 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10656 aarch64_get_vec_u8 (cpu, st, 0));
10657 }
10658
10659 /* 8 bit store scaled or unscaled zero- or
10660 sign-extended 8-bit register offset. */
10661 static void
10662 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10663 {
10664 unsigned rm = INSTR (20, 16);
10665 unsigned rn = INSTR (9, 5);
10666 unsigned st = INSTR (4, 0);
10667
10668 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10669 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10670 extension);
10671 uint64_t displacement = scaling == Scaled ? extended : 0;
10672
10673 aarch64_set_mem_u8
10674 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10675 }
10676
10677 /* 16 bit store scaled. */
10678 static void
10679 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10680 {
10681 unsigned st = INSTR (4, 0);
10682 unsigned rn = INSTR (9, 5);
10683
10684 aarch64_set_mem_u16
10685 (cpu,
10686 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10687 aarch64_get_vec_u16 (cpu, st, 0));
10688 }
10689
10690 /* 16 bit store scaled or unscaled zero-
10691 or sign-extended 16-bit register offset. */
10692 static void
10693 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10694 {
10695 unsigned rm = INSTR (20, 16);
10696 unsigned rn = INSTR (9, 5);
10697 unsigned st = INSTR (4, 0);
10698
10699 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10700 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10701 extension);
10702 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10703
10704 aarch64_set_mem_u16
10705 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10706 }
10707
10708 /* 32 bit store scaled unsigned 12 bit. */
10709 static void
10710 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10711 {
10712 unsigned st = INSTR (4, 0);
10713 unsigned rn = INSTR (9, 5);
10714
10715 aarch64_set_mem_u32
10716 (cpu,
10717 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10718 aarch64_get_vec_u32 (cpu, st, 0));
10719 }
10720
10721 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10722 static void
10723 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10724 {
10725 unsigned rn = INSTR (9, 5);
10726 unsigned st = INSTR (4, 0);
10727
10728 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10729
10730 if (wb != Post)
10731 address += offset;
10732
10733 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10734
10735 if (wb == Post)
10736 address += offset;
10737
10738 if (wb != NoWriteBack)
10739 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10740 }
10741
10742 /* 32 bit store scaled or unscaled zero-
10743 or sign-extended 32-bit register offset. */
10744 static void
10745 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10746 {
10747 unsigned rm = INSTR (20, 16);
10748 unsigned rn = INSTR (9, 5);
10749 unsigned st = INSTR (4, 0);
10750
10751 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10752 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10753 extension);
10754 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10755
10756 aarch64_set_mem_u32
10757 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10758 }
10759
10760 /* 64 bit store scaled unsigned 12 bit. */
10761 static void
10762 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10763 {
10764 unsigned st = INSTR (4, 0);
10765 unsigned rn = INSTR (9, 5);
10766
10767 aarch64_set_mem_u64
10768 (cpu,
10769 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10770 aarch64_get_vec_u64 (cpu, st, 0));
10771 }
10772
10773 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10774 static void
10775 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10776 {
10777 unsigned rn = INSTR (9, 5);
10778 unsigned st = INSTR (4, 0);
10779
10780 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10781
10782 if (wb != Post)
10783 address += offset;
10784
10785 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10786
10787 if (wb == Post)
10788 address += offset;
10789
10790 if (wb != NoWriteBack)
10791 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10792 }
10793
10794 /* 64 bit store scaled or unscaled zero-
10795 or sign-extended 32-bit register offset. */
10796 static void
10797 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10798 {
10799 unsigned rm = INSTR (20, 16);
10800 unsigned rn = INSTR (9, 5);
10801 unsigned st = INSTR (4, 0);
10802
10803 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10804 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10805 extension);
10806 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10807
10808 aarch64_set_mem_u64
10809 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10810 }
10811
10812 /* 128 bit store scaled unsigned 12 bit. */
10813 static void
10814 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10815 {
10816 FRegister a;
10817 unsigned st = INSTR (4, 0);
10818 unsigned rn = INSTR (9, 5);
10819 uint64_t addr;
10820
10821 aarch64_get_FP_long_double (cpu, st, & a);
10822
10823 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10824 aarch64_set_mem_long_double (cpu, addr, a);
10825 }
10826
10827 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10828 static void
10829 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10830 {
10831 FRegister a;
10832 unsigned rn = INSTR (9, 5);
10833 unsigned st = INSTR (4, 0);
10834 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10835
10836 if (wb != Post)
10837 address += offset;
10838
10839 aarch64_get_FP_long_double (cpu, st, & a);
10840 aarch64_set_mem_long_double (cpu, address, a);
10841
10842 if (wb == Post)
10843 address += offset;
10844
10845 if (wb != NoWriteBack)
10846 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10847 }
10848
10849 /* 128 bit store scaled or unscaled zero-
10850 or sign-extended 32-bit register offset. */
10851 static void
10852 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10853 {
10854 unsigned rm = INSTR (20, 16);
10855 unsigned rn = INSTR (9, 5);
10856 unsigned st = INSTR (4, 0);
10857
10858 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10859 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10860 extension);
10861 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10862
10863 FRegister a;
10864
10865 aarch64_get_FP_long_double (cpu, st, & a);
10866 aarch64_set_mem_long_double (cpu, address + displacement, a);
10867 }
10868
10869 static void
10870 dexLoadImmediatePrePost (sim_cpu *cpu)
10871 {
10872 /* instr[31,30] = size
10873 instr[29,27] = 111
10874 instr[26] = V
10875 instr[25,24] = 00
10876 instr[23,22] = opc
10877 instr[21] = 0
10878 instr[20,12] = simm9
10879 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10880 instr[10] = 0
10881 instr[9,5] = Rn may be SP.
10882 instr[4,0] = Rt */
10883
10884 uint32_t V = INSTR (26, 26);
10885 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10886 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10887 WriteBack wb = INSTR (11, 11);
10888
10889 if (!V)
10890 {
10891 /* GReg operations. */
10892 switch (dispatch)
10893 {
10894 case 0: strb_wb (cpu, imm, wb); return;
10895 case 1: ldrb32_wb (cpu, imm, wb); return;
10896 case 2: ldrsb_wb (cpu, imm, wb); return;
10897 case 3: ldrsb32_wb (cpu, imm, wb); return;
10898 case 4: strh_wb (cpu, imm, wb); return;
10899 case 5: ldrh32_wb (cpu, imm, wb); return;
10900 case 6: ldrsh64_wb (cpu, imm, wb); return;
10901 case 7: ldrsh32_wb (cpu, imm, wb); return;
10902 case 8: str32_wb (cpu, imm, wb); return;
10903 case 9: ldr32_wb (cpu, imm, wb); return;
10904 case 10: ldrsw_wb (cpu, imm, wb); return;
10905 case 12: str_wb (cpu, imm, wb); return;
10906 case 13: ldr_wb (cpu, imm, wb); return;
10907
10908 default:
10909 case 11:
10910 case 14:
10911 case 15:
10912 HALT_UNALLOC;
10913 }
10914 }
10915
10916 /* FReg operations. */
10917 switch (dispatch)
10918 {
10919 case 2: fstrq_wb (cpu, imm, wb); return;
10920 case 3: fldrq_wb (cpu, imm, wb); return;
10921 case 8: fstrs_wb (cpu, imm, wb); return;
10922 case 9: fldrs_wb (cpu, imm, wb); return;
10923 case 12: fstrd_wb (cpu, imm, wb); return;
10924 case 13: fldrd_wb (cpu, imm, wb); return;
10925
10926 case 0: /* STR 8 bit FP. */
10927 case 1: /* LDR 8 bit FP. */
10928 case 4: /* STR 16 bit FP. */
10929 case 5: /* LDR 16 bit FP. */
10930 HALT_NYI;
10931
10932 default:
10933 case 6:
10934 case 7:
10935 case 10:
10936 case 11:
10937 case 14:
10938 case 15:
10939 HALT_UNALLOC;
10940 }
10941 }
10942
10943 static void
10944 dexLoadRegisterOffset (sim_cpu *cpu)
10945 {
10946 /* instr[31,30] = size
10947 instr[29,27] = 111
10948 instr[26] = V
10949 instr[25,24] = 00
10950 instr[23,22] = opc
10951 instr[21] = 1
10952 instr[20,16] = rm
10953 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10954 110 ==> SXTW, 111 ==> SXTX,
10955 otherwise ==> RESERVED
10956 instr[12] = scaled
10957 instr[11,10] = 10
10958 instr[9,5] = rn
10959 instr[4,0] = rt. */
10960
10961 uint32_t V = INSTR (26, 26);
10962 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10963 Scaling scale = INSTR (12, 12);
10964 Extension extensionType = INSTR (15, 13);
10965
10966 /* Check for illegal extension types. */
10967 if (uimm (extensionType, 1, 1) == 0)
10968 HALT_UNALLOC;
10969
10970 if (extensionType == UXTX || extensionType == SXTX)
10971 extensionType = NoExtension;
10972
10973 if (!V)
10974 {
10975 /* GReg operations. */
10976 switch (dispatch)
10977 {
10978 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10979 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10980 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10981 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10982 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10983 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10984 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10985 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10986 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10987 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10988 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10989 case 12: str_scale_ext (cpu, scale, extensionType); return;
10990 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10991 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10992
10993 default:
10994 case 11:
10995 case 15:
10996 HALT_UNALLOC;
10997 }
10998 }
10999
11000 /* FReg operations. */
11001 switch (dispatch)
11002 {
11003 case 1: /* LDR 8 bit FP. */
11004 HALT_NYI;
11005 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11006 case 5: /* LDR 16 bit FP. */
11007 HALT_NYI;
11008 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11009 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11010
11011 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11012 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11013 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11014 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11015 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11016
11017 default:
11018 case 6:
11019 case 7:
11020 case 10:
11021 case 11:
11022 case 14:
11023 case 15:
11024 HALT_UNALLOC;
11025 }
11026 }
11027
11028 static void
11029 dexLoadUnsignedImmediate (sim_cpu *cpu)
11030 {
11031 /* instr[29,24] == 111_01
11032 instr[31,30] = size
11033 instr[26] = V
11034 instr[23,22] = opc
11035 instr[21,10] = uimm12 : unsigned immediate offset
11036 instr[9,5] = rn may be SP.
11037 instr[4,0] = rt. */
11038
11039 uint32_t V = INSTR (26,26);
11040 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11041 uint32_t imm = INSTR (21, 10);
11042
11043 if (!V)
11044 {
11045 /* GReg operations. */
11046 switch (dispatch)
11047 {
11048 case 0: strb_abs (cpu, imm); return;
11049 case 1: ldrb32_abs (cpu, imm); return;
11050 case 2: ldrsb_abs (cpu, imm); return;
11051 case 3: ldrsb32_abs (cpu, imm); return;
11052 case 4: strh_abs (cpu, imm); return;
11053 case 5: ldrh32_abs (cpu, imm); return;
11054 case 6: ldrsh_abs (cpu, imm); return;
11055 case 7: ldrsh32_abs (cpu, imm); return;
11056 case 8: str32_abs (cpu, imm); return;
11057 case 9: ldr32_abs (cpu, imm); return;
11058 case 10: ldrsw_abs (cpu, imm); return;
11059 case 12: str_abs (cpu, imm); return;
11060 case 13: ldr_abs (cpu, imm); return;
11061 case 14: prfm_abs (cpu, imm); return;
11062
11063 default:
11064 case 11:
11065 case 15:
11066 HALT_UNALLOC;
11067 }
11068 }
11069
11070 /* FReg operations. */
11071 switch (dispatch)
11072 {
11073 case 0: fstrb_abs (cpu, imm); return;
11074 case 4: fstrh_abs (cpu, imm); return;
11075 case 8: fstrs_abs (cpu, imm); return;
11076 case 12: fstrd_abs (cpu, imm); return;
11077 case 2: fstrq_abs (cpu, imm); return;
11078
11079 case 1: fldrb_abs (cpu, imm); return;
11080 case 5: fldrh_abs (cpu, imm); return;
11081 case 9: fldrs_abs (cpu, imm); return;
11082 case 13: fldrd_abs (cpu, imm); return;
11083 case 3: fldrq_abs (cpu, imm); return;
11084
11085 default:
11086 case 6:
11087 case 7:
11088 case 10:
11089 case 11:
11090 case 14:
11091 case 15:
11092 HALT_UNALLOC;
11093 }
11094 }
11095
11096 static void
11097 dexLoadExclusive (sim_cpu *cpu)
11098 {
11099 /* assert instr[29:24] = 001000;
11100 instr[31,30] = size
11101 instr[23] = 0 if exclusive
11102 instr[22] = L : 1 if load, 0 if store
11103 instr[21] = 1 if pair
11104 instr[20,16] = Rs
11105 instr[15] = o0 : 1 if ordered
11106 instr[14,10] = Rt2
11107 instr[9,5] = Rn
11108 instr[4,0] = Rt. */
11109
11110 switch (INSTR (22, 21))
11111 {
11112 case 2: ldxr (cpu); return;
11113 case 0: stxr (cpu); return;
11114 default: HALT_NYI;
11115 }
11116 }
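/* INSTR (22, 21) combines L (load) with the pair bit, so only the
   non-pair forms are handled here: 2 (L = 1) is LDXR and 0 (L = 0) is
   STXR; the pair variants fall through to HALT_NYI.  */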
11117
11118 static void
11119 dexLoadOther (sim_cpu *cpu)
11120 {
11121 uint32_t dispatch;
11122
11123 /* instr[29,25] = 111_0
11124 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11125 instr[21:11,10] is the secondary dispatch. */
11126 if (INSTR (24, 24))
11127 {
11128 dexLoadUnsignedImmediate (cpu);
11129 return;
11130 }
11131
11132 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11133 switch (dispatch)
11134 {
11135 case 0: dexLoadUnscaledImmediate (cpu); return;
11136 case 1: dexLoadImmediatePrePost (cpu); return;
11137 case 3: dexLoadImmediatePrePost (cpu); return;
11138 case 6: dexLoadRegisterOffset (cpu); return;
11139
11140 default:
11141 case 2:
11142 case 4:
11143 case 5:
11144 case 7:
11145 HALT_NYI;
11146 }
11147 }
11148
11149 static void
11150 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11151 {
11152 unsigned rn = INSTR (14, 10);
11153 unsigned rd = INSTR (9, 5);
11154 unsigned rm = INSTR (4, 0);
11155 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11156
11157 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11158 HALT_UNALLOC; /* Constrained unpredictable in the architecture. */
11159
11160 offset <<= 2;
11161
11162 if (wb != Post)
11163 address += offset;
11164
11165 aarch64_set_mem_u32 (cpu, address,
11166 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11167 aarch64_set_mem_u32 (cpu, address + 4,
11168 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11169
11170 if (wb == Post)
11171 address += offset;
11172
11173 if (wb != NoWriteBack)
11174 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11175 }
11176
11177 static void
11178 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11179 {
11180 unsigned rn = INSTR (14, 10);
11181 unsigned rd = INSTR (9, 5);
11182 unsigned rm = INSTR (4, 0);
11183 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11184
11185 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11186 HALT_UNALLOC; /* Constrained unpredictable in the architecture. */
11187
11188 offset <<= 3;
11189
11190 if (wb != Post)
11191 address += offset;
11192
11193 aarch64_set_mem_u64 (cpu, address,
11194 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11195 aarch64_set_mem_u64 (cpu, address + 8,
11196 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11197
11198 if (wb == Post)
11199 address += offset;
11200
11201 if (wb != NoWriteBack)
11202 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11203 }
11204
11205 static void
11206 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11207 {
11208 unsigned rn = INSTR (14, 10);
11209 unsigned rd = INSTR (9, 5);
11210 unsigned rm = INSTR (4, 0);
11211 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11212
11213 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11214 if (rn == rm)
11215 HALT_UNALLOC;
11216
11217 offset <<= 2;
11218
11219 if (wb != Post)
11220 address += offset;
11221
11222 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11223 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11224
11225 if (wb == Post)
11226 address += offset;
11227
11228 if (wb != NoWriteBack)
11229 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11230 }
11231
11232 static void
11233 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11234 {
11235 unsigned rn = INSTR (14, 10);
11236 unsigned rd = INSTR (9, 5);
11237 unsigned rm = INSTR (4, 0);
11238 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11239
11240 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11241 if (rn == rm)
11242 HALT_UNALLOC;
11243
11244 offset <<= 2;
11245
11246 if (wb != Post)
11247 address += offset;
11248
11249 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11250 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11251
11252 if (wb == Post)
11253 address += offset;
11254
11255 if (wb != NoWriteBack)
11256 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11257 }
11258
11259 static void
11260 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11261 {
11262 unsigned rn = INSTR (14, 10);
11263 unsigned rd = INSTR (9, 5);
11264 unsigned rm = INSTR (4, 0);
11265 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11266
11267 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11268 if (rn == rm)
11269 HALT_UNALLOC;
11270
11271 offset <<= 3;
11272
11273 if (wb != Post)
11274 address += offset;
11275
11276 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11277 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11278
11279 if (wb == Post)
11280 address += offset;
11281
11282 if (wb != NoWriteBack)
11283 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11284 }
11285
11286 static void
11287 dex_load_store_pair_gr (sim_cpu *cpu)
11288 {
11289 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11290 instr[29,25] = instruction encoding: 101_0
11291 instr[26] = V : 1 if fp 0 if gp
11292 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11293 instr[22] = load/store (1=> load)
11294 instr[21,15] = signed, scaled, offset
11295 instr[14,10] = Rn
11296 instr[ 9, 5] = Rd
11297 instr[ 4, 0] = Rm. */
11298
11299 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11300 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11301
11302 switch (dispatch)
11303 {
11304 case 2: store_pair_u32 (cpu, offset, Post); return;
11305 case 3: load_pair_u32 (cpu, offset, Post); return;
11306 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11307 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11308 case 6: store_pair_u32 (cpu, offset, Pre); return;
11309 case 7: load_pair_u32 (cpu, offset, Pre); return;
11310
11311 case 11: load_pair_s32 (cpu, offset, Post); return;
11312 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11313 case 15: load_pair_s32 (cpu, offset, Pre); return;
11314
11315 case 18: store_pair_u64 (cpu, offset, Post); return;
11316 case 19: load_pair_u64 (cpu, offset, Post); return;
11317 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11318 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11319 case 22: store_pair_u64 (cpu, offset, Pre); return;
11320 case 23: load_pair_u64 (cpu, offset, Pre); return;
11321
11322 default:
11323 HALT_UNALLOC;
11324 }
11325 }
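/* For example, "stp x1, x2, [sp, #-16]!" has size = 10, addressing
   mode = 11 (pre) and L = 0, so dispatch = (2 << 3) | 6 = 22 and
   store_pair_u64 is called with Pre write back; its simm7 field of -2
   is scaled by 8 to give the byte offset -16.  */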
11326
11327 static void
11328 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11329 {
11330 unsigned rn = INSTR (14, 10);
11331 unsigned rd = INSTR (9, 5);
11332 unsigned rm = INSTR (4, 0);
11333 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11334
11335 offset <<= 2;
11336
11337 if (wb != Post)
11338 address += offset;
11339
11340 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11341 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11342
11343 if (wb == Post)
11344 address += offset;
11345
11346 if (wb != NoWriteBack)
11347 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11348 }
11349
11350 static void
11351 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11352 {
11353 unsigned rn = INSTR (14, 10);
11354 unsigned rd = INSTR (9, 5);
11355 unsigned rm = INSTR (4, 0);
11356 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11357
11358 offset <<= 3;
11359
11360 if (wb != Post)
11361 address += offset;
11362
11363 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11364 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11365
11366 if (wb == Post)
11367 address += offset;
11368
11369 if (wb != NoWriteBack)
11370 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11371 }
11372
11373 static void
11374 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11375 {
11376 FRegister a;
11377 unsigned rn = INSTR (14, 10);
11378 unsigned rd = INSTR (9, 5);
11379 unsigned rm = INSTR (4, 0);
11380 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11381
11382 offset <<= 4;
11383
11384 if (wb != Post)
11385 address += offset;
11386
11387 aarch64_get_FP_long_double (cpu, rm, & a);
11388 aarch64_set_mem_long_double (cpu, address, a);
11389 aarch64_get_FP_long_double (cpu, rn, & a);
11390 aarch64_set_mem_long_double (cpu, address + 16, a);
11391
11392 if (wb == Post)
11393 address += offset;
11394
11395 if (wb != NoWriteBack)
11396 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11397 }
11398
11399 static void
11400 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11401 {
11402 unsigned rn = INSTR (14, 10);
11403 unsigned rd = INSTR (9, 5);
11404 unsigned rm = INSTR (4, 0);
11405 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11406
11407 if (rm == rn)
11408 HALT_UNALLOC;
11409
11410 offset <<= 2;
11411
11412 if (wb != Post)
11413 address += offset;
11414
11415 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11416 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11417
11418 if (wb == Post)
11419 address += offset;
11420
11421 if (wb != NoWriteBack)
11422 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11423 }
11424
11425 static void
11426 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11427 {
11428 unsigned rn = INSTR (14, 10);
11429 unsigned rd = INSTR (9, 5);
11430 unsigned rm = INSTR (4, 0);
11431 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11432
11433 if (rm == rn)
11434 HALT_UNALLOC;
11435
11436 offset <<= 3;
11437
11438 if (wb != Post)
11439 address += offset;
11440
11441 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11442 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11443
11444 if (wb == Post)
11445 address += offset;
11446
11447 if (wb != NoWriteBack)
11448 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11449 }
11450
11451 static void
11452 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11453 {
11454 FRegister a;
11455 unsigned rn = INSTR (14, 10);
11456 unsigned rd = INSTR (9, 5);
11457 unsigned rm = INSTR (4, 0);
11458 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11459
11460 if (rm == rn)
11461 HALT_UNALLOC;
11462
11463 offset <<= 4;
11464
11465 if (wb != Post)
11466 address += offset;
11467
11468 aarch64_get_mem_long_double (cpu, address, & a);
11469 aarch64_set_FP_long_double (cpu, rm, a);
11470 aarch64_get_mem_long_double (cpu, address + 16, & a);
11471 aarch64_set_FP_long_double (cpu, rn, a);
11472
11473 if (wb == Post)
11474 address += offset;
11475
11476 if (wb != NoWriteBack)
11477 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11478 }
11479
11480 static void
11481 dex_load_store_pair_fp (sim_cpu *cpu)
11482 {
11483 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11484 instr[29,25] = instruction encoding
11485 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11486 instr[22] = load/store (1=> load)
11487 instr[21,15] = signed, scaled, offset
11488 instr[14,10] = Rn
11489 instr[ 9, 5] = Rd
11490 instr[ 4, 0] = Rm */
11491
11492 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11493 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11494
11495 switch (dispatch)
11496 {
11497 case 2: store_pair_float (cpu, offset, Post); return;
11498 case 3: load_pair_float (cpu, offset, Post); return;
11499 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11500 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11501 case 6: store_pair_float (cpu, offset, Pre); return;
11502 case 7: load_pair_float (cpu, offset, Pre); return;
11503
11504 case 10: store_pair_double (cpu, offset, Post); return;
11505 case 11: load_pair_double (cpu, offset, Post); return;
11506 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11507 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11508 case 14: store_pair_double (cpu, offset, Pre); return;
11509 case 15: load_pair_double (cpu, offset, Pre); return;
11510
11511 case 18: store_pair_long_double (cpu, offset, Post); return;
11512 case 19: load_pair_long_double (cpu, offset, Post); return;
11513 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11514 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11515 case 22: store_pair_long_double (cpu, offset, Pre); return;
11516 case 23: load_pair_long_double (cpu, offset, Pre); return;
11517
11518 default:
11519 HALT_UNALLOC;
11520 }
11521 }
11522
11523 static inline unsigned
11524 vec_reg (unsigned v, unsigned o)
11525 {
11526 return (v + o) & 0x1F; /* Register numbers wrap modulo 32. */
11527 }
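/* There are 32 vector registers, so consecutive register lists wrap
   modulo 32, e.g. vec_reg (31, 2) == 1.  */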
11528
11529 /* Load multiple N-element structures to M consecutive registers. */
11530 static void
11531 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11532 {
11533 int all = INSTR (30, 30);
11534 unsigned size = INSTR (11, 10);
11535 unsigned vd = INSTR (4, 0);
11536 unsigned rpt = (N == M) ? 1 : M;
11537 unsigned selem = N;
11538 unsigned i, j, k;
11539
11540 switch (size)
11541 {
11542 case 0: /* 8-bit operations. */
11543 for (i = 0; i < rpt; i++)
11544 for (j = 0; j < (8 + (8 * all)); j++)
11545 for (k = 0; k < selem; k++)
11546 {
11547 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11548 aarch64_get_mem_u8 (cpu, address));
11549 address += 1;
11550 }
11551 return;
11552
11553 case 1: /* 16-bit operations. */
11554 for (i = 0; i < rpt; i++)
11555 for (j = 0; j < (4 + (4 * all)); j++)
11556 for (k = 0; k < selem; k++)
11557 {
11558 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11559 aarch64_get_mem_u16 (cpu, address));
11560 address += 2;
11561 }
11562 return;
11563
11564 case 2: /* 32-bit operations. */
11565 for (i = 0; i < rpt; i++)
11566 for (j = 0; j < (2 + (2 * all)); j++)
11567 for (k = 0; k < selem; k++)
11568 {
11569 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11570 aarch64_get_mem_u32 (cpu, address));
11571 address += 4;
11572 }
11573 return;
11574
11575 case 3: /* 64-bit operations. */
11576 for (i = 0; i < rpt; i++)
11577 for (j = 0; j < (1 + all); j++)
11578 for (k = 0; k < selem; k++)
11579 {
11580 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11581 aarch64_get_mem_u64 (cpu, address));
11582 address += 8;
11583 }
11584 return;
11585 }
11586 }
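/* For example, LD2 {v0.4s, v1.4s} has N = M = 2, so rpt = 1 and
   selem = 2: the j loop walks the four 32-bit lanes while the k loop
   alternates between v0 and v1, de-interleaving memory so that lane j
   of v0 receives word 2*j and lane j of v1 receives word 2*j + 1.  */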
11587
11588 /* Load multiple 4-element structures into four consecutive registers. */
11589 static void
11590 LD4 (sim_cpu *cpu, uint64_t address)
11591 {
11592 vec_load (cpu, address, 4, 4);
11593 }
11594
11595 /* Load multiple 3-element structures into three consecutive registers. */
11596 static void
11597 LD3 (sim_cpu *cpu, uint64_t address)
11598 {
11599 vec_load (cpu, address, 3, 3);
11600 }
11601
11602 /* Load multiple 2-element structures into two consecutive registers. */
11603 static void
11604 LD2 (sim_cpu *cpu, uint64_t address)
11605 {
11606 vec_load (cpu, address, 2, 2);
11607 }
11608
11609 /* Load multiple 1-element structures into one register. */
11610 static void
11611 LD1_1 (sim_cpu *cpu, uint64_t address)
11612 {
11613 vec_load (cpu, address, 1, 1);
11614 }
11615
11616 /* Load multiple 1-element structures into two registers. */
11617 static void
11618 LD1_2 (sim_cpu *cpu, uint64_t address)
11619 {
11620 vec_load (cpu, address, 1, 2);
11621 }
11622
11623 /* Load multiple 1-element structures into three registers. */
11624 static void
11625 LD1_3 (sim_cpu *cpu, uint64_t address)
11626 {
11627 vec_load (cpu, address, 1, 3);
11628 }
11629
11630 /* Load multiple 1-element structures into four registers. */
11631 static void
11632 LD1_4 (sim_cpu *cpu, uint64_t address)
11633 {
11634 vec_load (cpu, address, 1, 4);
11635 }
11636
11637 /* Store multiple N-element structures from M consecutive registers. */
11638 static void
11639 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11640 {
11641 int all = INSTR (30, 30);
11642 unsigned size = INSTR (11, 10);
11643 unsigned vd = INSTR (4, 0);
11644 unsigned rpt = (N == M) ? 1 : M;
11645 unsigned selem = N;
11646 unsigned i, j, k;
11647
11648 switch (size)
11649 {
11650 case 0: /* 8-bit operations. */
11651 for (i = 0; i < rpt; i++)
11652 for (j = 0; j < (8 + (8 * all)); j++)
11653 for (k = 0; k < selem; k++)
11654 {
11655 aarch64_set_mem_u8
11656 (cpu, address,
11657 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11658 address += 1;
11659 }
11660 return;
11661
11662 case 1: /* 16-bit operations. */
11663 for (i = 0; i < rpt; i++)
11664 for (j = 0; j < (4 + (4 * all)); j++)
11665 for (k = 0; k < selem; k++)
11666 {
11667 aarch64_set_mem_u16
11668 (cpu, address,
11669 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11670 address += 2;
11671 }
11672 return;
11673
11674 case 2: /* 32-bit operations. */
11675 for (i = 0; i < rpt; i++)
11676 for (j = 0; j < (2 + (2 * all)); j++)
11677 for (k = 0; k < selem; k++)
11678 {
11679 aarch64_set_mem_u32
11680 (cpu, address,
11681 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11682 address += 4;
11683 }
11684 return;
11685
11686 case 3: /* 64-bit operations. */
11687 for (i = 0; i < rpt; i++)
11688 for (j = 0; j < (1 + all); j++)
11689 for (k = 0; k < selem; k++)
11690 {
11691 aarch64_set_mem_u64
11692 (cpu, address,
11693 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11694 address += 8;
11695 }
11696 return;
11697 }
11698 }
11699
11700 /* Store multiple 4-element structure from four consecutive registers. */
11701 static void
11702 ST4 (sim_cpu *cpu, uint64_t address)
11703 {
11704 vec_store (cpu, address, 4, 4);
11705 }
11706
11707 /* Store multiple 3-element structures from three consecutive registers. */
11708 static void
11709 ST3 (sim_cpu *cpu, uint64_t address)
11710 {
11711 vec_store (cpu, address, 3, 3);
11712 }
11713
11714 /* Store multiple 2-element structures from two consecutive registers. */
11715 static void
11716 ST2 (sim_cpu *cpu, uint64_t address)
11717 {
11718 vec_store (cpu, address, 2, 2);
11719 }
11720
11721 /* Store multiple 1-element structures from one register. */
11722 static void
11723 ST1_1 (sim_cpu *cpu, uint64_t address)
11724 {
11725 vec_store (cpu, address, 1, 1);
11726 }
11727
11728 /* Store multiple 1-element structures from two registers. */
11729 static void
11730 ST1_2 (sim_cpu *cpu, uint64_t address)
11731 {
11732 vec_store (cpu, address, 1, 2);
11733 }
11734
11735 /* Store multiple 1-element structures from three registers. */
11736 static void
11737 ST1_3 (sim_cpu *cpu, uint64_t address)
11738 {
11739 vec_store (cpu, address, 1, 3);
11740 }
11741
11742 /* Store multiple 1-element structures from four registers. */
11743 static void
11744 ST1_4 (sim_cpu *cpu, uint64_t address)
11745 {
11746 vec_store (cpu, address, 1, 4);
11747 }
11748
11749 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11750 do \
11751 { \
11752 switch (INSTR (15, 14)) \
11753 { \
11754 case 0: \
11755 lane = (full << 3) | (s << 2) | size; \
11756 size = 0; \
11757 break; \
11758 \
11759 case 1: \
11760 if ((size & 1) == 1) \
11761 HALT_UNALLOC; \
11762 lane = (full << 2) | (s << 1) | (size >> 1); \
11763 size = 1; \
11764 break; \
11765 \
11766 case 2: \
11767 if ((size & 2) == 2) \
11768 HALT_UNALLOC; \
11769 \
11770 if ((size & 1) == 0) \
11771 { \
11772 lane = (full << 1) | s; \
11773 size = 2; \
11774 } \
11775 else \
11776 { \
11777 if (s) \
11778 HALT_UNALLOC; \
11779 lane = full; \
11780 size = 3; \
11781 } \
11782 break; \
11783 \
11784 default: \
11785 HALT_UNALLOC; \
11786 } \
11787 } \
11788 while (0)
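/* Worked example: for a byte access (INSTR (15, 14) == 0) with
   full = 1, s = 1 and size = 3 the macro yields lane = 0b1111 = 15 and
   size = 0, i.e. byte lane 15 of a 128-bit register; for a halfword
   access (INSTR (15, 14) == 1) with full = 1, s = 0 and size = 2 it
   yields lane = 0b101 = 5 and size = 1.  */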
11789
11790 /* Load single structure into one lane of N registers. */
11791 static void
11792 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11793 {
11794 /* instr[31] = 0
11795 instr[30] = element selector 0=>half, 1=>all elements
11796 instr[29,24] = 00 1101
11797 instr[23] = 0=>simple, 1=>post
11798 instr[22] = 1
11799 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11800 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11801 11111 (immediate post inc)
11802 instr[15,13] = opcode
11803 instr[12] = S, used for lane number
11804 instr[11,10] = size, also used for lane number
11805 instr[9,5] = address
11806 instr[4,0] = Vd */
11807
11808 unsigned full = INSTR (30, 30);
11809 unsigned vd = INSTR (4, 0);
11810 unsigned size = INSTR (11, 10);
11811 unsigned s = INSTR (12, 12);
11812 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11813 int lane = 0;
11814 int i;
11815
11816 NYI_assert (29, 24, 0x0D);
11817 NYI_assert (22, 22, 1);
11818
11819 /* Compute the lane number first (using size), and then compute size. */
11820 LDn_STn_SINGLE_LANE_AND_SIZE ();
11821
11822 for (i = 0; i < nregs; i++)
11823 switch (size)
11824 {
11825 case 0:
11826 {
11827 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11828 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11829 break;
11830 }
11831
11832 case 1:
11833 {
11834 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11835 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11836 break;
11837 }
11838
11839 case 2:
11840 {
11841 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11842 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11843 break;
11844 }
11845
11846 case 3:
11847 {
11848 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11849 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11850 break;
11851 }
11852 }
11853 }
11854
11855 /* Store single structure from one lane from N registers. */
11856 static void
11857 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11858 {
11859 /* instr[31] = 0
11860 instr[30] = element selector 0=>half, 1=>all elements
11861 instr[29,24] = 00 1101
11862 instr[23] = 0=>simple, 1=>post
11863 instr[22] = 0
11864 instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11865 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11866 11111 (immediate post inc)
11867 instr[15,13] = opcode
11868 instr[12] = S, used for lane number
11869 instr[11,10] = size, also used for lane number
11870 instr[9,5] = address
11871 instr[4,0] = Vd */
11872
11873 unsigned full = INSTR (30, 30);
11874 unsigned vd = INSTR (4, 0);
11875 unsigned size = INSTR (11, 10);
11876 unsigned s = INSTR (12, 12);
11877 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11878 int lane = 0;
11879 int i;
11880
11881 NYI_assert (29, 24, 0x0D);
11882 NYI_assert (22, 22, 0);
11883
11884 /* Compute the lane number first (using size), and then compute size. */
11885 LDn_STn_SINGLE_LANE_AND_SIZE ();
11886
11887 for (i = 0; i < nregs; i++)
11888 switch (size)
11889 {
11890 case 0:
11891 {
11892 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11893 aarch64_set_mem_u8 (cpu, address + i, val);
11894 break;
11895 }
11896
11897 case 1:
11898 {
11899 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11900 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11901 break;
11902 }
11903
11904 case 2:
11905 {
11906 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11907 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11908 break;
11909 }
11910
11911 case 3:
11912 {
11913 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11914 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11915 break;
11916 }
11917 }
11918 }
11919
11920 /* Load single structure into all lanes of N registers. */
11921 static void
11922 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11923 {
11924 /* instr[31] = 0
11925 instr[30] = element selector 0=>half, 1=>all elements
11926 instr[29,24] = 00 1101
11927 instr[23] = 0=>simple, 1=>post
11928 instr[22] = 1
11929 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11930 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11931 11111 (immediate post inc)
11932 instr[15,14] = 11
11933 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11934 instr[12] = 0
11935 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11936 10=> word(s), 11=> double(d)
11937 instr[9,5] = address
11938 instr[4,0] = Vd */
11939
11940 unsigned full = INSTR (30, 30);
11941 unsigned vd = INSTR (4, 0);
11942 unsigned size = INSTR (11, 10);
11943 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11944 int i, n;
11945
11946 NYI_assert (29, 24, 0x0D);
11947 NYI_assert (22, 22, 1);
11948 NYI_assert (15, 14, 3);
11949 NYI_assert (12, 12, 0);
11950
11951 for (n = 0; n < nregs; n++)
11952 switch (size)
11953 {
11954 case 0:
11955 {
11956 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11957 for (i = 0; i < (full ? 16 : 8); i++)
11958 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11959 break;
11960 }
11961
11962 case 1:
11963 {
11964 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11965 for (i = 0; i < (full ? 8 : 4); i++)
11966 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11967 break;
11968 }
11969
11970 case 2:
11971 {
11972 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11973 for (i = 0; i < (full ? 4 : 2); i++)
11974 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11975 break;
11976 }
11977
11978 case 3:
11979 {
11980 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11981 for (i = 0; i < (full ? 2 : 1); i++)
11982 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11983 break;
11984 }
11985
11986 default:
11987 HALT_UNALLOC;
11988 }
11989 }
11990
11991 static void
11992 do_vec_load_store (sim_cpu *cpu)
11993 {
11994 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11995
11996 instr[31] = 0
11997 instr[30] = element selector 0=>half, 1=>all elements
11998 instr[29,25] = 00110
11999 instr[24] = 0=>multiple struct, 1=>single struct
12000 instr[23] = 0=>simple, 1=>post
12001 instr[22] = 0=>store, 1=>load
12002 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12003 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12004 11111 (immediate post inc)
12005 instr[15,12] = elements and destinations. eg for load:
12006 0000=>LD4 => load multiple 4-element to
12007 four consecutive registers
12008 0100=>LD3 => load multiple 3-element to
12009 three consecutive registers
12010 1000=>LD2 => load multiple 2-element to
12011 two consecutive registers
12012 0010=>LD1 => load multiple 1-element to
12013 four consecutive registers
12014 0110=>LD1 => load multiple 1-element to
12015 three consecutive registers
12016 1010=>LD1 => load multiple 1-element to
12017 two consecutive registers
12018 0111=>LD1 => load multiple 1-element to
12019 one register
12020 1100=>LD1R,LD2R
12021 1110=>LD3R,LD4R
12022 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12023 10=> word(s), 11=> double(d)
12024 instr[9,5] = Vn, can be SP
12025 instr[4,0] = Vd */
12026
12027 int single;
12028 int post;
12029 int load;
12030 unsigned vn;
12031 uint64_t address;
12032 int type;
12033
12034 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12035 HALT_NYI;
12036
12037 single = INSTR (24, 24);
12038 post = INSTR (23, 23);
12039 load = INSTR (22, 22);
12040 type = INSTR (15, 12);
12041 vn = INSTR (9, 5);
12042 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12043
12044 if (! single && INSTR (21, 21) != 0)
12045 HALT_UNALLOC;
12046
12047 if (post)
12048 {
12049 unsigned vm = INSTR (20, 16);
12050
12051 if (vm == R31)
12052 {
12053 unsigned sizeof_operation;
12054
12055 if (single)
12056 {
12057 if ((type >= 0) && (type <= 11))
12058 {
12059 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12060 switch (INSTR (15, 14))
12061 {
12062 case 0:
12063 sizeof_operation = nregs * 1;
12064 break;
12065 case 1:
12066 sizeof_operation = nregs * 2;
12067 break;
12068 case 2:
12069 if (INSTR (10, 10) == 0)
12070 sizeof_operation = nregs * 4;
12071 else
12072 sizeof_operation = nregs * 8;
12073 break;
12074 default:
12075 HALT_UNALLOC;
12076 }
12077 }
12078 else if (type == 0xC)
12079 {
12080 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12081 sizeof_operation <<= INSTR (11, 10);
12082 }
12083 else if (type == 0xE)
12084 {
12085 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12086 sizeof_operation <<= INSTR (11, 10);
12087 }
12088 else
12089 HALT_UNALLOC;
12090 }
12091 else
12092 {
12093 switch (type)
12094 {
12095 case 0: sizeof_operation = 32; break;
12096 case 4: sizeof_operation = 24; break;
12097 case 8: sizeof_operation = 16; break;
12098
12099 case 7:
12100 /* One register, immediate offset variant. */
12101 sizeof_operation = 8;
12102 break;
12103
12104 case 10:
12105 /* Two registers, immediate offset variant. */
12106 sizeof_operation = 16;
12107 break;
12108
12109 case 6:
12110 /* Three registers, immediate offset variant. */
12111 sizeof_operation = 24;
12112 break;
12113
12114 case 2:
12115 /* Four registers, immediate offset variant. */
12116 sizeof_operation = 32;
12117 break;
12118
12119 default:
12120 HALT_UNALLOC;
12121 }
12122
12123 if (INSTR (30, 30))
12124 sizeof_operation *= 2;
12125 }
12126
12127 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12128 }
12129 else
12130 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12131 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12132 }
12133 else
12134 {
12135 NYI_assert (20, 16, 0);
12136 }
12137
12138 if (single)
12139 {
12140 if (load)
12141 {
12142 if ((type >= 0) && (type <= 11))
12143 do_vec_LDn_single (cpu, address);
12144 else if ((type == 0xC) || (type == 0xE))
12145 do_vec_LDnR (cpu, address);
12146 else
12147 HALT_UNALLOC;
12148 return;
12149 }
12150
12151 /* Stores. */
12152 if ((type >= 0) && (type <= 11))
12153 {
12154 do_vec_STn_single (cpu, address);
12155 return;
12156 }
12157
12158 HALT_UNALLOC;
12159 }
12160
12161 if (load)
12162 {
12163 switch (type)
12164 {
12165 case 0: LD4 (cpu, address); return;
12166 case 4: LD3 (cpu, address); return;
12167 case 8: LD2 (cpu, address); return;
12168 case 2: LD1_4 (cpu, address); return;
12169 case 6: LD1_3 (cpu, address); return;
12170 case 10: LD1_2 (cpu, address); return;
12171 case 7: LD1_1 (cpu, address); return;
12172
12173 default:
12174 HALT_UNALLOC;
12175 }
12176 }
12177
12178 /* Stores. */
12179 switch (type)
12180 {
12181 case 0: ST4 (cpu, address); return;
12182 case 4: ST3 (cpu, address); return;
12183 case 8: ST2 (cpu, address); return;
12184 case 2: ST1_4 (cpu, address); return;
12185 case 6: ST1_3 (cpu, address); return;
12186 case 10: ST1_2 (cpu, address); return;
12187 case 7: ST1_1 (cpu, address); return;
12188 default:
12189 HALT_UNALLOC;
12190 }
12191 }
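/* Post-increment example: an immediate post-indexed LD4 of four
   128-bit registers (type == 0, instr[30] == 1) advances the base by
   32 * 2 = 64 bytes, matching "ld4 {v0.16b-v3.16b}, [x0], #64".  */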
12192
12193 static void
12194 dexLdSt (sim_cpu *cpu)
12195 {
12196 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12197 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12198 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12199 bits [29,28:26] of a LS are the secondary dispatch vector. */
12200 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12201
12202 switch (group2)
12203 {
12204 case LS_EXCL_000:
12205 dexLoadExclusive (cpu); return;
12206
12207 case LS_LIT_010:
12208 case LS_LIT_011:
12209 dexLoadLiteral (cpu); return;
12210
12211 case LS_OTHER_110:
12212 case LS_OTHER_111:
12213 dexLoadOther (cpu); return;
12214
12215 case LS_ADVSIMD_001:
12216 do_vec_load_store (cpu); return;
12217
12218 case LS_PAIR_100:
12219 dex_load_store_pair_gr (cpu); return;
12220
12221 case LS_PAIR_101:
12222 dex_load_store_pair_fp (cpu); return;
12223
12224 default:
12225 /* Should never reach here. */
12226 HALT_NYI;
12227 }
12228 }
12229
12230 /* Specific decode and execute for group Data Processing Register. */
12231
12232 static void
12233 dexLogicalShiftedRegister (sim_cpu *cpu)
12234 {
12235 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12236 instr[30,29] = op
12237 instr[28:24] = 01010
12238 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12239 instr[21] = N
12240 instr[20,16] = Rm
12241 instr[15,10] = count : must be 0xxxxx for 32 bit
12242 instr[9,5] = Rn
12243 instr[4,0] = Rd */
12244
12245 uint32_t size = INSTR (31, 31);
12246 Shift shiftType = INSTR (23, 22);
12247 uint32_t count = INSTR (15, 10);
12248
12249 /* 32 bit operations must have count[5] = 0,
12250 or else we have an UNALLOC. */
12251 if (size == 0 && uimm (count, 5, 5))
12252 HALT_UNALLOC;
12253
12254 /* Dispatch on size:op:N. */
12255 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12256 {
12257 case 0: and32_shift (cpu, shiftType, count); return;
12258 case 1: bic32_shift (cpu, shiftType, count); return;
12259 case 2: orr32_shift (cpu, shiftType, count); return;
12260 case 3: orn32_shift (cpu, shiftType, count); return;
12261 case 4: eor32_shift (cpu, shiftType, count); return;
12262 case 5: eon32_shift (cpu, shiftType, count); return;
12263 case 6: ands32_shift (cpu, shiftType, count); return;
12264 case 7: bics32_shift (cpu, shiftType, count); return;
12265 case 8: and64_shift (cpu, shiftType, count); return;
12266 case 9: bic64_shift (cpu, shiftType, count); return;
12267 case 10: orr64_shift (cpu, shiftType, count); return;
12268 case 11: orn64_shift (cpu, shiftType, count); return;
12269 case 12: eor64_shift (cpu, shiftType, count); return;
12270 case 13: eon64_shift (cpu, shiftType, count); return;
12271 case 14: ands64_shift (cpu, shiftType, count); return;
12272 case 15: bics64_shift (cpu, shiftType, count); return;
12273 }
12274 }
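/* The dispatch value above is size:op:N, so e.g. a 64-bit ANDS
   (size = 1, op = 11, N = 0) selects case 14, while a 32-bit BIC
   (size = 0, op = 00, N = 1) selects case 1.  */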
12275
12276 /* 32 bit conditional select. */
12277 static void
12278 csel32 (sim_cpu *cpu, CondCode cc)
12279 {
12280 unsigned rm = INSTR (20, 16);
12281 unsigned rn = INSTR (9, 5);
12282 unsigned rd = INSTR (4, 0);
12283
12284 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12285 testConditionCode (cpu, cc)
12286 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12287 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12288 }
12289
12290 /* 64 bit conditional select. */
12291 static void
12292 csel64 (sim_cpu *cpu, CondCode cc)
12293 {
12294 unsigned rm = INSTR (20, 16);
12295 unsigned rn = INSTR (9, 5);
12296 unsigned rd = INSTR (4, 0);
12297
12298 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12299 testConditionCode (cpu, cc)
12300 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12301 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12302 }
12303
12304 /* 32 bit conditional increment. */
12305 static void
12306 csinc32 (sim_cpu *cpu, CondCode cc)
12307 {
12308 unsigned rm = INSTR (20, 16);
12309 unsigned rn = INSTR (9, 5);
12310 unsigned rd = INSTR (4, 0);
12311
12312 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12313 testConditionCode (cpu, cc)
12314 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12315 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12316 }
12317
12318 /* 64 bit conditional increment. */
12319 static void
12320 csinc64 (sim_cpu *cpu, CondCode cc)
12321 {
12322 unsigned rm = INSTR (20, 16);
12323 unsigned rn = INSTR (9, 5);
12324 unsigned rd = INSTR (4, 0);
12325
12326 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12327 testConditionCode (cpu, cc)
12328 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12329 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12330 }
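/* Note that CSINC also implements the CINC alias (Rn == Rm with the
   condition inverted) and the CSET alias (Rn == Rm == ZR with the
   condition inverted).  */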
12331
12332 /* 32 bit conditional invert. */
12333 static void
12334 csinv32 (sim_cpu *cpu, CondCode cc)
12335 {
12336 unsigned rm = INSTR (20, 16);
12337 unsigned rn = INSTR (9, 5);
12338 unsigned rd = INSTR (4, 0);
12339
12340 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12341 testConditionCode (cpu, cc)
12342 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12343 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12344 }
12345
12346 /* 64 bit conditional invert. */
12347 static void
12348 csinv64 (sim_cpu *cpu, CondCode cc)
12349 {
12350 unsigned rm = INSTR (20, 16);
12351 unsigned rn = INSTR (9, 5);
12352 unsigned rd = INSTR (4, 0);
12353
12354 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12355 testConditionCode (cpu, cc)
12356 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12357 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12358 }
12359
12360 /* 32 bit conditional negate. */
12361 static void
12362 csneg32 (sim_cpu *cpu, CondCode cc)
12363 {
12364 unsigned rm = INSTR (20, 16);
12365 unsigned rn = INSTR (9, 5);
12366 unsigned rd = INSTR (4, 0);
12367
12368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12369 testConditionCode (cpu, cc)
12370 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12371 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12372 }
12373
12374 /* 64 bit conditional negate. */
12375 static void
12376 csneg64 (sim_cpu *cpu, CondCode cc)
12377 {
12378 unsigned rm = INSTR (20, 16);
12379 unsigned rn = INSTR (9, 5);
12380 unsigned rd = INSTR (4, 0);
12381
12382 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12383 testConditionCode (cpu, cc)
12384 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12385 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12386 }
12387
12388 static void
12389 dexCondSelect (sim_cpu *cpu)
12390 {
12391 /* instr[28,21] = 11010100
12392 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12393 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12394 100 ==> CSINV, 101 ==> CSNEG,
12395 _1_ ==> UNALLOC
12396 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12397 instr[15,12] = cond
12398 instr[20,16] = rm, instr[9,5] = rn, instr[4,0] = rd */
12399
12400 CondCode cc = INSTR (15, 12);
12401 uint32_t S = INSTR (29, 29);
12402 uint32_t op2 = INSTR (11, 10);
12403
12404 if (S == 1)
12405 HALT_UNALLOC;
12406
12407 if (op2 & 0x2)
12408 HALT_UNALLOC;
12409
12410 switch ((INSTR (31, 30) << 1) | op2)
12411 {
12412 case 0: csel32 (cpu, cc); return;
12413 case 1: csinc32 (cpu, cc); return;
12414 case 2: csinv32 (cpu, cc); return;
12415 case 3: csneg32 (cpu, cc); return;
12416 case 4: csel64 (cpu, cc); return;
12417 case 5: csinc64 (cpu, cc); return;
12418 case 6: csinv64 (cpu, cc); return;
12419 case 7: csneg64 (cpu, cc); return;
12420 }
12421 }
12422
12423 /* Some helpers for counting leading 1 or 0 bits. */
12424
12425 /* Counts the number of leading bits which are the same
12426 in a 32 bit value; the result is in the range 1 to 32. */
12427 static uint32_t
12428 leading32 (uint32_t value)
12429 {
12430 int32_t mask = 0xffff0000;
12431 uint32_t count = 16; /* Number of bits set in mask. */
12432 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12433 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12434
12435 while (lo + 1 < hi)
12436 {
12437 int32_t test = (value & mask);
12438
12439 if (test == 0 || test == mask)
12440 {
12441 lo = count;
12442 count = (lo + hi) / 2;
12443 mask >>= (count - lo);
12444 }
12445 else
12446 {
12447 hi = count;
12448 count = (lo + hi) / 2;
12449 mask <<= hi - count;
12450 }
12451 }
12452
12453 if (lo != hi)
12454 {
12455 int32_t test;
12456
12457 mask >>= 1;
12458 test = (value & mask);
12459
12460 if (test == 0 || test == mask)
12461 count = hi;
12462 else
12463 count = lo;
12464 }
12465
12466 return count;
12467 }
12468
12469 /* Counts the number of leading bits which are the same
12470 in a 64 bit value; the result is in the range 1 to 64. */
12471 static uint64_t
12472 leading64 (uint64_t value)
12473 {
12474 int64_t mask = 0xffffffff00000000LL;
12475 uint64_t count = 32; /* Number of bits set in mask. */
12476 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12477 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12478
12479 while (lo + 1 < hi)
12480 {
12481 int64_t test = (value & mask);
12482
12483 if (test == 0 || test == mask)
12484 {
12485 lo = count;
12486 count = (lo + hi) / 2;
12487 mask >>= (count - lo);
12488 }
12489 else
12490 {
12491 hi = count;
12492 count = (lo + hi) / 2;
12493 mask <<= hi - count;
12494 }
12495 }
12496
12497 if (lo != hi)
12498 {
12499 int64_t test;
12500
12501 mask >>= 1;
12502 test = (value & mask);
12503
12504 if (test == 0 || test == mask)
12505 count = hi;
12506 else
12507 count = lo;
12508 }
12509
12510 return count;
12511 }
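
/* For reference, the binary searches above compute the same result as
   this straightforward linear scan -- a minimal illustrative sketch of
   the 32 bit case, kept out of the build with #if 0. */
#if 0
static uint32_t
leading32_ref (uint32_t value)
{
  uint32_t top = value >> 31; /* The bit every leading bit must match. */
  uint32_t count = 1; /* The top bit always matches itself. */
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == top; i--)
    count++;

  return count;
}
#endif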
12512
12513 /* Bit operations. */
12514 /* N.B. register args may not be SP. */
12515
12516 /* 32 bit count leading sign bits. */
12517 static void
12518 cls32 (sim_cpu *cpu)
12519 {
12520 unsigned rn = INSTR (9, 5);
12521 unsigned rd = INSTR (4, 0);
12522
12523 /* N.B. the result needs to exclude the leading bit. */
12524 aarch64_set_reg_u64
12525 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12526 }
12527
12528 /* 64 bit count leading sign bits. */
12529 static void
12530 cls64 (sim_cpu *cpu)
12531 {
12532 unsigned rn = INSTR (9, 5);
12533 unsigned rd = INSTR (4, 0);
12534
12535 /* N.B. the result needs to exclude the leading bit. */
12536 aarch64_set_reg_u64
12537 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12538 }
12539
12540 /* 32 bit count leading zero bits. */
12541 static void
12542 clz32 (sim_cpu *cpu)
12543 {
12544 unsigned rn = INSTR (9, 5);
12545 unsigned rd = INSTR (4, 0);
12546 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12547
12548 /* if the sign (top) bit is set then the count is 0. */
12549 if (pick32 (value, 31, 31))
12550 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12551 else
12552 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12553 }
12554
12555 /* 64 bit count leading zero bits. */
12556 static void
12557 clz64 (sim_cpu *cpu)
12558 {
12559 unsigned rn = INSTR (9, 5);
12560 unsigned rd = INSTR (4, 0);
12561 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12562
12563 /* if the sign (top) bit is set then the count is 0. */
12564 if (pick64 (value, 63, 63))
12565 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12566 else
12567 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12568 }
12569
12570 /* 32 bit reverse bits. */
12571 static void
12572 rbit32 (sim_cpu *cpu)
12573 {
12574 unsigned rn = INSTR (9, 5);
12575 unsigned rd = INSTR (4, 0);
12576 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12577 uint32_t result = 0;
12578 int i;
12579
12580 for (i = 0; i < 32; i++)
12581 {
12582 result <<= 1;
12583 result |= (value & 1);
12584 value >>= 1;
12585 }
12586 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12587 }
12588
12589 /* 64 bit reverse bits. */
12590 static void
12591 rbit64 (sim_cpu *cpu)
12592 {
12593 unsigned rn = INSTR (9, 5);
12594 unsigned rd = INSTR (4, 0);
12595 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12596 uint64_t result = 0;
12597 int i;
12598
12599 for (i = 0; i < 64; i++)
12600 {
12601 result <<= 1;
12602 result |= (value & 1UL);
12603 value >>= 1;
12604 }
12605 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12606 }
12607
12608 /* 32 bit reverse bytes. */
12609 static void
12610 rev32 (sim_cpu *cpu)
12611 {
12612 unsigned rn = INSTR (9, 5);
12613 unsigned rd = INSTR (4, 0);
12614 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12615 uint32_t result = 0;
12616 int i;
12617
12618 for (i = 0; i < 4; i++)
12619 {
12620 result <<= 8;
12621 result |= (value & 0xff);
12622 value >>= 8;
12623 }
12624 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12625 }
12626
12627 /* 64 bit reverse bytes. */
12628 static void
12629 rev64 (sim_cpu *cpu)
12630 {
12631 unsigned rn = INSTR (9, 5);
12632 unsigned rd = INSTR (4, 0);
12633 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12634 uint64_t result = 0;
12635 int i;
12636
12637 for (i = 0; i < 8; i++)
12638 {
12639 result <<= 8;
12640 result |= (value & 0xffULL);
12641 value >>= 8;
12642 }
12643 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12644 }
12645
12646 /* 32 bit reverse shorts. */
12647 /* N.B. this reverses the order of the bytes in each half word. */
12648 static void
12649 revh32 (sim_cpu *cpu)
12650 {
12651 unsigned rn = INSTR (9, 5);
12652 unsigned rd = INSTR (4, 0);
12653 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12654 uint32_t result = 0;
12655 int i;
12656
12657 for (i = 0; i < 2; i++)
12658 {
12659 result <<= 8;
12660 result |= (value & 0x00ff00ff);
12661 value >>= 8;
12662 }
12663 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12664 }
12665
12666 /* 64 bit reverse shorts. */
12667 /* N.B. this reverses the order of the bytes in each half word. */
12668 static void
12669 revh64 (sim_cpu *cpu)
12670 {
12671 unsigned rn = INSTR (9, 5);
12672 unsigned rd = INSTR (4, 0);
12673 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12674 uint64_t result = 0;
12675 int i;
12676
12677 for (i = 0; i < 2; i++)
12678 {
12679 result <<= 8;
12680 result |= (value & 0x00ff00ff00ff00ffULL);
12681 value >>= 8;
12682 }
12683 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12684 }
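
/* Worked example for the two loops above (illustration only): with
   value = 0xAABBCCDD the first pass leaves result = 0x00BB00DD, the
   second shifts that up a byte and ORs in 0x00AA00CC, giving
   0xBBAADDCC -- the two bytes of each half word have been swapped. */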
12685
12686 static void
12687 dexDataProc1Source (sim_cpu *cpu)
12688 {
12689 /* instr[30] = 1
12690 instr[28,21] = 11010110
12691 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12692 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12693 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12694 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12695 000010 ==> REV (REV32 when 64 bit),
12696 000011 ==> REV (64 bit only, ow UNALLOC),
12697 000100 ==> CLZ, 000101 ==> CLS, ow ==> UNALLOC
12698 instr[9,5] = rn : may not be SP
12699 instr[4,0] = rd : may not be SP. */
12700
12701 uint32_t S = INSTR (29, 29);
12702 uint32_t opcode2 = INSTR (20, 16);
12703 uint32_t opcode = INSTR (15, 10);
12704 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12705
12706 if (S == 1)
12707 HALT_UNALLOC;
12708
12709 if (opcode2 != 0)
12710 HALT_UNALLOC;
12711
12712 if (opcode & 0x38)
12713 HALT_UNALLOC;
12714
12715 switch (dispatch)
12716 {
12717 case 0: rbit32 (cpu); return;
12718 case 1: revh32 (cpu); return;
12719 case 2: rev32 (cpu); return;
12720 case 4: clz32 (cpu); return;
12721 case 5: cls32 (cpu); return;
12722 case 8: rbit64 (cpu); return;
12723 case 9: revh64 (cpu); return;
12724 case 10: rev32 (cpu); return;
12725 case 11: rev64 (cpu); return;
12726 case 12: clz64 (cpu); return;
12727 case 13: cls64 (cpu); return;
12728 default: HALT_UNALLOC;
12729 }
12730 }
12731
12732 /* Variable shift.
12733 Shifts by count supplied in register.
12734 N.B. register args may not be SP.
12735 These all use the shifted auxiliary function for
12736 simplicity and clarity. Writing the actual shift
12737 inline would avoid a branch and so be faster but
12738 would also necessitate getting signs right. */
12739
12740 /* 32 bit arithmetic shift right. */
12741 static void
12742 asrv32 (sim_cpu *cpu)
12743 {
12744 unsigned rm = INSTR (20, 16);
12745 unsigned rn = INSTR (9, 5);
12746 unsigned rd = INSTR (4, 0);
12747
12748 aarch64_set_reg_u64
12749 (cpu, rd, NO_SP,
12750 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12751 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12752 }
12753
12754 /* 64 bit arithmetic shift right. */
12755 static void
12756 asrv64 (sim_cpu *cpu)
12757 {
12758 unsigned rm = INSTR (20, 16);
12759 unsigned rn = INSTR (9, 5);
12760 unsigned rd = INSTR (4, 0);
12761
12762 aarch64_set_reg_u64
12763 (cpu, rd, NO_SP,
12764 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12765 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12766 }
12767
12768 /* 32 bit logical shift left. */
12769 static void
12770 lslv32 (sim_cpu *cpu)
12771 {
12772 unsigned rm = INSTR (20, 16);
12773 unsigned rn = INSTR (9, 5);
12774 unsigned rd = INSTR (4, 0);
12775
12776 aarch64_set_reg_u64
12777 (cpu, rd, NO_SP,
12778 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12779 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12780 }
12781
12782 /* 64 bit logical shift left. */
12783 static void
12784 lslv64 (sim_cpu *cpu)
12785 {
12786 unsigned rm = INSTR (20, 16);
12787 unsigned rn = INSTR (9, 5);
12788 unsigned rd = INSTR (4, 0);
12789
12790 aarch64_set_reg_u64
12791 (cpu, rd, NO_SP,
12792 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12793 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12794 }
12795
12796 /* 32 bit logical shift right. */
12797 static void
12798 lsrv32 (sim_cpu *cpu)
12799 {
12800 unsigned rm = INSTR (20, 16);
12801 unsigned rn = INSTR (9, 5);
12802 unsigned rd = INSTR (4, 0);
12803
12804 aarch64_set_reg_u64
12805 (cpu, rd, NO_SP,
12806 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12807 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12808 }
12809
12810 /* 64 bit logical shift right. */
12811 static void
12812 lsrv64 (sim_cpu *cpu)
12813 {
12814 unsigned rm = INSTR (20, 16);
12815 unsigned rn = INSTR (9, 5);
12816 unsigned rd = INSTR (4, 0);
12817
12818 aarch64_set_reg_u64
12819 (cpu, rd, NO_SP,
12820 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12821 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12822 }
12823
12824 /* 32 bit rotate right. */
12825 static void
12826 rorv32 (sim_cpu *cpu)
12827 {
12828 unsigned rm = INSTR (20, 16);
12829 unsigned rn = INSTR (9, 5);
12830 unsigned rd = INSTR (4, 0);
12831
12832 aarch64_set_reg_u64
12833 (cpu, rd, NO_SP,
12834 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12835 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12836 }
12837
12838 /* 64 bit rotate right. */
12839 static void
12840 rorv64 (sim_cpu *cpu)
12841 {
12842 unsigned rm = INSTR (20, 16);
12843 unsigned rn = INSTR (9, 5);
12844 unsigned rd = INSTR (4, 0);
12845
12846 aarch64_set_reg_u64
12847 (cpu, rd, NO_SP,
12848 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12849 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12850 }
12851
12852
12853 /* Divide. */
12854
12855 /* 32 bit signed divide. */
12856 static void
12857 sdiv32 (sim_cpu *cpu)
12858 {
12859 unsigned rm = INSTR (20, 16);
12860 unsigned rn = INSTR (9, 5);
12861 unsigned rd = INSTR (4, 0);
12862 /* N.B. the pseudo-code does the divide using 64 bit data. */
12863 /* TODO : check that this rounds towards zero as required. */
12864 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12865 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12866
12867 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12868 divisor ? ((int32_t) (dividend / divisor)) : 0);
12869 }
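
/* N.B. doing the divide with 64 bit operands also covers the one case
   that is undefined for a native 32 bit divide in C: INT32_MIN / -1.
   In 64 bits it yields +2^31, which the truncation to int32_t folds
   back to 0x80000000, matching the architected SDIV result. */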
12870
12871 /* 64 bit signed divide. */
12872 static void
12873 sdiv64 (sim_cpu *cpu)
12874 {
12875 unsigned rm = INSTR (20, 16);
12876 unsigned rn = INSTR (9, 5);
12877 unsigned rd = INSTR (4, 0);
12878
12879 /* TODO : check that this rounds towards zero as required. */
12880 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12881
12882 aarch64_set_reg_s64
12883 (cpu, rd, NO_SP,
12884 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12885 }
12886
12887 /* 32 bit unsigned divide. */
12888 static void
12889 udiv32 (sim_cpu *cpu)
12890 {
12891 unsigned rm = INSTR (20, 16);
12892 unsigned rn = INSTR (9, 5);
12893 unsigned rd = INSTR (4, 0);
12894
12895 /* N.B. the pseudo-code does the divide using 64 bit data. */
12896 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12897 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12898
12899 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12900 divisor ? (uint32_t) (dividend / divisor) : 0);
12901 }
12902
12903 /* 64 bit unsigned divide. */
12904 static void
12905 udiv64 (sim_cpu *cpu)
12906 {
12907 unsigned rm = INSTR (20, 16);
12908 unsigned rn = INSTR (9, 5);
12909 unsigned rd = INSTR (4, 0);
12910
12911 /* TODO : check that this rounds towards zero as required. */
12912 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12913
12914 aarch64_set_reg_u64
12915 (cpu, rd, NO_SP,
12916 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12917 }
12918
12919 static void
12920 dexDataProc2Source (sim_cpu *cpu)
12921 {
12922 /* assert instr[30] == 0
12923 instr[28,21] == 11010110
12924 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12925 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12926 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12927 001000 ==> LSLV, 001001 ==> LSRV
12928 001010 ==> ASRV, 001011 ==> RORV
12929 ow ==> UNALLOC. */
12930
12931 uint32_t dispatch;
12932 uint32_t S = INSTR (29, 29);
12933 uint32_t opcode = INSTR (15, 10);
12934
12935 if (S == 1)
12936 HALT_UNALLOC;
12937
12938 if (opcode & 0x34)
12939 HALT_UNALLOC;
12940
12941 dispatch = ( (INSTR (31, 31) << 3)
12942 | (uimm (opcode, 3, 3) << 2)
12943 | uimm (opcode, 1, 0));
12944 switch (dispatch)
12945 {
12946 case 2: udiv32 (cpu); return;
12947 case 3: sdiv32 (cpu); return;
12948 case 4: lslv32 (cpu); return;
12949 case 5: lsrv32 (cpu); return;
12950 case 6: asrv32 (cpu); return;
12951 case 7: rorv32 (cpu); return;
12952 case 10: udiv64 (cpu); return;
12953 case 11: sdiv64 (cpu); return;
12954 case 12: lslv64 (cpu); return;
12955 case 13: lsrv64 (cpu); return;
12956 case 14: asrv64 (cpu); return;
12957 case 15: rorv64 (cpu); return;
12958 default: HALT_UNALLOC;
12959 }
12960 }
12961
12962
12963 /* Multiply. */
12964
12965 /* 32 bit multiply and add. */
12966 static void
12967 madd32 (sim_cpu *cpu)
12968 {
12969 unsigned rm = INSTR (20, 16);
12970 unsigned ra = INSTR (14, 10);
12971 unsigned rn = INSTR (9, 5);
12972 unsigned rd = INSTR (4, 0);
12973
12974 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12975 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12976 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12977 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12978 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12979 }
12980
12981 /* 64 bit multiply and add. */
12982 static void
12983 madd64 (sim_cpu *cpu)
12984 {
12985 unsigned rm = INSTR (20, 16);
12986 unsigned ra = INSTR (14, 10);
12987 unsigned rn = INSTR (9, 5);
12988 unsigned rd = INSTR (4, 0);
12989
12990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12992 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12993 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12994 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12995 }
12996
12997 /* 32 bit multiply and sub. */
12998 static void
12999 msub32 (sim_cpu *cpu)
13000 {
13001 unsigned rm = INSTR (20, 16);
13002 unsigned ra = INSTR (14, 10);
13003 unsigned rn = INSTR (9, 5);
13004 unsigned rd = INSTR (4, 0);
13005
13006 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13007 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13008 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13009 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13010 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13011 }
13012
13013 /* 64 bit multiply and sub. */
13014 static void
13015 msub64 (sim_cpu *cpu)
13016 {
13017 unsigned rm = INSTR (20, 16);
13018 unsigned ra = INSTR (14, 10);
13019 unsigned rn = INSTR (9, 5);
13020 unsigned rd = INSTR (4, 0);
13021
13022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13024 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13025 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13026 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13027 }
13028
13029 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13030 static void
13031 smaddl (sim_cpu *cpu)
13032 {
13033 unsigned rm = INSTR (20, 16);
13034 unsigned ra = INSTR (14, 10);
13035 unsigned rn = INSTR (9, 5);
13036 unsigned rd = INSTR (4, 0);
13037
13038 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13039 obtain a 64 bit product. */
13040 aarch64_set_reg_s64
13041 (cpu, rd, NO_SP,
13042 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13043 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13044 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13045 }
13046
13047 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13048 static void
13049 smsubl (sim_cpu *cpu)
13050 {
13051 unsigned rm = INSTR (20, 16);
13052 unsigned ra = INSTR (14, 10);
13053 unsigned rn = INSTR (9, 5);
13054 unsigned rd = INSTR (4, 0);
13055
13056 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13057 obtain a 64 bit product. */
13058 aarch64_set_reg_s64
13059 (cpu, rd, NO_SP,
13060 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13061 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13062 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13063 }
13064
13065 /* Integer Multiply/Divide. */
13066
13067 /* First some macros and a helper function. */
13068 /* Macros to test or access elements of 64 bit words. */
13069
13070 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13071 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13072 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13073 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13074 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13075 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13076
13077 /* Offset of sign bit in 64 bit signed integer. */
13078 #define SIGN_SHIFT_U64 63
13079 /* The sign bit itself -- also identifies the minimum negative int value. */
13080 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
13081 /* Return true if a 64 bit signed int presented as an unsigned int is the
13082 most negative value. */
13083 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13084 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13085 int has its sign bit set. */
13086 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13087 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13088 an unsigned int has its sign bit set or not. */
13089 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13090 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13091 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13092
13093 /* Multiply two 64 bit ints and return
13094 the hi 64 bits of the 128 bit product. */
13095
13096 static uint64_t
13097 mul64hi (uint64_t value1, uint64_t value2)
13098 {
13099 uint64_t resultmid1;
13100 uint64_t result;
13101 uint64_t value1_lo = lowWordToU64 (value1);
13102 uint64_t value1_hi = highWordToU64 (value1);
13103 uint64_t value2_lo = lowWordToU64 (value2);
13104 uint64_t value2_hi = highWordToU64 (value2);
13105
13106 /* Cross-multiply and collect results. */
13107 uint64_t xproductlo = value1_lo * value2_lo;
13108 uint64_t xproductmid1 = value1_lo * value2_hi;
13109 uint64_t xproductmid2 = value1_hi * value2_lo;
13110 uint64_t xproducthi = value1_hi * value2_hi;
13111 uint64_t carry = 0;
13112 /* Start accumulating 64 bit results. */
13113 /* Drop bottom half of lowest cross-product. */
13114 uint64_t resultmid = xproductlo >> 32;
13115 /* Add in middle products. */
13116 resultmid = resultmid + xproductmid1;
13117
13118 /* Check for overflow. */
13119 if (resultmid < xproductmid1)
13120 /* Carry over 1 into top cross-product. */
13121 carry++;
13122
13123 resultmid1 = resultmid + xproductmid2;
13124
13125 /* Check for overflow. */
13126 if (resultmid1 < xproductmid2)
13127 /* Carry over 1 into top cross-product. */
13128 carry++;
13129
13130 /* Drop lowest 32 bits of middle cross-product. */
13131 result = resultmid1 >> 32;
13132 /* Move carry bit to just above middle cross-product highest bit. */
13133 carry = carry << 32;
13134
13135 /* Add in the top cross-product and any carry. */
13136 result += xproducthi + carry;
13137
13138 return result;
13139 }
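
/* A minimal cross-check sketch for mul64hi, assuming a compiler that
   provides the unsigned __int128 extension (GCC/Clang); it is not part
   of the simulator build, hence the #if 0. */
#if 0
#include <assert.h>

static void
mul64hi_check (uint64_t a, uint64_t b)
{
  /* The cross-product sum computed above must equal the top half of
     the native 128 bit product. */
  unsigned __int128 wide = (unsigned __int128) a * b;

  assert (mul64hi (a, b) == (uint64_t) (wide >> 64));
}
#endif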
13140
13141 /* Signed multiply high -- source, source2 : 64 bit,
13142 dest <-- high 64 bits of the 128 bit result. */
13143 static void
13144 smulh (sim_cpu *cpu)
13145 {
13146 uint64_t uresult;
13147 int64_t result;
13148 unsigned rm = INSTR (20, 16);
13149 unsigned rn = INSTR (9, 5);
13150 unsigned rd = INSTR (4, 0);
13151 GReg ra = INSTR (14, 10);
13152 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13153 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13154 uint64_t uvalue1;
13155 uint64_t uvalue2;
13156 int negate = 0;
13157
13158 if (ra != R31)
13159 HALT_UNALLOC;
13160
13161 /* Convert to unsigned and use the unsigned mul64hi routine,
13162 then fix the sign up afterwards. */
13163 if (value1 < 0)
13164 {
13165 negate = !negate;
13166 uvalue1 = -value1;
13167 }
13168 else
13169 {
13170 uvalue1 = value1;
13171 }
13172
13173 if (value2 < 0)
13174 {
13175 negate = !negate;
13176 uvalue2 = -value2;
13177 }
13178 else
13179 {
13180 uvalue2 = value2;
13181 }
13182
13183 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13184
13185 uresult = mul64hi (uvalue1, uvalue2);
13186 result = uresult;
13187
13188 if (negate)
13189 {
13190 /* Multiply the 128-bit result by -1: the high part is inverted,
13191 with 1 (the carry in) added only if the low part is 0. */
13192 result = ~result;
13193 if ((uvalue1 * uvalue2) == 0)
13194 result += 1;
13195 }
13196
13197 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13198 }
13199
13200 /* Unsigned multiply add long -- source, source2 :
13201 32 bit, source3 : 64 bit. */
13202 static void
13203 umaddl (sim_cpu *cpu)
13204 {
13205 unsigned rm = INSTR (20, 16);
13206 unsigned ra = INSTR (14, 10);
13207 unsigned rn = INSTR (9, 5);
13208 unsigned rd = INSTR (4, 0);
13209
13210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13211 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13212 obtain a 64 bit product. */
13213 aarch64_set_reg_u64
13214 (cpu, rd, NO_SP,
13215 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13216 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13217 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13218 }
13219
13220 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13221 static void
13222 umsubl (sim_cpu *cpu)
13223 {
13224 unsigned rm = INSTR (20, 16);
13225 unsigned ra = INSTR (14, 10);
13226 unsigned rn = INSTR (9, 5);
13227 unsigned rd = INSTR (4, 0);
13228
13229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13230 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13231 obtain a 64 bit product. */
13232 aarch64_set_reg_u64
13233 (cpu, rd, NO_SP,
13234 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13235 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13236 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13237 }
13238
13239 /* Unsigned multiply high -- source, source2 : 64 bit,
13240 dest <-- high 64 bits of the 128 bit result. */
13241 static void
13242 umulh (sim_cpu *cpu)
13243 {
13244 unsigned rm = INSTR (20, 16);
13245 unsigned rn = INSTR (9, 5);
13246 unsigned rd = INSTR (4, 0);
13247 GReg ra = INSTR (14, 10);
13248
13249 if (ra != R31)
13250 HALT_UNALLOC;
13251
13252 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13253 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13254 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13255 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13256 }
13257
13258 static void
13259 dexDataProc3Source (sim_cpu *cpu)
13260 {
13261 /* assert instr[28,24] == 11011. */
13262 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13263 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13264 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13265 instr[15] = o0 : 0/1 ==> ok
13266 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13267 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13268 0100 ==> SMULH, (64 bit only)
13269 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13270 1100 ==> UMULH (64 bit only)
13271 ow ==> UNALLOC. */
13272
13273 uint32_t dispatch;
13274 uint32_t size = INSTR (31, 31);
13275 uint32_t op54 = INSTR (30, 29);
13276 uint32_t op31 = INSTR (23, 21);
13277 uint32_t o0 = INSTR (15, 15);
13278
13279 if (op54 != 0)
13280 HALT_UNALLOC;
13281
13282 if (size == 0)
13283 {
13284 if (op31 != 0)
13285 HALT_UNALLOC;
13286
13287 if (o0 == 0)
13288 madd32 (cpu);
13289 else
13290 msub32 (cpu);
13291 return;
13292 }
13293
13294 dispatch = (op31 << 1) | o0;
13295
13296 switch (dispatch)
13297 {
13298 case 0: madd64 (cpu); return;
13299 case 1: msub64 (cpu); return;
13300 case 2: smaddl (cpu); return;
13301 case 3: smsubl (cpu); return;
13302 case 4: smulh (cpu); return;
13303 case 10: umaddl (cpu); return;
13304 case 11: umsubl (cpu); return;
13305 case 12: umulh (cpu); return;
13306 default: HALT_UNALLOC;
13307 }
13308 }
13309
13310 static void
13311 dexDPReg (sim_cpu *cpu)
13312 {
13313 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13314 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13315 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13316 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13317
13318 switch (group2)
13319 {
13320 case DPREG_LOG_000:
13321 case DPREG_LOG_001:
13322 dexLogicalShiftedRegister (cpu); return;
13323
13324 case DPREG_ADDSHF_010:
13325 dexAddSubtractShiftedRegister (cpu); return;
13326
13327 case DPREG_ADDEXT_011:
13328 dexAddSubtractExtendedRegister (cpu); return;
13329
13330 case DPREG_ADDCOND_100:
13331 {
13332 /* This set bundles a variety of different operations. */
13333 /* Check for: */
13334 /* 1) add/sub w carry. */
13335 uint32_t mask1 = 0x1FE00000U;
13336 uint32_t val1 = 0x1A000000U;
13337 /* 2) cond compare register/immediate. */
13338 uint32_t mask2 = 0x1FE00000U;
13339 uint32_t val2 = 0x1A400000U;
13340 /* 3) cond select. */
13341 uint32_t mask3 = 0x1FE00000U;
13342 uint32_t val3 = 0x1A800000U;
13343 /* 4) data proc 1/2 source. */
13344 uint32_t mask4 = 0x1FE00000U;
13345 uint32_t val4 = 0x1AC00000U;
13346
13347 if ((aarch64_get_instr (cpu) & mask1) == val1)
13348 dexAddSubtractWithCarry (cpu);
13349
13350 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13351 CondCompare (cpu);
13352
13353 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13354 dexCondSelect (cpu);
13355
13356 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13357 {
13358 /* Bit 30 is clear for data proc 2 source
13359 and set for data proc 1 source. */
13360 if (aarch64_get_instr (cpu) & (1U << 30))
13361 dexDataProc1Source (cpu);
13362 else
13363 dexDataProc2Source (cpu);
13364 }
13365
13366 else
13367 /* Should not reach here. */
13368 HALT_NYI;
13369
13370 return;
13371 }
13372
13373 case DPREG_3SRC_110:
13374 dexDataProc3Source (cpu); return;
13375
13376 case DPREG_UNALLOC_101:
13377 HALT_UNALLOC;
13378
13379 case DPREG_3SRC_111:
13380 dexDataProc3Source (cpu); return;
13381
13382 default:
13383 /* Should never reach here. */
13384 HALT_NYI;
13385 }
13386 }
13387
13388 /* Unconditional Branch immediate.
13389 Offset is a PC-relative byte offset in the range +/- 128MiB.
13390 The decode routine is expected to have already scaled the raw
13391 word offset held in the instruction up to a byte offset. */
13392
13393 /* Unconditional branch. */
13394 static void
13395 buc (sim_cpu *cpu, int32_t offset)
13396 {
13397 aarch64_set_next_PC_by_offset (cpu, offset);
13398 }
13399
13400 static unsigned stack_depth = 0;
13401
13402 /* Unconditional branch and link -- writes return PC to LR. */
13403 static void
13404 bl (sim_cpu *cpu, int32_t offset)
13405 {
13406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13407 aarch64_save_LR (cpu);
13408 aarch64_set_next_PC_by_offset (cpu, offset);
13409
13410 if (TRACE_BRANCH_P (cpu))
13411 {
13412 ++ stack_depth;
13413 TRACE_BRANCH (cpu,
13414 " %*scall %" PRIx64 " [%s]"
13415 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13416 stack_depth, " ", aarch64_get_next_PC (cpu),
13417 aarch64_get_func (CPU_STATE (cpu),
13418 aarch64_get_next_PC (cpu)),
13419 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13420 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13421 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13422 );
13423 }
13424 }
13425
13426 /* Unconditional Branch register.
13427 Branch/return address is in source register. */
13428
13429 /* Unconditional branch. */
13430 static void
13431 br (sim_cpu *cpu)
13432 {
13433 unsigned rn = INSTR (9, 5);
13434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13435 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13436 }
13437
13438 /* Unconditional branch and link -- writes return PC to LR. */
13439 static void
13440 blr (sim_cpu *cpu)
13441 {
13442 /* Ensure we read the destination before we write LR. */
13443 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13444
13445 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13446 aarch64_save_LR (cpu);
13447 aarch64_set_next_PC (cpu, target);
13448
13449 if (TRACE_BRANCH_P (cpu))
13450 {
13451 ++ stack_depth;
13452 TRACE_BRANCH (cpu,
13453 " %*scall %" PRIx64 " [%s]"
13454 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13455 stack_depth, " ", aarch64_get_next_PC (cpu),
13456 aarch64_get_func (CPU_STATE (cpu),
13457 aarch64_get_next_PC (cpu)),
13458 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13459 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13460 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13461 );
13462 }
13463 }
13464
13465 /* Return -- the assembler defaults the source register to LR. This is
13466 functionally equivalent to br but, presumably, unlike br it side
13467 effects the branch predictor. */
13468 static void
13469 ret (sim_cpu *cpu)
13470 {
13471 unsigned rn = INSTR (9, 5);
13472 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13473
13474 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13475 if (TRACE_BRANCH_P (cpu))
13476 {
13477 TRACE_BRANCH (cpu,
13478 " %*sreturn [result: %" PRIx64 "]",
13479 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13480 -- stack_depth;
13481 }
13482 }
13483
13484 /* NOP -- we implement this and call it from the decode in case we
13485 want to intercept it later. */
13486
13487 static void
13488 nop (sim_cpu *cpu)
13489 {
13490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13491 }
13492
13493 /* Data synchronization barrier. */
13494
13495 static void
13496 dsb (sim_cpu *cpu)
13497 {
13498 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13499 }
13500
13501 /* Data memory barrier. */
13502
13503 static void
13504 dmb (sim_cpu *cpu)
13505 {
13506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13507 }
13508
13509 /* Instruction synchronization barrier. */
13510
13511 static void
13512 isb (sim_cpu *cpu)
13513 {
13514 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13515 }
13516
13517 static void
13518 dexBranchImmediate (sim_cpu *cpu)
13519 {
13520 /* assert instr[30,26] == 00101
13521 instr[31] ==> 0 == B, 1 == BL
13522 instr[25,0] == imm26 branch offset counted in words. */
13523
13524 uint32_t top = INSTR (31, 31);
13525 /* We have a 26 bit signed word offset which we need to pass to the
13526 execute routine as a signed byte offset. */
13527 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13528
13529 if (top)
13530 bl (cpu, offset);
13531 else
13532 buc (cpu, offset);
13533 }
13534
13535 /* Control Flow. */
13536
13537 /* Conditional branch
13538
13539 Offset is a PC-relative byte offset in the range +/- 1MiB. pos is
13540 a bit position in the range 0 .. 63.
13541
13542 cc is a CondCode enum value as pulled out of the decode
13543
13544 N.B. any offset register (source) can only be Xn or Wn. */
13545
13546 static void
13547 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13548 {
13549 /* The test returns TRUE if CC is met. */
13550 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13551 if (testConditionCode (cpu, cc))
13552 aarch64_set_next_PC_by_offset (cpu, offset);
13553 }
13554
13555 /* 32 bit branch on register non-zero. */
13556 static void
13557 cbnz32 (sim_cpu *cpu, int32_t offset)
13558 {
13559 unsigned rt = INSTR (4, 0);
13560
13561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13562 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13563 aarch64_set_next_PC_by_offset (cpu, offset);
13564 }
13565
13566 /* 64 bit branch on register non-zero. */
13567 static void
13568 cbnz (sim_cpu *cpu, int32_t offset)
13569 {
13570 unsigned rt = INSTR (4, 0);
13571
13572 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13573 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13574 aarch64_set_next_PC_by_offset (cpu, offset);
13575 }
13576
13577 /* 32 bit branch on register zero. */
13578 static void
13579 cbz32 (sim_cpu *cpu, int32_t offset)
13580 {
13581 unsigned rt = INSTR (4, 0);
13582
13583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13584 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13585 aarch64_set_next_PC_by_offset (cpu, offset);
13586 }
13587
13588 /* 64 bit branch on register zero. */
13589 static void
13590 cbz (sim_cpu *cpu, int32_t offset)
13591 {
13592 unsigned rt = INSTR (4, 0);
13593
13594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13595 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13596 aarch64_set_next_PC_by_offset (cpu, offset);
13597 }
13598
13599 /* Branch on register bit test non-zero -- one size fits all. */
13600 static void
13601 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13602 {
13603 unsigned rt = INSTR (4, 0);
13604
13605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13606 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13607 aarch64_set_next_PC_by_offset (cpu, offset);
13608 }
13609
13610 /* Branch on register bit test zero -- one size fits all. */
13611 static void
13612 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13613 {
13614 unsigned rt = INSTR (4, 0);
13615
13616 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13617 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13618 aarch64_set_next_PC_by_offset (cpu, offset);
13619 }
13620
13621 static void
13622 dexCompareBranchImmediate (sim_cpu *cpu)
13623 {
13624 /* instr[30,25] = 01 1010
13625 instr[31] = size : 0 ==> 32, 1 ==> 64
13626 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13627 instr[23,5] = simm19 branch offset counted in words
13628 instr[4,0] = rt */
13629
13630 uint32_t size = INSTR (31, 31);
13631 uint32_t op = INSTR (24, 24);
13632 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13633
13634 if (size == 0)
13635 {
13636 if (op == 0)
13637 cbz32 (cpu, offset);
13638 else
13639 cbnz32 (cpu, offset);
13640 }
13641 else
13642 {
13643 if (op == 0)
13644 cbz (cpu, offset);
13645 else
13646 cbnz (cpu, offset);
13647 }
13648 }
13649
13650 static void
13651 dexTestBranchImmediate (sim_cpu *cpu)
13652 {
13653 /* instr[31] = b5 : bit 5 of test bit idx
13654 instr[30,25] = 01 1011
13655 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13656 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13657 instr[18,5] = simm14 : signed offset counted in words
13658 instr[4,0] = uimm5 */
13659
13660 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13661 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13662
13663 NYI_assert (30, 25, 0x1b);
13664
13665 if (INSTR (24, 24) == 0)
13666 tbz (cpu, pos, offset);
13667 else
13668 tbnz (cpu, pos, offset);
13669 }
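
/* For example (illustration only), TBZ X5, #41, label splits the bit
   index 41 = 0b101001 into b5 = 1 (instr[31]) and b40 = 0b01001
   (instr[23,19]); the code above reassembles these into pos = 41. */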
13670
13671 static void
13672 dexCondBranchImmediate (sim_cpu *cpu)
13673 {
13674 /* instr[31,25] = 010 1010
13675 instr[24] = op1 : op1:op0 must be 00 for B.cond, ow ==> UNALLOC
13676 instr[23,5] = simm19 : signed offset counted in words
13677 instr[4] = op0
13678 instr[3,0] = cond */
13679
13680 int32_t offset;
13681 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13682
13683 NYI_assert (31, 25, 0x2a);
13684
13685 if (op != 0)
13686 HALT_UNALLOC;
13687
13688 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13689
13690 bcc (cpu, offset, INSTR (3, 0));
13691 }
13692
13693 static void
13694 dexBranchRegister (sim_cpu *cpu)
13695 {
13696 /* instr[31,25] = 110 1011
13697 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13698 instr[20,16] = op2 : must be 11111
13699 instr[15,10] = op3 : must be 000000
13700 instr[4,0] = op4 : must be 00000. */
13701
13702 uint32_t op = INSTR (24, 21);
13703 uint32_t op2 = INSTR (20, 16);
13704 uint32_t op3 = INSTR (15, 10);
13705 uint32_t op4 = INSTR (4, 0);
13706
13707 NYI_assert (31, 25, 0x6b);
13708
13709 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13710 HALT_UNALLOC;
13711
13712 if (op == 0)
13713 br (cpu);
13714
13715 else if (op == 1)
13716 blr (cpu);
13717
13718 else if (op == 2)
13719 ret (cpu);
13720
13721 else
13722 {
13723 /* ERET and DRPS accept 0b11111 only for rn = instr [9,5]; */
13724 /* anything else is unallocated. */
13725 uint32_t rn = INSTR (9, 5);
13726
13727 if (rn != 0x1f)
13728 HALT_UNALLOC;
13729
13730 if (op == 4 || op == 5)
13731 HALT_NYI;
13732
13733 HALT_UNALLOC;
13734 }
13735 }
13736
13737 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13738 but this may not be available. So instead we define the values we need
13739 here. */
13740 #define AngelSVC_Reason_Open 0x01
13741 #define AngelSVC_Reason_Close 0x02
13742 #define AngelSVC_Reason_Write 0x05
13743 #define AngelSVC_Reason_Read 0x06
13744 #define AngelSVC_Reason_IsTTY 0x09
13745 #define AngelSVC_Reason_Seek 0x0A
13746 #define AngelSVC_Reason_FLen 0x0C
13747 #define AngelSVC_Reason_Remove 0x0E
13748 #define AngelSVC_Reason_Rename 0x0F
13749 #define AngelSVC_Reason_Clock 0x10
13750 #define AngelSVC_Reason_Time 0x11
13751 #define AngelSVC_Reason_System 0x12
13752 #define AngelSVC_Reason_Errno 0x13
13753 #define AngelSVC_Reason_GetCmdLine 0x15
13754 #define AngelSVC_Reason_HeapInfo 0x16
13755 #define AngelSVC_Reason_ReportException 0x18
13756 #define AngelSVC_Reason_Elapsed 0x30
13757
13758
13759 static void
13760 handle_halt (sim_cpu *cpu, uint32_t val)
13761 {
13762 uint64_t result = 0;
13763
13764 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13765 if (val != 0xf000)
13766 {
13767 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13768 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13769 sim_stopped, SIM_SIGTRAP);
13770 }
13771
13772 /* We have encountered an Angel SVC call. See if we can process it. */
13773 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13774 {
13775 case AngelSVC_Reason_HeapInfo:
13776 {
13777 /* Get the values. */
13778 uint64_t stack_top = aarch64_get_stack_start (cpu);
13779 uint64_t heap_base = aarch64_get_heap_start (cpu);
13780
13781 /* Get the pointer. */
13782 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13783 ptr = aarch64_get_mem_u64 (cpu, ptr);
13784
13785 /* Fill in the memory block. */
13786 /* Start addr of heap. */
13787 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13788 /* End addr of heap. */
13789 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13790 /* Lowest stack addr. */
13791 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13792 /* Initial stack addr. */
13793 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13794
13795 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13796 }
13797 break;
13798
13799 case AngelSVC_Reason_Open:
13800 {
13801 /* Get the pointer. */
13802 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13803 /* FIXME: For now we just assume that we will only be asked
13804 to open the standard file descriptors. */
13805 static int fd = 0;
13806 result = fd ++;
13807
13808 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13809 }
13810 break;
13811
13812 case AngelSVC_Reason_Close:
13813 {
13814 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13815 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13816 result = 0;
13817 }
13818 break;
13819
13820 case AngelSVC_Reason_Errno:
13821 result = 0;
13822 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13823 break;
13824
13825 case AngelSVC_Reason_Clock:
13826 result =
13827 #ifdef CLOCKS_PER_SEC
13828 (CLOCKS_PER_SEC >= 100)
13829 ? (clock () / (CLOCKS_PER_SEC / 100))
13830 : ((clock () * 100) / CLOCKS_PER_SEC)
13831 #else
13832 /* Presume unix... clock() returns microseconds. */
13833 (clock () / 10000)
13834 #endif
13835 ;
13836 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13837 break;
13838
13839 case AngelSVC_Reason_GetCmdLine:
13840 {
13841 /* Get the pointer. */
13842 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13843 ptr = aarch64_get_mem_u64 (cpu, ptr);
13844
13845 /* FIXME: No command line for now. */
13846 aarch64_set_mem_u64 (cpu, ptr, 0);
13847 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13848 }
13849 break;
13850
13851 case AngelSVC_Reason_IsTTY:
13852 result = 1;
13853 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13854 break;
13855
13856 case AngelSVC_Reason_Write:
13857 {
13858 /* Get the pointer. */
13859 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13860 /* Get the write control block. */
13861 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13862 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13863 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13864
13865 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13866 PRIx64 " on descriptor %" PRIx64,
13867 len, buf, fd);
13868
13869 if (len > 1280)
13870 {
13871 TRACE_SYSCALL (cpu,
13872 " AngelSVC: Write: Suspiciously long write: %ld",
13873 (long) len);
13874 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13875 sim_stopped, SIM_SIGBUS);
13876 }
13877 else if (fd == 1)
13878 {
13879 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13880 }
13881 else if (fd == 2)
13882 {
13883 TRACE (cpu, 0, "\n");
13884 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13885 (int) len, aarch64_get_mem_ptr (cpu, buf));
13886 TRACE (cpu, 0, "\n");
13887 }
13888 else
13889 {
13890 TRACE_SYSCALL (cpu,
13891 " AngelSVC: Write: Unexpected file handle: %d",
13892 (int) fd);
13893 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13894 sim_stopped, SIM_SIGABRT);
13895 }
13896 }
13897 break;
13898
13899 case AngelSVC_Reason_ReportException:
13900 {
13901 /* Get the pointer. */
13902 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13903 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13904 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13905 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13906
13907 TRACE_SYSCALL (cpu,
13908 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13909 type, state);
13910
13911 if (type == 0x20026)
13912 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13913 sim_exited, state);
13914 else
13915 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13916 sim_stopped, SIM_SIGINT);
13917 }
13918 break;
13919
13920 case AngelSVC_Reason_Read:
13921 case AngelSVC_Reason_FLen:
13922 case AngelSVC_Reason_Seek:
13923 case AngelSVC_Reason_Remove:
13924 case AngelSVC_Reason_Time:
13925 case AngelSVC_Reason_System:
13926 case AngelSVC_Reason_Rename:
13927 case AngelSVC_Reason_Elapsed:
13928 default:
13929 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13930 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13931 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13932 sim_stopped, SIM_SIGTRAP);
13933 }
13934
13935 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13936 }
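
/* A sketch of the guest-side convention served above, for illustration
   only: the Angel reason code goes in x0, a pointer to a parameter
   block in x1, and the guest executes HLT #0xf000. The block layout
   matches the reads performed in the Write case. The function below is
   hypothetical guest code, not part of the simulator build. */
#if 0
static uint64_t
angel_write (uint64_t fd, const void *buf, uint64_t len)
{
  uint64_t block[3] = { fd, (uint64_t) buf, len };
  register uint64_t x0 __asm__ ("x0") = 0x05; /* AngelSVC_Reason_Write. */
  register uint64_t x1 __asm__ ("x1") = (uint64_t) block;

  __asm__ volatile ("hlt #0xf000" : "+r" (x0) : "r" (x1) : "memory");
  return x0; /* The result is handed back in x0. */
}
#endif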
13937
13938 static void
13939 dexExcpnGen (sim_cpu *cpu)
13940 {
13941 /* instr[31:24] = 11010100
13942 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13943 010 ==> HLT, 101 ==> DBG GEN EXCPN
13944 instr[20,5] = imm16
13945 instr[4,2] = opc2 : 000 ==> OK, ow ==> UNALLOC
13946 instr[1,0] = LL : discriminates opc */
13947
13948 uint32_t opc = INSTR (23, 21);
13949 uint32_t imm16 = INSTR (20, 5);
13950 uint32_t opc2 = INSTR (4, 2);
13951 uint32_t LL;
13952
13953 NYI_assert (31, 24, 0xd4);
13954
13955 if (opc2 != 0)
13956 HALT_UNALLOC;
13957
13958 LL = INSTR (1, 0);
13959
13960 /* We only implement HLT and BRK for now. */
13961 if (opc == 1 && LL == 0)
13962 {
13963 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13964 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13965 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13966 }
13967
13968 if (opc == 2 && LL == 0)
13969 handle_halt (cpu, imm16);
13970
13971 else if (opc == 0 || opc == 5)
13972 HALT_NYI;
13973
13974 else
13975 HALT_UNALLOC;
13976 }
13977
13978 /* Stub for accessing system registers. */
13979
13980 static uint64_t
13981 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13982 unsigned crm, unsigned op2)
13983 {
13984 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13985 /* DCZID_EL0 - the Data Cache Zero ID register.
13986 We do not support DC ZVA at the moment, so
13987 we return a value with the disable bit set.
13988 We implement support for the DCZID register since
13989 it is used by the C library's memset function. */
13990 return ((uint64_t) 1) << 4;
13991
13992 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13993 /* Cache Type Register. */
13994 return 0x80008000UL;
13995
13996 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13997 /* TPIDR_EL0 - thread pointer id. */
13998 return aarch64_get_thread_id (cpu);
13999
14000 if (op1 == 3 && crm == 4 && op2 == 0)
14001 return aarch64_get_FPCR (cpu);
14002
14003 if (op1 == 3 && crm == 4 && op2 == 1)
14004 return aarch64_get_FPSR (cpu);
14005
14006 else if (op1 == 3 && crm == 2 && op2 == 0)
14007 return aarch64_get_CPSR (cpu);
14008
14009 HALT_NYI;
14010 }
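
/* For example, the C library's memset probes the cache geometry with
   "mrs x0, dczid_el0"; per the ARMv8 ARM that register encodes as
   op0 = 3, op1 = 3, crn = 0, crm = 0, op2 = 7, so it is served by the
   first test above. */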
14011
14012 static void
14013 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14014 unsigned crm, unsigned op2, uint64_t val)
14015 {
14016 if (op1 == 3 && crm == 4 && op2 == 0)
14017 aarch64_set_FPCR (cpu, val);
14018
14019 else if (op1 == 3 && crm == 4 && op2 == 1)
14020 aarch64_set_FPSR (cpu, val);
14021
14022 else if (op1 == 3 && crm == 2 && op2 == 0)
14023 aarch64_set_CPSR (cpu, val);
14024
14025 else
14026 HALT_NYI;
14027 }
14028
14029 static void
14030 do_mrs (sim_cpu *cpu)
14031 {
14032 /* instr[31:20] = 1101 0101 0011
14033 instr[19] = op0
14034 instr[18,16] = op1
14035 instr[15,12] = CRn
14036 instr[11,8] = CRm
14037 instr[7,5] = op2
14038 instr[4,0] = Rt */
14039 unsigned sys_op0 = INSTR (19, 19) + 2;
14040 unsigned sys_op1 = INSTR (18, 16);
14041 unsigned sys_crn = INSTR (15, 12);
14042 unsigned sys_crm = INSTR (11, 8);
14043 unsigned sys_op2 = INSTR (7, 5);
14044 unsigned rt = INSTR (4, 0);
14045
14046 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14047 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14048 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14049 }
14050
14051 static void
14052 do_MSR_immediate (sim_cpu *cpu)
14053 {
14054 /* instr[31:19] = 1101 0101 0000 0
14055 instr[18,16] = op1
14056 instr[15,12] = 0100
14057 instr[11,8] = CRm
14058 instr[7,5] = op2
14059 instr[4,0] = 1 1111 */
14060
14061 unsigned op1 = INSTR (18, 16);
14062 /* unsigned crm = INSTR (11, 8); */
14063 unsigned op2 = INSTR (7, 5);
14064
14065 NYI_assert (31, 19, 0x1AA0);
14066 NYI_assert (15, 12, 0x4);
14067 NYI_assert (4, 0, 0x1F);
14068
14069 if (op1 == 0)
14070 {
14071 if (op2 == 5)
14072 HALT_NYI; /* set SPSel. */
14073 else
14074 HALT_UNALLOC;
14075 }
14076 else if (op1 == 3)
14077 {
14078 if (op2 == 6)
14079 HALT_NYI; /* set DAIFset. */
14080 else if (op2 == 7)
14081 HALT_NYI; /* set DAIFclr. */
14082 else
14083 HALT_UNALLOC;
14084 }
14085 else
14086 HALT_UNALLOC;
14087 }
14088
14089 static void
14090 do_MSR_reg (sim_cpu *cpu)
14091 {
14092 /* instr[31:20] = 1101 0101 0001
14093 instr[19] = op0
14094 instr[18,16] = op1
14095 instr[15,12] = CRn
14096 instr[11,8] = CRm
14097 instr[7,5] = op2
14098 instr[4,0] = Rt */
14099
14100 unsigned sys_op0 = INSTR (19, 19) + 2;
14101 unsigned sys_op1 = INSTR (18, 16);
14102 unsigned sys_crn = INSTR (15, 12);
14103 unsigned sys_crm = INSTR (11, 8);
14104 unsigned sys_op2 = INSTR (7, 5);
14105 unsigned rt = INSTR (4, 0);
14106
14107 NYI_assert (31, 20, 0xD51);
14108
14109 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14110 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14111 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14112 }
14113
14114 static void
14115 do_SYS (sim_cpu *cpu)
14116 {
14117 /* instr[31,19] = 1101 0101 0000 1
14118 instr[18,16] = op1
14119 instr[15,12] = CRn
14120 instr[11,8] = CRm
14121 instr[7,5] = op2
14122 instr[4,0] = Rt */
14123 NYI_assert (31, 19, 0x1AA1);
14124
14125 /* FIXME: For now we just silently accept system ops. */
14126 }
14127
14128 static void
14129 dexSystem (sim_cpu *cpu)
14130 {
14131 /* instr[31:22] = 1101 01010 0
14132 instr[21] = L
14133 instr[20,19] = op0
14134 instr[18,16] = op1
14135 instr[15,12] = CRn
14136 instr[11,8] = CRm
14137 instr[7,5] = op2
14138 instr[4,0] = uimm5 */
14139
14140 /* We are interested in HINT, DSB, DMB and ISB
14141
14142 Hint #0 encodes NOOP (this is the only hint we care about)
14143 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14144 CRm != 0000, or op2 == 000, or op2 > 101
14145
14146 DSB, DMB and ISB are data synchronization barrier, data memory
14147 barrier and instruction synchronization barrier, respectively, where
14148
14149 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14150 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14151 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14152 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14153 10 ==> InnerShareable, 11 ==> FullSystem
14154 types : 01 ==> Reads, 10 ==> Writes,
14155 11 ==> All, 00 ==> All (domain == FullSystem). */
14156
14157 unsigned rt = INSTR (4, 0);
14158
14159 NYI_assert (31, 22, 0x354);
14160
14161 switch (INSTR (21, 12))
14162 {
14163 case 0x032:
14164 if (rt == 0x1F)
14165 {
14166 /* NOP has CRm != 0000 OR
14167 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14168 uint32_t crm = INSTR (11, 8);
14169 uint32_t op2 = INSTR (7, 5);
14170
14171 if (crm != 0 || (op2 == 0 || op2 > 5))
14172 {
14173 /* Actually call nop method so we can reimplement it later. */
14174 nop (cpu);
14175 return;
14176 }
14177 }
14178 HALT_NYI;
14179
14180 case 0x033:
14181 {
14182 uint32_t op2 = INSTR (7, 5);
14183
14184 switch (op2)
14185 {
14186 case 2: HALT_NYI;
14187 case 4: dsb (cpu); return;
14188 case 5: dmb (cpu); return;
14189 case 6: isb (cpu); return;
14190 default: HALT_UNALLOC;
14191 }
14192 }
14193
14194 case 0x3B0:
14195 case 0x3B4:
14196 case 0x3BD:
14197 do_mrs (cpu);
14198 return;
14199
14200 case 0x0B7:
14201 do_SYS (cpu); /* DC is an alias of SYS. */
14202 return;
14203
14204 default:
14205 if (INSTR (21, 20) == 0x1)
14206 do_MSR_reg (cpu);
14207 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14208 do_MSR_immediate (cpu);
14209 else
14210 HALT_NYI;
14211 return;
14212 }
14213 }
14214
14215 static void
14216 dexBr (sim_cpu *cpu)
14217 {
14218 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14219 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14220 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14221 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14222
14223 switch (group2)
14224 {
14225 case BR_IMM_000:
14226 return dexBranchImmediate (cpu);
14227
14228 case BR_IMMCMP_001:
14229 /* Compare has bit 25 clear while test has it set. */
14230 if (!INSTR (25, 25))
14231 dexCompareBranchImmediate (cpu);
14232 else
14233 dexTestBranchImmediate (cpu);
14234 return;
14235
14236 case BR_IMMCOND_010:
14237 /* This is a conditional branch if bit 25 is clear otherwise
14238 unallocated. */
14239 if (!INSTR (25, 25))
14240 dexCondBranchImmediate (cpu);
14241 else
14242 HALT_UNALLOC;
14243 return;
14244
14245 case BR_UNALLOC_011:
14246 HALT_UNALLOC;
14247
14248 case BR_IMM_100:
14249 dexBranchImmediate (cpu);
14250 return;
14251
14252 case BR_IMMCMP_101:
14253 /* Compare has bit 25 clear while test has it set. */
14254 if (!INSTR (25, 25))
14255 dexCompareBranchImmediate (cpu);
14256 else
14257 dexTestBranchImmediate (cpu);
14258 return;
14259
14260 case BR_REG_110:
14261 /* Unconditional branch reg has bit 25 set. */
14262 if (INSTR (25, 25))
14263 dexBranchRegister (cpu);
14264
14265 /* This includes both Excpn Gen, System and unalloc operations.
14266 We need to decode the Excpn Gen operation BRK so we can plant
14267 debugger entry points.
14268 Excpn Gen operations have instr [24] = 0.
14269 we need to decode at least one of the System operations NOP
14270 which is an alias for HINT #0.
14271 System operations have instr [24,22] = 100. */
14272 else if (INSTR (24, 24) == 0)
14273 dexExcpnGen (cpu);
14274
14275 else if (INSTR (24, 22) == 4)
14276 dexSystem (cpu);
14277
14278 else
14279 HALT_UNALLOC;
14280
14281 return;
14282
14283 case BR_UNALLOC_111:
14284 HALT_UNALLOC;
14285
14286 default:
14287 /* Should never reach here. */
14288 HALT_NYI;
14289 }
14290 }
14291
14292 static void
14293 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14294 {
14295 /* We need to check if gdb wants to break in here. */
14296 /* checkBreak (cpu); */
14297
14298 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14299
14300 switch (group)
14301 {
14302 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14303 case GROUP_LDST_0100: dexLdSt (cpu); break;
14304 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14305 case GROUP_LDST_0110: dexLdSt (cpu); break;
14306 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14307 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14308 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14309 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14310 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14311 case GROUP_LDST_1100: dexLdSt (cpu); break;
14312 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14313 case GROUP_LDST_1110: dexLdSt (cpu); break;
14314 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14315
14316 case GROUP_UNALLOC_0001:
14317 case GROUP_UNALLOC_0010:
14318 case GROUP_UNALLOC_0011:
14319 HALT_UNALLOC;
14320
14321 default:
14322 /* Should never reach here. */
14323 HALT_NYI;
14324 }
14325 }
14326
14327 static bfd_boolean
14328 aarch64_step (sim_cpu *cpu)
14329 {
14330 uint64_t pc = aarch64_get_PC (cpu);
14331
14332 if (pc == TOP_LEVEL_RETURN_PC)
14333 return FALSE;
14334
14335 aarch64_set_next_PC (cpu, pc + 4);
14336
14337 /* Code is always little-endian. */
14338 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14339 & aarch64_get_instr (cpu), pc, 4);
14340 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14341
14342 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14343 aarch64_get_instr (cpu));
14344 TRACE_DISASM (cpu, pc);
14345
14346 aarch64_decode_and_execute (cpu, pc);
14347
14348 return TRUE;
14349 }
14350
14351 void
14352 aarch64_run (SIM_DESC sd)
14353 {
14354 sim_cpu *cpu = STATE_CPU (sd, 0);
14355
14356 while (aarch64_step (cpu))
14357 {
14358 aarch64_update_PC (cpu);
14359
14360 if (sim_events_tick (sd))
14361 sim_events_process (sd);
14362 }
14363
14364 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14365 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14366 }
14367
14368 void
14369 aarch64_init (sim_cpu *cpu, uint64_t pc)
14370 {
14371 uint64_t sp = aarch64_get_stack_start (cpu);
14372
14373 /* Install SP, FP and PC and set LR to -20
14374 so we can detect a top-level return. */
14375 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14376 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14377 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14378 aarch64_set_next_PC (cpu, pc);
14379 aarch64_update_PC (cpu);
14380 aarch64_init_LIT_table ();
14381 }