1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2024 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 /* This must come before any other includes. */
23 #include "defs.h"
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <sys/types.h>
29 #include <math.h>
30 #include <time.h>
31 #include <limits.h>
32
33 #include "aarch64-sim.h"
34 #include "simulator.h"
35 #include "cpustate.h"
36 #include "memory.h"
37
38 #include "sim-signal.h"
39
40 #define NO_SP 0
41 #define SP_OK 1
42
43 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
44 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
45 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
46
47 /* Space saver macro. */
48 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
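/* Example: for the load/store forms decoded below, INSTR (9, 5)
   extracts the Rn field and INSTR (4, 0) the Rt field of the
   current instruction word.  */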
49
50 #define HALT_UNALLOC \
51 do \
52 { \
53 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
54 TRACE_INSN (cpu, \
55 "Unallocated instruction detected at sim line %d," \
56 " exe addr %" PRIx64, \
57 __LINE__, aarch64_get_PC (cpu)); \
58 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
59 sim_stopped, SIM_SIGILL); \
60 } \
61 while (0)
62
63 #define HALT_NYI \
64 do \
65 { \
66 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
67 TRACE_INSN (cpu, \
68 "Unimplemented instruction detected at sim line %d," \
69 " exe addr %" PRIx64, \
70 __LINE__, aarch64_get_PC (cpu)); \
71 if (! TRACE_ANY_P (cpu)) \
72 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
73 aarch64_get_instr (cpu)); \
74 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
75 sim_stopped, SIM_SIGABRT); \
76 } \
77 while (0)
78
79 #define NYI_assert(HI, LO, EXPECTED) \
80 do \
81 { \
82 if (INSTR ((HI), (LO)) != (EXPECTED)) \
83 HALT_NYI; \
84 } \
85 while (0)
86
87 static uint64_t
88 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
89 {
90 uint64_t mask;
91 uint64_t imm;
92 unsigned simd_size;
93
 94   /* The immediate value is a string of S+1 ones, left rotated by
 95      SIMDsize - R (in other words, right rotated by R), then replicated.  */
96 if (N != 0)
97 {
98 simd_size = 64;
99 mask = 0xffffffffffffffffull;
100 }
101 else
102 {
103 switch (S)
104 {
105 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
106 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
107 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
108 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
109 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
110 default: return 0;
111 }
112 mask = (1ull << simd_size) - 1;
113 /* Top bits are IGNORED. */
114 R &= simd_size - 1;
115 }
116
117 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
118 if (S == simd_size - 1)
119 return 0;
120
121 /* S+1 consecutive bits to 1. */
122 /* NOTE: S can't be 63 due to detection above. */
123 imm = (1ull << (S + 1)) - 1;
124
125 /* Rotate to the left by simd_size - R. */
126 if (R != 0)
127 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
128
129 /* Replicate the value according to SIMD size. */
130 switch (simd_size)
131 {
132 case 2: imm = (imm << 2) | imm; ATTRIBUTE_FALLTHROUGH;
133 case 4: imm = (imm << 4) | imm; ATTRIBUTE_FALLTHROUGH;
134 case 8: imm = (imm << 8) | imm; ATTRIBUTE_FALLTHROUGH;
135 case 16: imm = (imm << 16) | imm; ATTRIBUTE_FALLTHROUGH;
136 case 32: imm = (imm << 32) | imm; ATTRIBUTE_FALLTHROUGH;
137 case 64: break;
138 default: return 0;
139 }
140
141 return imm;
142 }
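/* A minimal worked example (not part of the simulator): the call
   expand_logical_immediate (3, 1, 0) selects simd_size == 32, builds
   the run imm = 0xf (S+1 == 4 ones), rotates right by R == 1 giving
   0x80000007, and replicates across 64 bits, returning
   0x8000000780000007.  */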
143
 144 /* Instr[22,10] encodes N, immr and imms.  We want a lookup table
 145    for each possible combination, i.e. 13 bits worth of int entries.  */
146 #define LI_TABLE_SIZE (1 << 13)
147 static uint64_t LITable[LI_TABLE_SIZE];
148
149 void
150 aarch64_init_LIT_table (void)
151 {
152 unsigned index;
153
154 for (index = 0; index < LI_TABLE_SIZE; index++)
155 {
156 uint32_t N = uimm (index, 12, 12);
157 uint32_t immr = uimm (index, 11, 6);
158 uint32_t imms = uimm (index, 5, 0);
159
160 LITable [index] = expand_logical_immediate (imms, immr, N);
161 }
162 }
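/* Illustrative use (a sketch, not code from this file): a decoder for
   a logical-immediate instruction can index the table directly with
   the N:immr:imms bits and treat a zero entry as unallocated:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */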
163
164 static void
165 dexNotify (sim_cpu *cpu)
166 {
167 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
168 2 ==> exit Java, 3 ==> start next bytecode. */
169 uint32_t type = INSTR (14, 0);
170
171 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
172
173 switch (type)
174 {
175 case 0:
176 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
177 aarch64_get_reg_u64 (cpu, R22, 0)); */
178 break;
179 case 1:
180 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
181 aarch64_get_reg_u64 (cpu, R22, 0)); */
182 break;
183 case 2:
184 /* aarch64_notifyMethodExit (); */
185 break;
186 case 3:
187 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 }
191 }
192
 193 /* Secondary decode within top level groups.  */
194
195 static void
196 dexPseudo (sim_cpu *cpu)
197 {
198 /* assert instr[28,27] = 00
199
200 We provide 2 pseudo instructions:
201
202 HALT stops execution of the simulator causing an immediate
203 return to the x86 code which entered it.
204
205 CALLOUT initiates recursive entry into x86 code. A register
206 argument holds the address of the x86 routine. Immediate
207 values in the instruction identify the number of general
208 purpose and floating point register arguments to be passed
209 and the type of any value to be returned. */
210
211 uint32_t PSEUDO_HALT = 0xE0000000U;
212 uint32_t PSEUDO_CALLOUT = 0x00018000U;
213 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
214 uint32_t PSEUDO_NOTIFY = 0x00014000U;
215 uint32_t dispatch;
216
217 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
218 {
219 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
220 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
221 sim_stopped, SIM_SIGTRAP);
222 }
223
224 dispatch = INSTR (31, 15);
225
226 /* We do not handle callouts at the moment. */
227 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
228 {
229 TRACE_EVENTS (cpu, " Callout");
230 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
231 sim_stopped, SIM_SIGABRT);
232 }
233
234 else if (dispatch == PSEUDO_NOTIFY)
235 dexNotify (cpu);
236
237 else
238 HALT_UNALLOC;
239 }
240
241 /* Load-store single register (unscaled offset)
242 These instructions employ a base register plus an unscaled signed
243 9 bit offset.
244
 245    N.B. the base register (source) can be Xn or SP.  All other
246 registers may not be SP. */
247
248 /* 32 bit load 32 bit unscaled signed 9 bit. */
249 static void
250 ldur32 (sim_cpu *cpu, int32_t offset)
251 {
252 unsigned rn = INSTR (9, 5);
253 unsigned rt = INSTR (4, 0);
254
255 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
256 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
257 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
258 + offset));
259 }
260
261 /* 64 bit load 64 bit unscaled signed 9 bit. */
262 static void
263 ldur64 (sim_cpu *cpu, int32_t offset)
264 {
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
267
268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
269 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
270 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
271 + offset));
272 }
273
274 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
275 static void
276 ldurb32 (sim_cpu *cpu, int32_t offset)
277 {
278 unsigned rn = INSTR (9, 5);
279 unsigned rt = INSTR (4, 0);
280
281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
282 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
283 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
284 + offset));
285 }
286
287 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
288 static void
289 ldursb32 (sim_cpu *cpu, int32_t offset)
290 {
291 unsigned rn = INSTR (9, 5);
292 unsigned rt = INSTR (4, 0);
293
294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
295 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
296 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
297 + offset));
298 }
299
300 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
301 static void
302 ldursb64 (sim_cpu *cpu, int32_t offset)
303 {
304 unsigned rn = INSTR (9, 5);
305 unsigned rt = INSTR (4, 0);
306
307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
308 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
309 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
310 + offset));
311 }
312
313 /* 32 bit load zero-extended short unscaled signed 9 bit */
314 static void
315 ldurh32 (sim_cpu *cpu, int32_t offset)
316 {
317 unsigned rn = INSTR (9, 5);
318 unsigned rd = INSTR (4, 0);
319
320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
321 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
322 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
323 + offset));
324 }
325
326 /* 32 bit load sign-extended short unscaled signed 9 bit */
327 static void
328 ldursh32 (sim_cpu *cpu, int32_t offset)
329 {
330 unsigned rn = INSTR (9, 5);
331 unsigned rd = INSTR (4, 0);
332
333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
334 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
335 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
336 + offset));
337 }
338
339 /* 64 bit load sign-extended short unscaled signed 9 bit */
340 static void
341 ldursh64 (sim_cpu *cpu, int32_t offset)
342 {
343 unsigned rn = INSTR (9, 5);
344 unsigned rt = INSTR (4, 0);
345
346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
347 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
348 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
349 + offset));
350 }
351
352 /* 64 bit load sign-extended word unscaled signed 9 bit */
353 static void
354 ldursw (sim_cpu *cpu, int32_t offset)
355 {
356 unsigned rn = INSTR (9, 5);
357 unsigned rd = INSTR (4, 0);
358
359 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 360   aarch64_set_reg_s64 (cpu, rd, NO_SP, aarch64_get_mem_s32
361 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
362 + offset));
363 }
364
365 /* N.B. with stores the value in source is written to the address
366 identified by source2 modified by offset. */
367
368 /* 32 bit store 32 bit unscaled signed 9 bit. */
369 static void
370 stur32 (sim_cpu *cpu, int32_t offset)
371 {
372 unsigned rn = INSTR (9, 5);
373 unsigned rd = INSTR (4, 0);
374
375 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
376 aarch64_set_mem_u32 (cpu,
377 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
378 aarch64_get_reg_u32 (cpu, rd, NO_SP));
379 }
380
381 /* 64 bit store 64 bit unscaled signed 9 bit */
382 static void
383 stur64 (sim_cpu *cpu, int32_t offset)
384 {
385 unsigned rn = INSTR (9, 5);
386 unsigned rd = INSTR (4, 0);
387
388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
389 aarch64_set_mem_u64 (cpu,
390 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
391 aarch64_get_reg_u64 (cpu, rd, NO_SP));
392 }
393
394 /* 32 bit store byte unscaled signed 9 bit */
395 static void
396 sturb (sim_cpu *cpu, int32_t offset)
397 {
398 unsigned rn = INSTR (9, 5);
399 unsigned rd = INSTR (4, 0);
400
401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
402 aarch64_set_mem_u8 (cpu,
403 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
404 aarch64_get_reg_u8 (cpu, rd, NO_SP));
405 }
406
407 /* 32 bit store short unscaled signed 9 bit */
408 static void
409 sturh (sim_cpu *cpu, int32_t offset)
410 {
411 unsigned rn = INSTR (9, 5);
412 unsigned rd = INSTR (4, 0);
413
414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
418 }
419
 420 /* Load single register pc-relative label.
 421    Offset is a signed 19 bit immediate count in words.
422 rt may not be SP. */
423
424 /* 32 bit pc-relative load */
425 static void
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
427 {
428 unsigned rd = INSTR (4, 0);
429
430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
431 aarch64_set_reg_u64 (cpu, rd, NO_SP,
432 aarch64_get_mem_u32
433 (cpu, aarch64_get_PC (cpu) + offset * 4));
434 }
435
436 /* 64 bit pc-relative load */
437 static void
438 ldr_pcrel (sim_cpu *cpu, int32_t offset)
439 {
440 unsigned rd = INSTR (4, 0);
441
442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
443 aarch64_set_reg_u64 (cpu, rd, NO_SP,
444 aarch64_get_mem_u64
445 (cpu, aarch64_get_PC (cpu) + offset * 4));
446 }
447
448 /* sign extended 32 bit pc-relative load */
449 static void
450 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
451 {
452 unsigned rd = INSTR (4, 0);
453
454 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
455 aarch64_set_reg_u64 (cpu, rd, NO_SP,
456 aarch64_get_mem_s32
457 (cpu, aarch64_get_PC (cpu) + offset * 4));
458 }
459
460 /* float pc-relative load */
461 static void
462 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
463 {
464 unsigned int rd = INSTR (4, 0);
465
466 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
467 aarch64_set_vec_u32 (cpu, rd, 0,
468 aarch64_get_mem_u32
469 (cpu, aarch64_get_PC (cpu) + offset * 4));
470 }
471
472 /* double pc-relative load */
473 static void
474 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
475 {
476 unsigned int st = INSTR (4, 0);
477
478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
479 aarch64_set_vec_u64 (cpu, st, 0,
480 aarch64_get_mem_u64
481 (cpu, aarch64_get_PC (cpu) + offset * 4));
482 }
483
484 /* long double pc-relative load. */
485 static void
486 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
487 {
488 unsigned int st = INSTR (4, 0);
489 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
490 FRegister a;
491
492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
493 aarch64_get_mem_long_double (cpu, addr, & a);
494 aarch64_set_FP_long_double (cpu, st, a);
495 }
496
 497 /* This can be used to scale an offset by applying
 498    the requisite shift.  The second argument is either
 499    16, 32, 64 or 128.  */
500
501 #define SCALE(_offset, _elementSize) \
502 ((_offset) << ScaleShift ## _elementSize)
503
504 /* This can be used to optionally scale a register derived offset
505 by applying the requisite shift as indicated by the Scaling
506 argument. The second argument is either Byte, Short, Word
507 or Long. The third argument is either Scaled or Unscaled.
508 N.B. when _Scaling is Scaled the shift gets ANDed with
509 all 1s while when it is Unscaled it gets ANDed with 0. */
510
511 #define OPT_SCALE(_offset, _elementType, _Scaling) \
512 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
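/* For example (a sketch of the intent): with a 32-bit element,
   SCALE (offset, 32) yields offset << 2, so an immediate field of 3
   addresses byte offset 12; OPT_SCALE (extended, 32, scaling) applies
   the same shift only when scaling is Scaled and otherwise leaves the
   raw register offset unchanged.  */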
513
514 /* This can be used to zero or sign extend a 32 bit register derived
 515    value to a 64 bit value.  The first argument must be the value as
516 a uint32_t and the second must be either UXTW or SXTW. The result
517 is returned as an int64_t. */
518
519 static inline int64_t
520 extend (uint32_t value, Extension extension)
521 {
522 union
523 {
524 uint32_t u;
525 int32_t n;
526 } x;
527
528 /* A branchless variant of this ought to be possible. */
529 if (extension == UXTW || extension == NoExtension)
530 return value;
531
532 x.u = value;
533 return x.n;
534 }
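/* For example (values only): extend (0xfffffff0, SXTW) yields the
   int64_t value -16 (0xfffffffffffffff0), while
   extend (0xfffffff0, UXTW) yields 0x00000000fffffff0.  */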
535
536 /* Scalar Floating Point
537
538 FP load/store single register (4 addressing modes)
539
540 N.B. the base register (source) can be the stack pointer.
541 The secondary source register (source2) can only be an Xn register. */
542
543 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
544 static void
545 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
546 {
547 unsigned rn = INSTR (9, 5);
548 unsigned st = INSTR (4, 0);
549 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
550
551 if (wb != Post)
552 address += offset;
553
554 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
555 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
556 if (wb == Post)
557 address += offset;
558
559 if (wb != NoWriteBack)
560 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
561 }
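/* Writeback semantics, illustrated (a sketch): with Rn holding 0x1000
   and offset 8, Pre loads from 0x1008 and writes 0x1008 back to Rn;
   Post loads from 0x1000 and then writes 0x1008 back; with
   NoWriteBack the load uses 0x1008 and Rn is left unchanged.  */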
562
563 /* Load 8 bit with unsigned 12 bit offset. */
564 static void
565 fldrb_abs (sim_cpu *cpu, uint32_t offset)
566 {
567 unsigned rd = INSTR (4, 0);
568 unsigned rn = INSTR (9, 5);
569 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
570
571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 572   aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
573 }
574
575 /* Load 16 bit scaled unsigned 12 bit. */
576 static void
577 fldrh_abs (sim_cpu *cpu, uint32_t offset)
578 {
579 unsigned rd = INSTR (4, 0);
580 unsigned rn = INSTR (9, 5);
581 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
582
583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
584 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
585 }
586
587 /* Load 32 bit scaled unsigned 12 bit. */
588 static void
589 fldrs_abs (sim_cpu *cpu, uint32_t offset)
590 {
591 unsigned rd = INSTR (4, 0);
592 unsigned rn = INSTR (9, 5);
593 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
594
595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
596 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
597 }
598
599 /* Load 64 bit scaled unsigned 12 bit. */
600 static void
601 fldrd_abs (sim_cpu *cpu, uint32_t offset)
602 {
603 unsigned rd = INSTR (4, 0);
604 unsigned rn = INSTR (9, 5);
605 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
606
607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 }
610
611 /* Load 128 bit scaled unsigned 12 bit. */
612 static void
613 fldrq_abs (sim_cpu *cpu, uint32_t offset)
614 {
615 unsigned rd = INSTR (4, 0);
616 unsigned rn = INSTR (9, 5);
617 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
618
619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
620 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
621 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
622 }
623
624 /* Load 32 bit scaled or unscaled zero- or sign-extended
625 32-bit register offset. */
626 static void
627 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
628 {
629 unsigned rm = INSTR (20, 16);
630 unsigned rn = INSTR (9, 5);
631 unsigned st = INSTR (4, 0);
632 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
633 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
634 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
635
636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
637 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
638 (cpu, address + displacement));
639 }
640
641 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
642 static void
643 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
644 {
645 unsigned rn = INSTR (9, 5);
646 unsigned st = INSTR (4, 0);
647 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
648
649 if (wb != Post)
650 address += offset;
651
652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
654
655 if (wb == Post)
656 address += offset;
657
658 if (wb != NoWriteBack)
659 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
660 }
661
662 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
663 static void
664 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
665 {
666 unsigned rm = INSTR (20, 16);
667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
668 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
669
670 fldrd_wb (cpu, displacement, NoWriteBack);
671 }
672
673 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
674 static void
675 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
676 {
677 FRegister a;
678 unsigned rn = INSTR (9, 5);
679 unsigned st = INSTR (4, 0);
680 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
681
682 if (wb != Post)
683 address += offset;
684
685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
686 aarch64_get_mem_long_double (cpu, address, & a);
687 aarch64_set_FP_long_double (cpu, st, a);
688
689 if (wb == Post)
690 address += offset;
691
692 if (wb != NoWriteBack)
693 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
694 }
695
696 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
697 static void
698 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
699 {
700 unsigned rm = INSTR (20, 16);
701 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
702 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
703
704 fldrq_wb (cpu, displacement, NoWriteBack);
705 }
706
707 /* Memory Access
708
709 load-store single register
710 There are four addressing modes available here which all employ a
711 64 bit source (base) register.
712
713 N.B. the base register (source) can be the stack pointer.
 714    The secondary source register (source2) can only be an Xn register.
715
716 Scaled, 12-bit, unsigned immediate offset, without pre- and
717 post-index options.
718 Unscaled, 9-bit, signed immediate offset with pre- or post-index
719 writeback.
720 scaled or unscaled 64-bit register offset.
721 scaled or unscaled 32-bit extended register offset.
722
 723    All offsets are assumed to be raw from the decode, i.e. the
 724    simulator is expected to adjust scaled offsets based on the
 725    accessed data size.  With register or extended register offset
 726    versions the same applies, except that in the latter case the
 727    operation may also require a sign extend.
728
729 A separate method is provided for each possible addressing mode. */
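/* For example (a sketch of the decode contract): a 64-bit LDR with
   immediate offset field 2 reaches this code with offset == 2, and
   ldr_abs applies SCALE (offset, 64) to form the byte offset 16.  */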
730
731 /* 32 bit load 32 bit scaled unsigned 12 bit */
732 static void
733 ldr32_abs (sim_cpu *cpu, uint32_t offset)
734 {
735 unsigned rn = INSTR (9, 5);
736 unsigned rt = INSTR (4, 0);
737
738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
739 /* The target register may not be SP but the source may be. */
740 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
741 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
742 + SCALE (offset, 32)));
743 }
744
745 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
746 static void
747 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
748 {
749 unsigned rn = INSTR (9, 5);
750 unsigned rt = INSTR (4, 0);
751 uint64_t address;
752
753 if (rn == rt && wb != NoWriteBack)
754 HALT_UNALLOC;
755
756 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
757
758 if (wb != Post)
759 address += offset;
760
761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
762 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
763
764 if (wb == Post)
765 address += offset;
766
767 if (wb != NoWriteBack)
768 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
769 }
770
771 /* 32 bit load 32 bit scaled or unscaled
772 zero- or sign-extended 32-bit register offset */
773 static void
774 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
775 {
776 unsigned rm = INSTR (20, 16);
777 unsigned rn = INSTR (9, 5);
778 unsigned rt = INSTR (4, 0);
779 /* rn may reference SP, rm and rt must reference ZR */
780
781 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
782 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
783 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
784
785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
786 aarch64_set_reg_u64 (cpu, rt, NO_SP,
787 aarch64_get_mem_u32 (cpu, address + displacement));
788 }
789
790 /* 64 bit load 64 bit scaled unsigned 12 bit */
791 static void
792 ldr_abs (sim_cpu *cpu, uint32_t offset)
793 {
794 unsigned rn = INSTR (9, 5);
795 unsigned rt = INSTR (4, 0);
796
797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
798 /* The target register may not be SP but the source may be. */
799 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
800 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
801 + SCALE (offset, 64)));
802 }
803
804 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
805 static void
806 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
807 {
808 unsigned rn = INSTR (9, 5);
809 unsigned rt = INSTR (4, 0);
810 uint64_t address;
811
812 if (rn == rt && wb != NoWriteBack)
813 HALT_UNALLOC;
814
815 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
816
817 if (wb != Post)
818 address += offset;
819
820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
821 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
822
823 if (wb == Post)
824 address += offset;
825
826 if (wb != NoWriteBack)
827 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
828 }
829
830 /* 64 bit load 64 bit scaled or unscaled zero-
831 or sign-extended 32-bit register offset. */
832 static void
833 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
834 {
835 unsigned rm = INSTR (20, 16);
836 unsigned rn = INSTR (9, 5);
837 unsigned rt = INSTR (4, 0);
838 /* rn may reference SP, rm and rt must reference ZR */
839
840 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
841 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
842 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
843
844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
845 aarch64_set_reg_u64 (cpu, rt, NO_SP,
846 aarch64_get_mem_u64 (cpu, address + displacement));
847 }
848
849 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
850 static void
851 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
852 {
853 unsigned rn = INSTR (9, 5);
854 unsigned rt = INSTR (4, 0);
855
856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 857   /* The target register may not be SP but the source may be.
 858      There is no scaling required for a byte load.  */
859 aarch64_set_reg_u64 (cpu, rt, NO_SP,
860 aarch64_get_mem_u8
861 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
862 }
863
864 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
865 static void
866 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
867 {
868 unsigned rn = INSTR (9, 5);
869 unsigned rt = INSTR (4, 0);
870 uint64_t address;
871
872 if (rn == rt && wb != NoWriteBack)
873 HALT_UNALLOC;
874
875 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
876
877 if (wb != Post)
878 address += offset;
879
880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
881 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
882
883 if (wb == Post)
884 address += offset;
885
886 if (wb != NoWriteBack)
887 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
888 }
889
890 /* 32 bit load zero-extended byte scaled or unscaled zero-
891 or sign-extended 32-bit register offset. */
892 static void
893 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
894 {
895 unsigned rm = INSTR (20, 16);
896 unsigned rn = INSTR (9, 5);
897 unsigned rt = INSTR (4, 0);
898 /* rn may reference SP, rm and rt must reference ZR */
899
900 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
901 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
902 extension);
903
904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
905 /* There is no scaling required for a byte load. */
906 aarch64_set_reg_u64 (cpu, rt, NO_SP,
907 aarch64_get_mem_u8 (cpu, address + displacement));
908 }
909
910 /* 64 bit load sign-extended byte unscaled signed 9 bit
911 with pre- or post-writeback. */
912 static void
913 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
914 {
915 unsigned rn = INSTR (9, 5);
916 unsigned rt = INSTR (4, 0);
917 uint64_t address;
918 int64_t val;
919
920 if (rn == rt && wb != NoWriteBack)
921 HALT_UNALLOC;
922
923 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
924
925 if (wb != Post)
926 address += offset;
927
928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
929 val = aarch64_get_mem_s8 (cpu, address);
930 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
931
932 if (wb == Post)
933 address += offset;
934
935 if (wb != NoWriteBack)
936 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
937 }
938
939 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
940 static void
941 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
942 {
943 ldrsb_wb (cpu, offset, NoWriteBack);
944 }
945
946 /* 64 bit load sign-extended byte scaled or unscaled zero-
947 or sign-extended 32-bit register offset. */
948 static void
949 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
950 {
951 unsigned rm = INSTR (20, 16);
952 unsigned rn = INSTR (9, 5);
953 unsigned rt = INSTR (4, 0);
954 /* rn may reference SP, rm and rt must reference ZR */
955
956 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
957 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
958 extension);
959 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
960 /* There is no scaling required for a byte load. */
961 aarch64_set_reg_s64 (cpu, rt, NO_SP,
962 aarch64_get_mem_s8 (cpu, address + displacement));
963 }
964
965 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
966 static void
967 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
968 {
969 unsigned rn = INSTR (9, 5);
970 unsigned rt = INSTR (4, 0);
971 uint32_t val;
972
973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
974 /* The target register may not be SP but the source may be. */
975 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
976 + SCALE (offset, 16));
977 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
978 }
979
980 /* 32 bit load zero-extended short unscaled signed 9 bit
981 with pre- or post-writeback. */
982 static void
983 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
984 {
985 unsigned rn = INSTR (9, 5);
986 unsigned rt = INSTR (4, 0);
987 uint64_t address;
988
989 if (rn == rt && wb != NoWriteBack)
990 HALT_UNALLOC;
991
992 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
993
994 if (wb != Post)
995 address += offset;
996
997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
998 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
999
1000 if (wb == Post)
1001 address += offset;
1002
1003 if (wb != NoWriteBack)
1004 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1005 }
1006
1007 /* 32 bit load zero-extended short scaled or unscaled zero-
1008 or sign-extended 32-bit register offset. */
1009 static void
1010 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1011 {
1012 unsigned rm = INSTR (20, 16);
1013 unsigned rn = INSTR (9, 5);
1014 unsigned rt = INSTR (4, 0);
1015 /* rn may reference SP, rm and rt must reference ZR */
1016
1017 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1018 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1019 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1020
1021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1022 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1023 aarch64_get_mem_u16 (cpu, address + displacement));
1024 }
1025
1026 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1027 static void
1028 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1029 {
1030 unsigned rn = INSTR (9, 5);
1031 unsigned rt = INSTR (4, 0);
1032 int32_t val;
1033
1034 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1035 /* The target register may not be SP but the source may be. */
1036 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1037 + SCALE (offset, 16));
1038 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1039 }
1040
1041 /* 32 bit load sign-extended short unscaled signed 9 bit
1042 with pre- or post-writeback. */
1043 static void
1044 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1045 {
1046 unsigned rn = INSTR (9, 5);
1047 unsigned rt = INSTR (4, 0);
1048 uint64_t address;
1049
1050 if (rn == rt && wb != NoWriteBack)
1051 HALT_UNALLOC;
1052
1053 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1054
1055 if (wb != Post)
1056 address += offset;
1057
1058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1059 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1060 (int32_t) aarch64_get_mem_s16 (cpu, address));
1061
1062 if (wb == Post)
1063 address += offset;
1064
1065 if (wb != NoWriteBack)
1066 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1067 }
1068
1069 /* 32 bit load sign-extended short scaled or unscaled zero-
1070 or sign-extended 32-bit register offset. */
1071 static void
1072 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1073 {
1074 unsigned rm = INSTR (20, 16);
1075 unsigned rn = INSTR (9, 5);
1076 unsigned rt = INSTR (4, 0);
1077 /* rn may reference SP, rm and rt must reference ZR */
1078
1079 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1080 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1081 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1082
1083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1084 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1085 (int32_t) aarch64_get_mem_s16
1086 (cpu, address + displacement));
1087 }
1088
1089 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1090 static void
1091 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1092 {
1093 unsigned rn = INSTR (9, 5);
1094 unsigned rt = INSTR (4, 0);
1095 int64_t val;
1096
1097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1098 /* The target register may not be SP but the source may be. */
1099 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1100 + SCALE (offset, 16));
1101 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1102 }
1103
1104 /* 64 bit load sign-extended short unscaled signed 9 bit
1105 with pre- or post-writeback. */
1106 static void
1107 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1108 {
1109 unsigned rn = INSTR (9, 5);
1110 unsigned rt = INSTR (4, 0);
1111 uint64_t address;
1112 int64_t val;
1113
1114 if (rn == rt && wb != NoWriteBack)
1115 HALT_UNALLOC;
1116
1117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1118 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1119
1120 if (wb != Post)
1121 address += offset;
1122
1123 val = aarch64_get_mem_s16 (cpu, address);
1124 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1125
1126 if (wb == Post)
1127 address += offset;
1128
1129 if (wb != NoWriteBack)
1130 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1131 }
1132
1133 /* 64 bit load sign-extended short scaled or unscaled zero-
1134 or sign-extended 32-bit register offset. */
1135 static void
1136 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1137 {
1138 unsigned rm = INSTR (20, 16);
1139 unsigned rn = INSTR (9, 5);
1140 unsigned rt = INSTR (4, 0);
1141
1142 /* rn may reference SP, rm and rt must reference ZR */
1143
1144 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1145 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1146 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1147 int64_t val;
1148
1149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1150 val = aarch64_get_mem_s16 (cpu, address + displacement);
1151 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1152 }
1153
1154 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1155 static void
1156 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1157 {
1158 unsigned rn = INSTR (9, 5);
1159 unsigned rt = INSTR (4, 0);
1160 int64_t val;
1161
1162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1163 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1164 + SCALE (offset, 32));
1165 /* The target register may not be SP but the source may be. */
 1166   aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1167 }
1168
1169 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1170 with pre- or post-writeback. */
1171 static void
1172 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1173 {
1174 unsigned rn = INSTR (9, 5);
1175 unsigned rt = INSTR (4, 0);
1176 uint64_t address;
1177
1178 if (rn == rt && wb != NoWriteBack)
1179 HALT_UNALLOC;
1180
1181 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1182
1183 if (wb != Post)
1184 address += offset;
1185
1186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1187 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1188
1189 if (wb == Post)
1190 address += offset;
1191
1192 if (wb != NoWriteBack)
1193 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1194 }
1195
1196 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1197 or sign-extended 32-bit register offset. */
1198 static void
1199 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1200 {
1201 unsigned rm = INSTR (20, 16);
1202 unsigned rn = INSTR (9, 5);
1203 unsigned rt = INSTR (4, 0);
1204 /* rn may reference SP, rm and rt must reference ZR */
1205
1206 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1207 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1208 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1209
1210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1211 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1212 aarch64_get_mem_s32 (cpu, address + displacement));
1213 }
1214
1215 /* N.B. with stores the value in source is written to the
1216 address identified by source2 modified by source3/offset. */
1217
1218 /* 32 bit store scaled unsigned 12 bit. */
1219 static void
1220 str32_abs (sim_cpu *cpu, uint32_t offset)
1221 {
1222 unsigned rn = INSTR (9, 5);
1223 unsigned rt = INSTR (4, 0);
1224
1225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1226 /* The target register may not be SP but the source may be. */
1227 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1228 + SCALE (offset, 32)),
1229 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1230 }
1231
1232 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1233 static void
1234 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1235 {
1236 unsigned rn = INSTR (9, 5);
1237 unsigned rt = INSTR (4, 0);
1238 uint64_t address;
1239
1240 if (rn == rt && wb != NoWriteBack)
1241 HALT_UNALLOC;
1242
1243 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1244 if (wb != Post)
1245 address += offset;
1246
1247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1248 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1249
1250 if (wb == Post)
1251 address += offset;
1252
1253 if (wb != NoWriteBack)
1254 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1255 }
1256
1257 /* 32 bit store scaled or unscaled zero- or
1258 sign-extended 32-bit register offset. */
1259 static void
1260 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1261 {
1262 unsigned rm = INSTR (20, 16);
1263 unsigned rn = INSTR (9, 5);
1264 unsigned rt = INSTR (4, 0);
1265
1266 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1267 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1268 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1269
1270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1271 aarch64_set_mem_u32 (cpu, address + displacement,
 1272 		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
1273 }
1274
1275 /* 64 bit store scaled unsigned 12 bit. */
1276 static void
1277 str_abs (sim_cpu *cpu, uint32_t offset)
1278 {
1279 unsigned rn = INSTR (9, 5);
1280 unsigned rt = INSTR (4, 0);
1281
1282 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1283 aarch64_set_mem_u64 (cpu,
1284 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1285 + SCALE (offset, 64),
1286 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1287 }
1288
1289 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1290 static void
1291 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1292 {
1293 unsigned rn = INSTR (9, 5);
1294 unsigned rt = INSTR (4, 0);
1295 uint64_t address;
1296
1297 if (rn == rt && wb != NoWriteBack)
1298 HALT_UNALLOC;
1299
1300 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1301
1302 if (wb != Post)
1303 address += offset;
1304
1305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1306 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1307
1308 if (wb == Post)
1309 address += offset;
1310
1311 if (wb != NoWriteBack)
1312 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1313 }
1314
1315 /* 64 bit store scaled or unscaled zero-
1316 or sign-extended 32-bit register offset. */
1317 static void
1318 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1319 {
1320 unsigned rm = INSTR (20, 16);
1321 unsigned rn = INSTR (9, 5);
1322 unsigned rt = INSTR (4, 0);
1323 /* rn may reference SP, rm and rt must reference ZR */
1324
1325 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1326 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1327 extension);
1328 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1329
1330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1331 aarch64_set_mem_u64 (cpu, address + displacement,
1332 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1333 }
1334
1335 /* 32 bit store byte scaled unsigned 12 bit. */
1336 static void
1337 strb_abs (sim_cpu *cpu, uint32_t offset)
1338 {
1339 unsigned rn = INSTR (9, 5);
1340 unsigned rt = INSTR (4, 0);
1341
1342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1343 /* The target register may not be SP but the source may be.
 1344      There is no scaling required for a byte store.  */
1345 aarch64_set_mem_u8 (cpu,
1346 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1347 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1348 }
1349
1350 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1351 static void
1352 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1353 {
1354 unsigned rn = INSTR (9, 5);
1355 unsigned rt = INSTR (4, 0);
1356 uint64_t address;
1357
1358 if (rn == rt && wb != NoWriteBack)
1359 HALT_UNALLOC;
1360
1361 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1362
1363 if (wb != Post)
1364 address += offset;
1365
1366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1367 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1368
1369 if (wb == Post)
1370 address += offset;
1371
1372 if (wb != NoWriteBack)
1373 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1374 }
1375
1376 /* 32 bit store byte scaled or unscaled zero-
1377 or sign-extended 32-bit register offset. */
1378 static void
1379 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1380 {
1381 unsigned rm = INSTR (20, 16);
1382 unsigned rn = INSTR (9, 5);
1383 unsigned rt = INSTR (4, 0);
1384 /* rn may reference SP, rm and rt must reference ZR */
1385
1386 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1387 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1388 extension);
1389
1390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
 1391   /* There is no scaling required for a byte store.  */
1392 aarch64_set_mem_u8 (cpu, address + displacement,
1393 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1394 }
1395
1396 /* 32 bit store short scaled unsigned 12 bit. */
1397 static void
1398 strh_abs (sim_cpu *cpu, uint32_t offset)
1399 {
1400 unsigned rn = INSTR (9, 5);
1401 unsigned rt = INSTR (4, 0);
1402
1403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1404 /* The target register may not be SP but the source may be. */
1405 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1406 + SCALE (offset, 16),
1407 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1408 }
1409
1410 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1411 static void
1412 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1413 {
1414 unsigned rn = INSTR (9, 5);
1415 unsigned rt = INSTR (4, 0);
1416 uint64_t address;
1417
1418 if (rn == rt && wb != NoWriteBack)
1419 HALT_UNALLOC;
1420
1421 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1422
1423 if (wb != Post)
1424 address += offset;
1425
1426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1427 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1428
1429 if (wb == Post)
1430 address += offset;
1431
1432 if (wb != NoWriteBack)
1433 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1434 }
1435
1436 /* 32 bit store short scaled or unscaled zero-
1437 or sign-extended 32-bit register offset. */
1438 static void
1439 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1440 {
1441 unsigned rm = INSTR (20, 16);
1442 unsigned rn = INSTR (9, 5);
1443 unsigned rt = INSTR (4, 0);
1444 /* rn may reference SP, rm and rt must reference ZR */
1445
1446 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1447 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1448 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1449
1450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1451 aarch64_set_mem_u16 (cpu, address + displacement,
1452 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1453 }
1454
1455 /* Prefetch unsigned 12 bit. */
1456 static void
1457 prfm_abs (sim_cpu *cpu, uint32_t offset)
1458 {
1459 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
 1460 			     00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1461 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1462 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
 1463 			     10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1464 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1465 ow ==> UNALLOC
1466 PrfOp prfop = prfop (instr, 4, 0);
1467 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1468 + SCALE (offset, 64). */
1469
1470 /* TODO : implement prefetch of address. */
1471 }
1472
1473 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1474 static void
1475 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1476 {
1477 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
 1478 			     00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1479 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1480 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
 1481 			     10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1482 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1483 ow ==> UNALLOC
1484 rn may reference SP, rm may only reference ZR
1485 PrfOp prfop = prfop (instr, 4, 0);
1486 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1487 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1488 extension);
1489 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1490 uint64_t address = base + displacement. */
1491
1492 /* TODO : implement prefetch of address */
1493 }
1494
1495 /* 64 bit pc-relative prefetch. */
1496 static void
1497 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1498 {
1499 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
 1500 			     00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1501 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1502 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
 1503 			     10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1504 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1505 ow ==> UNALLOC
1506 PrfOp prfop = prfop (instr, 4, 0);
1507 uint64_t address = aarch64_get_PC (cpu) + offset. */
1508
1509 /* TODO : implement this */
1510 }
1511
1512 /* Load-store exclusive. */
1513
1514 static void
1515 ldxr (sim_cpu *cpu)
1516 {
1517 unsigned rn = INSTR (9, 5);
1518 unsigned rt = INSTR (4, 0);
1519 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1520 int size = INSTR (31, 30);
1521 /* int ordered = INSTR (15, 15); */
1522 /* int exclusive = ! INSTR (23, 23); */
1523
1524 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1525 switch (size)
1526 {
1527 case 0:
1528 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1529 break;
1530 case 1:
1531 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1532 break;
1533 case 2:
1534 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1535 break;
1536 case 3:
1537 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1538 break;
1539 }
1540 }
1541
1542 static void
1543 stxr (sim_cpu *cpu)
1544 {
1545 unsigned rn = INSTR (9, 5);
1546 unsigned rt = INSTR (4, 0);
1547 unsigned rs = INSTR (20, 16);
1548 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1549 int size = INSTR (31, 30);
1550 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1551
1552 switch (size)
1553 {
1554 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1555 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1556 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1557 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1558 }
1559
1560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1561 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
1562 }
1563
1564 static void
1565 dexLoadLiteral (sim_cpu *cpu)
1566 {
1567 /* instr[29,27] == 011
1568 instr[25,24] == 00
1569 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1570 010 ==> LDRX, 011 ==> FLDRD
1571 100 ==> LDRSW, 101 ==> FLDRQ
1572 110 ==> PRFM, 111 ==> UNALLOC
1573 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1574 instr[23, 5] == simm19 */
1575
1576 /* unsigned rt = INSTR (4, 0); */
1577 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1578 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1579
1580 switch (dispatch)
1581 {
1582 case 0: ldr32_pcrel (cpu, imm); break;
1583 case 1: fldrs_pcrel (cpu, imm); break;
1584 case 2: ldr_pcrel (cpu, imm); break;
1585 case 3: fldrd_pcrel (cpu, imm); break;
1586 case 4: ldrsw_pcrel (cpu, imm); break;
1587 case 5: fldrq_pcrel (cpu, imm); break;
1588 case 6: prfm_pcrel (cpu, imm); break;
1589 case 7:
1590 default:
1591 HALT_UNALLOC;
1592 }
1593 }
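/* For example (a sketch): for FLDRS (literal), instr[31,30] == 00 and
   the V bit instr[26] == 1, so dispatch == (0 << 1) | 1 == 1 and the
   switch selects fldrs_pcrel with the sign-extended simm19 word
   offset.  */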
1594
1595 /* Immediate arithmetic
1596 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1597 value left shifted by 12 bits (done at decode).
1598
1599 N.B. the register args (dest, source) can normally be Xn or SP.
 1600    The exception occurs for flag setting instructions which may
1601 only use Xn for the output (dest). */
1602
1603 /* 32 bit add immediate. */
1604 static void
1605 add32 (sim_cpu *cpu, uint32_t aimm)
1606 {
1607 unsigned rn = INSTR (9, 5);
1608 unsigned rd = INSTR (4, 0);
1609
1610 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1611 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1612 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1613 }
1614
1615 /* 64 bit add immediate. */
1616 static void
1617 add64 (sim_cpu *cpu, uint32_t aimm)
1618 {
1619 unsigned rn = INSTR (9, 5);
1620 unsigned rd = INSTR (4, 0);
1621
1622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1623 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1624 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1625 }
1626
1627 static void
1628 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1629 {
1630 int32_t result = value1 + value2;
1631 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1632 uint64_t uresult = (uint64_t)(uint32_t) value1
1633 + (uint64_t)(uint32_t) value2;
1634 uint32_t flags = 0;
1635
1636 if (result == 0)
1637 flags |= Z;
1638
1639 if (result & (1 << 31))
1640 flags |= N;
1641
1642 if (uresult != (uint32_t)uresult)
1643 flags |= C;
1644
1645 if (sresult != (int32_t)sresult)
1646 flags |= V;
1647
1648 aarch64_set_CPSR (cpu, flags);
1649 }
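/* A worked example (values only): set_flags_for_add32 with
   value1 == 0x7fffffff and value2 == 1 produces result 0x80000000,
   so N and V are set (signed overflow) while C and Z stay clear;
   the unsigned sum still fits in 32 bits, hence no carry.  */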
1650
1651 #define NEG(a) (((a) & signbit) == signbit)
1652 #define POS(a) (((a) & signbit) == 0)
1653
1654 static void
1655 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1656 {
1657 uint64_t result = value1 + value2;
1658 uint32_t flags = 0;
1659 uint64_t signbit = 1ULL << 63;
1660
1661 if (result == 0)
1662 flags |= Z;
1663
1664 if (NEG (result))
1665 flags |= N;
1666
1667 if ( (NEG (value1) && NEG (value2))
1668 || (NEG (value1) && POS (result))
1669 || (NEG (value2) && POS (result)))
1670 flags |= C;
1671
1672 if ( (NEG (value1) && NEG (value2) && POS (result))
1673 || (POS (value1) && POS (value2) && NEG (result)))
1674 flags |= V;
1675
1676 aarch64_set_CPSR (cpu, flags);
1677 }
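/* A worked example (values only): adding 0x8000000000000000 to
   itself wraps to 0, so Z, C and V are all set: both operands are
   negative (carry out) and the result is positive (signed overflow),
   while N stays clear.  */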
1678
1679 static void
1680 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1681 {
1682 uint32_t result = value1 - value2;
1683 uint32_t flags = 0;
1684 uint32_t signbit = 1U << 31;
1685
1686 if (result == 0)
1687 flags |= Z;
1688
1689 if (NEG (result))
1690 flags |= N;
1691
1692 if ( (NEG (value1) && POS (value2))
1693 || (NEG (value1) && POS (result))
1694 || (POS (value2) && POS (result)))
1695 flags |= C;
1696
1697 if ( (NEG (value1) && POS (value2) && POS (result))
1698 || (POS (value1) && NEG (value2) && NEG (result)))
1699 flags |= V;
1700
1701 aarch64_set_CPSR (cpu, flags);
1702 }
1703
1704 static void
1705 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1706 {
1707 uint64_t result = value1 - value2;
1708 uint32_t flags = 0;
1709 uint64_t signbit = 1ULL << 63;
1710
1711 if (result == 0)
1712 flags |= Z;
1713
1714 if (NEG (result))
1715 flags |= N;
1716
1717 if ( (NEG (value1) && POS (value2))
1718 || (NEG (value1) && POS (result))
1719 || (POS (value2) && POS (result)))
1720 flags |= C;
1721
1722 if ( (NEG (value1) && POS (value2) && POS (result))
1723 || (POS (value1) && NEG (value2) && NEG (result)))
1724 flags |= V;
1725
1726 aarch64_set_CPSR (cpu, flags);
1727 }
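/* A worked example (values only): subtracting equal values, e.g.
   value1 == value2 == 1, gives result 0, so Z is set and C is set
   (no borrow, as for AArch64 SUBS/CMP on equal operands), with N and
   V clear.  */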
1728
1729 static void
1730 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1731 {
1732 uint32_t flags = 0;
1733
1734 if (result == 0)
1735 flags |= Z;
1736 else
1737 flags &= ~ Z;
1738
1739 if (result & (1 << 31))
1740 flags |= N;
1741 else
1742 flags &= ~ N;
1743
1744 aarch64_set_CPSR (cpu, flags);
1745 }
1746
1747 static void
1748 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1749 {
1750 uint32_t flags = 0;
1751
1752 if (result == 0)
1753 flags |= Z;
1754 else
1755 flags &= ~ Z;
1756
1757 if (result & (1ULL << 63))
1758 flags |= N;
1759 else
1760 flags &= ~ N;
1761
1762 aarch64_set_CPSR (cpu, flags);
1763 }
1764
1765 /* 32 bit add immediate set flags. */
1766 static void
1767 adds32 (sim_cpu *cpu, uint32_t aimm)
1768 {
1769 unsigned rn = INSTR (9, 5);
1770 unsigned rd = INSTR (4, 0);
1771 /* TODO : do we need to worry about signs here? */
1772 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1773
1774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1775 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1776 set_flags_for_add32 (cpu, value1, aimm);
1777 }
1778
1779 /* 64 bit add immediate set flags. */
1780 static void
1781 adds64 (sim_cpu *cpu, uint32_t aimm)
1782 {
1783 unsigned rn = INSTR (9, 5);
1784 unsigned rd = INSTR (4, 0);
1785 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1786 uint64_t value2 = aimm;
1787
1788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1789 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1790 set_flags_for_add64 (cpu, value1, value2);
1791 }
1792
1793 /* 32 bit sub immediate. */
1794 static void
1795 sub32 (sim_cpu *cpu, uint32_t aimm)
1796 {
1797 unsigned rn = INSTR (9, 5);
1798 unsigned rd = INSTR (4, 0);
1799
1800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1801 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1802 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1803 }
1804
1805 /* 64 bit sub immediate. */
1806 static void
1807 sub64 (sim_cpu *cpu, uint32_t aimm)
1808 {
1809 unsigned rn = INSTR (9, 5);
1810 unsigned rd = INSTR (4, 0);
1811
1812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1813 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1814 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1815 }
1816
1817 /* 32 bit sub immediate set flags. */
1818 static void
1819 subs32 (sim_cpu *cpu, uint32_t aimm)
1820 {
1821 unsigned rn = INSTR (9, 5);
1822 unsigned rd = INSTR (4, 0);
1823 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1824 uint32_t value2 = aimm;
1825
1826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1827 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1828 set_flags_for_sub32 (cpu, value1, value2);
1829 }
1830
1831 /* 64 bit sub immediate set flags. */
1832 static void
1833 subs64 (sim_cpu *cpu, uint32_t aimm)
1834 {
1835 unsigned rn = INSTR (9, 5);
1836 unsigned rd = INSTR (4, 0);
1837 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1838 uint32_t value2 = aimm;
1839
1840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1841 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1842 set_flags_for_sub64 (cpu, value1, value2);
1843 }
1844
1845 /* Data Processing Register. */
1846
1847 /* First two helpers to perform the shift operations. */
1848
1849 static inline uint32_t
1850 shifted32 (uint32_t value, Shift shift, uint32_t count)
1851 {
1852 switch (shift)
1853 {
1854 default:
1855 case LSL:
1856 return (value << count);
1857 case LSR:
1858 return (value >> count);
1859 case ASR:
1860 {
1861 int32_t svalue = value;
1862 return (svalue >> count);
1863 }
1864 case ROR:
1865 {
1866 uint32_t top = value >> count;
1867 uint32_t bottom = count ? (value << (32 - count)) : 0; /* ROR #0 is a no-op; avoid a shift by 32. */
1868 return (bottom | top);
1869 }
1870 }
1871 }
1872
1873 static inline uint64_t
1874 shifted64 (uint64_t value, Shift shift, uint32_t count)
1875 {
1876 switch (shift)
1877 {
1878 default:
1879 case LSL:
1880 return (value << count);
1881 case LSR:
1882 return (value >> count);
1883 case ASR:
1884 {
1885 int64_t svalue = value;
1886 return (svalue >> count);
1887 }
1888 case ROR:
1889 {
1890 uint64_t top = value >> count;
1891 uint64_t bottom = count ? (value << (64 - count)) : 0; /* ROR #0 is a no-op; avoid a shift by 64. */
1892 return (bottom | top);
1893 }
1894 }
1895 }
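/* Example: shifted32 (0x80000001, ROR, 1) computes top = 0x40000000
   and bottom = 0x80000000, returning 0xC0000000: both set bits rotate
   right by one place. */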
1896
1897 /* Arithmetic shifted register.
1898 These allow an optional LSL, ASR or LSR to the second source
1899 register with a count up to the register bit count.
1900
1901 N.B. register args may not be SP. */
1902
1903 /* 32 bit ADD shifted register. */
1904 static void
1905 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1906 {
1907 unsigned rm = INSTR (20, 16);
1908 unsigned rn = INSTR (9, 5);
1909 unsigned rd = INSTR (4, 0);
1910
1911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1912 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1913 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1914 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1915 shift, count));
1916 }
1917
1918 /* 64 bit ADD shifted register. */
1919 static void
1920 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1921 {
1922 unsigned rm = INSTR (20, 16);
1923 unsigned rn = INSTR (9, 5);
1924 unsigned rd = INSTR (4, 0);
1925
1926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1927 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1928 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1929 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1930 shift, count));
1931 }
1932
1933 /* 32 bit ADD shifted register setting flags. */
1934 static void
1935 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1936 {
1937 unsigned rm = INSTR (20, 16);
1938 unsigned rn = INSTR (9, 5);
1939 unsigned rd = INSTR (4, 0);
1940
1941 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1942 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1943 shift, count);
1944
1945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1947 set_flags_for_add32 (cpu, value1, value2);
1948 }
1949
1950 /* 64 bit ADD shifted register setting flags. */
1951 static void
1952 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1953 {
1954 unsigned rm = INSTR (20, 16);
1955 unsigned rn = INSTR (9, 5);
1956 unsigned rd = INSTR (4, 0);
1957
1958 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1959 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1960 shift, count);
1961
1962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1963 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1964 set_flags_for_add64 (cpu, value1, value2);
1965 }
1966
1967 /* 32 bit SUB shifted register. */
1968 static void
1969 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1970 {
1971 unsigned rm = INSTR (20, 16);
1972 unsigned rn = INSTR (9, 5);
1973 unsigned rd = INSTR (4, 0);
1974
1975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1976 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1977 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1978 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1979 shift, count));
1980 }
1981
1982 /* 64 bit SUB shifted register. */
1983 static void
1984 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1992 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1993 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1994 shift, count));
1995 }
1996
1997 /* 32 bit SUB shifted register setting flags. */
1998 static void
1999 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2000 {
2001 unsigned rm = INSTR (20, 16);
2002 unsigned rn = INSTR (9, 5);
2003 unsigned rd = INSTR (4, 0);
2004
2005 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2006 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2007 shift, count);
2008
2009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2010 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2011 set_flags_for_sub32 (cpu, value1, value2);
2012 }
2013
2014 /* 64 bit SUB shifted register setting flags. */
2015 static void
2016 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2017 {
2018 unsigned rm = INSTR (20, 16);
2019 unsigned rn = INSTR (9, 5);
2020 unsigned rd = INSTR (4, 0);
2021
2022 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2023 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2024 shift, count);
2025
2026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2027 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2028 set_flags_for_sub64 (cpu, value1, value2);
2029 }
2030
2031 /* First a couple more helpers to fetch the
2032 relevant source register element either
2033 sign or zero extended as required by the
2034 extension value. */
2035
2036 static uint32_t
2037 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2038 {
2039 switch (extension)
2040 {
2041 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2042 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2043 case UXTW: ATTRIBUTE_FALLTHROUGH;
2044 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2045 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2046 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2047 case SXTW: ATTRIBUTE_FALLTHROUGH;
2048 case SXTX: ATTRIBUTE_FALLTHROUGH;
2049 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2050 }
2051 }
2052
2053 static uint64_t
2054 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2055 {
2056 switch (extension)
2057 {
2058 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2059 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2060 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2061 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2062 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2063 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2064 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 case SXTX:
2066 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2067 }
2068 }
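/* Example: with the low byte of the source register holding 0x80,
   extreg64 (cpu, lo, SXTB) sign-extends it and returns
   0xffffffffffffff80, while UXTB returns 0x80. */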
2069
2070 /* Arithmetic extending register
2071 These allow an optional sign extension of some portion of the
2072 second source register followed by an optional left shift of
2073 between 0 and 4 bits.
2074
2075 N.B. output (dest) and first input arg (source) may normally be Xn
2076 or SP. However, for flag setting operations dest can only be
2077 Xn. Second input registers are always Xn. */
2078
2079 /* 32 bit ADD extending register. */
2080 static void
2081 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2082 {
2083 unsigned rm = INSTR (20, 16);
2084 unsigned rn = INSTR (9, 5);
2085 unsigned rd = INSTR (4, 0);
2086
2087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2088 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2089 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2090 + (extreg32 (cpu, rm, extension) << shift));
2091 }
2092
2093 /* 64 bit ADD extending register.
2094 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2095 static void
2096 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2097 {
2098 unsigned rm = INSTR (20, 16);
2099 unsigned rn = INSTR (9, 5);
2100 unsigned rd = INSTR (4, 0);
2101
2102 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2103 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2104 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2105 + (extreg64 (cpu, rm, extension) << shift));
2106 }
2107
2108 /* 32 bit ADD extending register setting flags. */
2109 static void
2110 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2111 {
2112 unsigned rm = INSTR (20, 16);
2113 unsigned rn = INSTR (9, 5);
2114 unsigned rd = INSTR (4, 0);
2115
2116 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2117 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2118
2119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2120 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2121 set_flags_for_add32 (cpu, value1, value2);
2122 }
2123
2124 /* 64 bit ADD extending register setting flags */
2125 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2126 static void
2127 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2128 {
2129 unsigned rm = INSTR (20, 16);
2130 unsigned rn = INSTR (9, 5);
2131 unsigned rd = INSTR (4, 0);
2132
2133 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2134 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2135
2136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2137 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2138 set_flags_for_add64 (cpu, value1, value2);
2139 }
2140
2141 /* 32 bit SUB extending register. */
2142 static void
2143 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2144 {
2145 unsigned rm = INSTR (20, 16);
2146 unsigned rn = INSTR (9, 5);
2147 unsigned rd = INSTR (4, 0);
2148
2149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2150 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2151 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2152 - (extreg32 (cpu, rm, extension) << shift));
2153 }
2154
2155 /* 64 bit SUB extending register. */
2156 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2157 static void
2158 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2159 {
2160 unsigned rm = INSTR (20, 16);
2161 unsigned rn = INSTR (9, 5);
2162 unsigned rd = INSTR (4, 0);
2163
2164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2165 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2166 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2167 - (extreg64 (cpu, rm, extension) << shift));
2168 }
2169
2170 /* 32 bit SUB extending register setting flags. */
2171 static void
2172 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2173 {
2174 unsigned rm = INSTR (20, 16);
2175 unsigned rn = INSTR (9, 5);
2176 unsigned rd = INSTR (4, 0);
2177
2178 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2179 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2180
2181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2182 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2183 set_flags_for_sub32 (cpu, value1, value2);
2184 }
2185
2186 /* 64 bit SUB extending register setting flags */
2187 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2188 static void
2189 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2190 {
2191 unsigned rm = INSTR (20, 16);
2192 unsigned rn = INSTR (9, 5);
2193 unsigned rd = INSTR (4, 0);
2194
2195 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2196 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2197
2198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2199 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2200 set_flags_for_sub64 (cpu, value1, value2);
2201 }
2202
2203 static void
2204 dexAddSubtractImmediate (sim_cpu *cpu)
2205 {
2206 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2207 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2208 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2209 instr[28,24] = 10001
2210 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2211 instr[21,10] = uimm12
2212 instr[9,5] = Rn
2213 instr[4,0] = Rd */
2214
2215 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2216 uint32_t shift = INSTR (23, 22);
2217 uint32_t imm = INSTR (21, 10);
2218 uint32_t dispatch = INSTR (31, 29);
2219
2220 NYI_assert (28, 24, 0x11);
2221
2222 if (shift > 1)
2223 HALT_UNALLOC;
2224
2225 if (shift)
2226 imm <<= 12;
2227
2228 switch (dispatch)
2229 {
2230 case 0: add32 (cpu, imm); break;
2231 case 1: adds32 (cpu, imm); break;
2232 case 2: sub32 (cpu, imm); break;
2233 case 3: subs32 (cpu, imm); break;
2234 case 4: add64 (cpu, imm); break;
2235 case 5: adds64 (cpu, imm); break;
2236 case 6: sub64 (cpu, imm); break;
2237 case 7: subs64 (cpu, imm); break;
2238 }
2239 }
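/* Decode example: for ADDS W0, W1, #7 the fields are size:op:set == 001
   (dispatch == 1), shift == 0 and uimm12 == 7, so adds32 (cpu, 7) is
   called; with shift == 1 the immediate would first be scaled to
   7 << 12. */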
2240
2241 static void
2242 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2243 {
2244 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2245 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2246 instr[28,24] = 01011
2247 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2248 instr[21] = 0
2249 instr[20,16] = Rm
2250 instr[15,10] = count : must be 0xxxxx for 32 bit
2251 instr[9,5] = Rn
2252 instr[4,0] = Rd */
2253
2254 uint32_t size = INSTR (31, 31);
2255 uint32_t count = INSTR (15, 10);
2256 Shift shiftType = INSTR (23, 22);
2257
2258 NYI_assert (28, 24, 0x0B);
2259 NYI_assert (21, 21, 0);
2260
2261 /* Shift encoded as ROR is unallocated. */
2262 if (shiftType == ROR)
2263 HALT_UNALLOC;
2264
2265 /* 32 bit operations must have count[5] = 0
2266 or else we have an UNALLOC. */
2267 if (size == 0 && uimm (count, 5, 5))
2268 HALT_UNALLOC;
2269
2270 /* Dispatch on size:op i.e instr [31,29]. */
2271 switch (INSTR (31, 29))
2272 {
2273 case 0: add32_shift (cpu, shiftType, count); break;
2274 case 1: adds32_shift (cpu, shiftType, count); break;
2275 case 2: sub32_shift (cpu, shiftType, count); break;
2276 case 3: subs32_shift (cpu, shiftType, count); break;
2277 case 4: add64_shift (cpu, shiftType, count); break;
2278 case 5: adds64_shift (cpu, shiftType, count); break;
2279 case 6: sub64_shift (cpu, shiftType, count); break;
2280 case 7: subs64_shift (cpu, shiftType, count); break;
2281 }
2282 }
2283
2284 static void
2285 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2286 {
2287 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2288 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2289 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2290 instr[28,24] = 01011
2291 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2292 instr[21] = 1
2293 instr[20,16] = Rm
2294 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2295 010 ==> LSL|UXTW, 011 ==> UXTX,
2296 100 ==> SXTB, 101 ==> SXTH,
2297 110 ==> SXTW, 111 ==> SXTX,
2298 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2299 instr[9,5] = Rn
2300 instr[4,0] = Rd */
2301
2302 Extension extensionType = INSTR (15, 13);
2303 uint32_t shift = INSTR (12, 10);
2304
2305 NYI_assert (28, 24, 0x0B);
2306 NYI_assert (21, 21, 1);
2307
2308 /* Shift may not exceed 4. */
2309 if (shift > 4)
2310 HALT_UNALLOC;
2311
2312 /* Dispatch on size:op:set?. */
2313 switch (INSTR (31, 29))
2314 {
2315 case 0: add32_ext (cpu, extensionType, shift); break;
2316 case 1: adds32_ext (cpu, extensionType, shift); break;
2317 case 2: sub32_ext (cpu, extensionType, shift); break;
2318 case 3: subs32_ext (cpu, extensionType, shift); break;
2319 case 4: add64_ext (cpu, extensionType, shift); break;
2320 case 5: adds64_ext (cpu, extensionType, shift); break;
2321 case 6: sub64_ext (cpu, extensionType, shift); break;
2322 case 7: subs64_ext (cpu, extensionType, shift); break;
2323 }
2324 }
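/* Decode example: ADD X0, SP, W1, UXTW #2 dispatches to add64_ext with
   extension == UXTW and shift == 2, adding the zero-extended 32-bit
   value of W1, shifted left by two, to the stack pointer. */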
2325
2326 /* Conditional data processing
2327 Condition register is implicit 3rd source. */
2328
2329 /* 32 bit add with carry. */
2330 /* N.B. register args may not be SP. */
2331
2332 static void
2333 adc32 (sim_cpu *cpu)
2334 {
2335 unsigned rm = INSTR (20, 16);
2336 unsigned rn = INSTR (9, 5);
2337 unsigned rd = INSTR (4, 0);
2338
2339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2340 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2341 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2342 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2343 + IS_SET (C));
2344 }
2345
2346 /* 64 bit add with carry */
2347 static void
2348 adc64 (sim_cpu *cpu)
2349 {
2350 unsigned rm = INSTR (20, 16);
2351 unsigned rn = INSTR (9, 5);
2352 unsigned rd = INSTR (4, 0);
2353
2354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2356 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2357 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2358 + IS_SET (C));
2359 }
2360
2361 /* 32 bit add with carry setting flags. */
2362 static void
2363 adcs32 (sim_cpu *cpu)
2364 {
2365 unsigned rm = INSTR (20, 16);
2366 unsigned rn = INSTR (9, 5);
2367 unsigned rd = INSTR (4, 0);
2368
2369 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2370 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2371 uint32_t carry = IS_SET (C);
2372
2373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2374 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2375 set_flags_for_add32 (cpu, value1, value2 + carry);
2376 }
2377
2378 /* 64 bit add with carry setting flags. */
2379 static void
2380 adcs64 (sim_cpu *cpu)
2381 {
2382 unsigned rm = INSTR (20, 16);
2383 unsigned rn = INSTR (9, 5);
2384 unsigned rd = INSTR (4, 0);
2385
2386 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2387 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2388 uint64_t carry = IS_SET (C);
2389
2390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2391 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2392 set_flags_for_add64 (cpu, value1, value2 + carry);
2393 }
2394
2395 /* 32 bit sub with carry. */
2396 static void
2397 sbc32 (sim_cpu *cpu)
2398 {
2399 unsigned rm = INSTR (20, 16);
2400 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2401 unsigned rd = INSTR (4, 0);
2402
2403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2404 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2405 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2406 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2407 - 1 + IS_SET (C));
2408 }
2409
2410 /* 64 bit sub with carry */
2411 static void
2412 sbc64 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5);
2416 unsigned rd = INSTR (4, 0);
2417
2418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2419 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2420 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2421 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2422 - 1 + IS_SET (C));
2423 }
2424
2425 /* 32 bit sub with carry setting flags */
2426 static void
2427 sbcs32 (sim_cpu *cpu)
2428 {
2429 unsigned rm = INSTR (20, 16);
2430 unsigned rn = INSTR (9, 5);
2431 unsigned rd = INSTR (4, 0);
2432
2433 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2434 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2435 uint32_t carry = IS_SET (C);
2436 uint32_t result = value1 - value2 - 1 + carry; /* i.e. value1 + ~value2 + carry. */
2437
2438 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2439 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2440 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2441 }
2442
2443 /* 64 bit sub with carry setting flags */
2444 static void
2445 sbcs64 (sim_cpu *cpu)
2446 {
2447 unsigned rm = INSTR (20, 16);
2448 unsigned rn = INSTR (9, 5);
2449 unsigned rd = INSTR (4, 0);
2450
2451 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2452 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2453 uint64_t carry = IS_SET (C);
2454 uint64_t result = value1 - value2 - 1 + carry; /* i.e. value1 + ~value2 + carry. */
2455
2456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2457 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2458 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2459 }
2460
2461 static void
2462 dexAddSubtractWithCarry (sim_cpu *cpu)
2463 {
2464 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2465 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2466 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2467 instr[28,21] = 1 1010 000
2468 instr[20,16] = Rm
2469 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2470 instr[9,5] = Rn
2471 instr[4,0] = Rd */
2472
2473 uint32_t op2 = INSTR (15, 10);
2474
2475 NYI_assert (28, 21, 0xD0);
2476
2477 if (op2 != 0)
2478 HALT_UNALLOC;
2479
2480 /* Dispatch on size:op:set?. */
2481 switch (INSTR (31, 29))
2482 {
2483 case 0: adc32 (cpu); break;
2484 case 1: adcs32 (cpu); break;
2485 case 2: sbc32 (cpu); break;
2486 case 3: sbcs32 (cpu); break;
2487 case 4: adc64 (cpu); break;
2488 case 5: adcs64 (cpu); break;
2489 case 6: sbc64 (cpu); break;
2490 case 7: sbcs64 (cpu); break;
2491 }
2492 }
2493
2494 static uint32_t
2495 testConditionCode (sim_cpu *cpu, CondCode cc)
2496 {
2497 /* This should be reducible to branchless logic
2498 by some careful testing of bits in CC followed
2499 by the requisite masking and combining of bits
2500 from the flag register.
2501
2502 For now we do it with a switch. */
2503 int res;
2504
2505 switch (cc)
2506 {
2507 case EQ: res = IS_SET (Z); break;
2508 case NE: res = IS_CLEAR (Z); break;
2509 case CS: res = IS_SET (C); break;
2510 case CC: res = IS_CLEAR (C); break;
2511 case MI: res = IS_SET (N); break;
2512 case PL: res = IS_CLEAR (N); break;
2513 case VS: res = IS_SET (V); break;
2514 case VC: res = IS_CLEAR (V); break;
2515 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2516 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2517 case GE: res = IS_SET (N) == IS_SET (V); break;
2518 case LT: res = IS_SET (N) != IS_SET (V); break;
2519 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2520 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2521 case AL:
2522 case NV:
2523 default:
2524 res = 1;
2525 break;
2526 }
2527 return res;
2528 }
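/* Example: after a compare of two equal values Z is set and N == V, so
   EQ, GE and LE all return 1 here while NE, LT and GT return 0. */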
2529
2530 static void
2531 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2532 {
2533 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2534 instr[30] = op : 0 ==> CCMN (flags for Rn + op2), 1 ==> CCMP (flags for Rn - op2)
2535 instr[29,21] = 1 1101 0010
2536 instr[20,16] = Rm or const
2537 instr[15,12] = cond
2538 instr[11] = compare reg (0) or const (1)
2539 instr[10] = 0
2540 instr[9,5] = Rn
2541 instr[4] = 0
2542 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2543 signed int negate;
2544 unsigned rm;
2545 unsigned rn;
2546
2547 NYI_assert (29, 21, 0x1d2);
2548 NYI_assert (10, 10, 0);
2549 NYI_assert (4, 4, 0);
2550
2551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2552 if (! testConditionCode (cpu, INSTR (15, 12)))
2553 {
2554 aarch64_set_CPSR (cpu, INSTR (3, 0));
2555 return;
2556 }
2557
2558 negate = INSTR (30, 30) ? 1 : -1;
2559 rm = INSTR (20, 16);
2560 rn = INSTR ( 9, 5);
2561
2562 if (INSTR (31, 31))
2563 {
2564 if (INSTR (11, 11))
2565 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2566 negate * (uint64_t) rm);
2567 else
2568 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2569 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2570 }
2571 else
2572 {
2573 if (INSTR (11, 11))
2574 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2575 negate * rm);
2576 else
2577 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2578 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2579 }
2580 }
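/* Example: CCMP X1, X2, #0, EQ behaves like CMP X1, X2 when the Z flag
   is set on entry; otherwise the flags are simply loaded with the
   immediate NZCV value 0. */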
2581
2582 static void
2583 do_vec_MOV_whole_vector (sim_cpu *cpu)
2584 {
2585 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2586
2587 instr[31] = 0
2588 instr[30] = half(0)/full(1)
2589 instr[29,21] = 001110101
2590 instr[20,16] = Vs
2591 instr[15,10] = 000111
2592 instr[9,5] = Vs
2593 instr[4,0] = Vd */
2594
2595 unsigned vs = INSTR (9, 5);
2596 unsigned vd = INSTR (4, 0);
2597
2598 NYI_assert (29, 21, 0x075);
2599 NYI_assert (15, 10, 0x07);
2600
2601 if (INSTR (20, 16) != vs)
2602 HALT_NYI;
2603
2604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2605 if (INSTR (30, 30))
2606 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2607
2608 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2609 }
2610
2611 static void
2612 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2613 {
2614 /* instr[31] = 0
2615 instr[30] = word(0)/long(1)
2616 instr[29,21] = 00 1110 000
2617 instr[20,16] = element size and index
2618 instr[15,10] = 00 0010 11
2619 instr[9,5] = V source
2620 instr[4,0] = R dest */
2621
2622 unsigned vs = INSTR (9, 5);
2623 unsigned rd = INSTR (4, 0);
2624 unsigned imm5 = INSTR (20, 16);
2625 unsigned full = INSTR (30, 30);
2626 int size, index;
2627
2628 NYI_assert (29, 21, 0x070);
2629 NYI_assert (15, 10, 0x0B);
2630
2631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2632
2633 if (imm5 & 0x1)
2634 {
2635 size = 0;
2636 index = (imm5 >> 1) & 0xF;
2637 }
2638 else if (imm5 & 0x2)
2639 {
2640 size = 1;
2641 index = (imm5 >> 2) & 0x7;
2642 }
2643 else if (full && (imm5 & 0x4))
2644 {
2645 size = 2;
2646 index = (imm5 >> 3) & 0x3;
2647 }
2648 else
2649 HALT_UNALLOC;
2650
2651 switch (size)
2652 {
2653 case 0:
2654 if (full)
2655 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2656 aarch64_get_vec_s8 (cpu, vs, index));
2657 else
2658 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2659 aarch64_get_vec_s8 (cpu, vs, index));
2660 break;
2661
2662 case 1:
2663 if (full)
2664 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2665 aarch64_get_vec_s16 (cpu, vs, index));
2666 else
2667 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2668 aarch64_get_vec_s16 (cpu, vs, index));
2669 break;
2670
2671 case 2:
2672 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2673 aarch64_get_vec_s32 (cpu, vs, index));
2674 break;
2675
2676 default:
2677 HALT_UNALLOC;
2678 }
2679 }
2680
2681 static void
2682 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2683 {
2684 /* instr[31] = 0
2685 instr[30] = word(0)/long(1)
2686 instr[29,21] = 00 1110 000
2687 instr[20,16] = element size and index
2688 instr[15,10] = 00 0011 11
2689 instr[9,5] = V source
2690 instr[4,0] = R dest */
2691
2692 unsigned vs = INSTR (9, 5);
2693 unsigned rd = INSTR (4, 0);
2694 unsigned imm5 = INSTR (20, 16);
2695 unsigned full = INSTR (30, 30);
2696 int size, index;
2697
2698 NYI_assert (29, 21, 0x070);
2699 NYI_assert (15, 10, 0x0F);
2700
2701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2702
2703 if (!full)
2704 {
2705 if (imm5 & 0x1)
2706 {
2707 size = 0;
2708 index = (imm5 >> 1) & 0xF;
2709 }
2710 else if (imm5 & 0x2)
2711 {
2712 size = 1;
2713 index = (imm5 >> 2) & 0x7;
2714 }
2715 else if (imm5 & 0x4)
2716 {
2717 size = 2;
2718 index = (imm5 >> 3) & 0x3;
2719 }
2720 else
2721 HALT_UNALLOC;
2722 }
2723 else if (imm5 & 0x8)
2724 {
2725 size = 3;
2726 index = (imm5 >> 4) & 0x1;
2727 }
2728 else
2729 HALT_UNALLOC;
2730
2731 switch (size)
2732 {
2733 case 0:
2734 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2735 aarch64_get_vec_u8 (cpu, vs, index));
2736 break;
2737
2738 case 1:
2739 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2740 aarch64_get_vec_u16 (cpu, vs, index));
2741 break;
2742
2743 case 2:
2744 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2745 aarch64_get_vec_u32 (cpu, vs, index));
2746 break;
2747
2748 case 3:
2749 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2750 aarch64_get_vec_u64 (cpu, vs, index));
2751 break;
2752
2753 default:
2754 HALT_UNALLOC;
2755 }
2756 }
2757
2758 static void
2759 do_vec_INS (sim_cpu *cpu)
2760 {
2761 /* instr[31,21] = 01001110000
2762 instr[20,16] = element size and index
2763 instr[15,10] = 000111
2764 instr[9,5] = W source
2765 instr[4,0] = V dest */
2766
2767 int index;
2768 unsigned rs = INSTR (9, 5);
2769 unsigned vd = INSTR (4, 0);
2770
2771 NYI_assert (31, 21, 0x270);
2772 NYI_assert (15, 10, 0x07);
2773
2774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2775 if (INSTR (16, 16))
2776 {
2777 index = INSTR (20, 17);
2778 aarch64_set_vec_u8 (cpu, vd, index,
2779 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2780 }
2781 else if (INSTR (17, 17))
2782 {
2783 index = INSTR (20, 18);
2784 aarch64_set_vec_u16 (cpu, vd, index,
2785 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2786 }
2787 else if (INSTR (18, 18))
2788 {
2789 index = INSTR (20, 19);
2790 aarch64_set_vec_u32 (cpu, vd, index,
2791 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2792 }
2793 else if (INSTR (19, 19))
2794 {
2795 index = INSTR (20, 20);
2796 aarch64_set_vec_u64 (cpu, vd, index,
2797 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2798 }
2799 else
2800 HALT_NYI;
2801 }
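/* Example: imm5 == 0b00110 has bit 0 clear and bit 1 set, selecting a
   16-bit element with index INSTR (20, 18) == 1, i.e. INS Vd.H[1], Ws. */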
2802
2803 static void
2804 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2805 {
2806 /* instr[31] = 0
2807 instr[30] = half(0)/full(1)
2808 instr[29,21] = 00 1110 000
2809 instr[20,16] = element size and index
2810 instr[15,10] = 0000 01
2811 instr[9,5] = V source
2812 instr[4,0] = V dest. */
2813
2814 unsigned full = INSTR (30, 30);
2815 unsigned vs = INSTR (9, 5);
2816 unsigned vd = INSTR (4, 0);
2817 int i, index;
2818
2819 NYI_assert (29, 21, 0x070);
2820 NYI_assert (15, 10, 0x01);
2821
2822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2823 if (INSTR (16, 16))
2824 {
2825 index = INSTR (20, 17);
2826
2827 for (i = 0; i < (full ? 16 : 8); i++)
2828 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2829 }
2830 else if (INSTR (17, 17))
2831 {
2832 index = INSTR (20, 18);
2833
2834 for (i = 0; i < (full ? 8 : 4); i++)
2835 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2836 }
2837 else if (INSTR (18, 18))
2838 {
2839 index = INSTR (20, 19);
2840
2841 for (i = 0; i < (full ? 4 : 2); i++)
2842 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2843 }
2844 else
2845 {
2846 if (INSTR (19, 19) == 0)
2847 HALT_UNALLOC;
2848
2849 if (! full)
2850 HALT_UNALLOC;
2851
2852 index = INSTR (20, 20);
2853
2854 for (i = 0; i < 2; i++)
2855 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2856 }
2857 }
2858
2859 static void
2860 do_vec_TBL (sim_cpu *cpu)
2861 {
2862 /* instr[31] = 0
2863 instr[30] = half(0)/full(1)
2864 instr[29,21] = 00 1110 000
2865 instr[20,16] = Vm
2866 instr[15] = 0
2867 instr[14,13] = vec length
2868 instr[12,10] = 000
2869 instr[9,5] = V start
2870 instr[4,0] = V dest */
2871
2872 int full = INSTR (30, 30);
2873 int len = INSTR (14, 13) + 1;
2874 unsigned vm = INSTR (20, 16);
2875 unsigned vn = INSTR (9, 5);
2876 unsigned vd = INSTR (4, 0);
2877 unsigned i;
2878
2879 NYI_assert (29, 21, 0x070);
2880 NYI_assert (12, 10, 0);
2881
2882 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2883 for (i = 0; i < (full ? 16 : 8); i++)
2884 {
2885 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2886 uint8_t val;
2887
2888 if (selector < 16)
2889 val = aarch64_get_vec_u8 (cpu, vn, selector);
2890 else if (selector < 32)
2891 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2892 else if (selector < 48)
2893 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2894 else if (selector < 64)
2895 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2896 else
2897 val = 0;
2898
2899 aarch64_set_vec_u8 (cpu, vd, i, val);
2900 }
2901 }
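/* Example: for a two-register TBL (len == 2) a selector byte of 17
   fetches byte 1 of register vn + 1, and selector bytes >= 32 are out
   of range and yield 0.  (Note the table register numbers vn + 1 etc.
   are used directly, without the architected wrap-around at V31.) */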
2902
2903 static void
2904 do_vec_TRN (sim_cpu *cpu)
2905 {
2906 /* instr[31] = 0
2907 instr[30] = half(0)/full(1)
2908 instr[29,24] = 00 1110
2909 instr[23,22] = size
2910 instr[21] = 0
2911 instr[20,16] = Vm
2912 instr[15] = 0
2913 instr[14] = TRN1 (0) / TRN2 (1)
2914 instr[13,10] = 1010
2915 instr[9,5] = V source
2916 instr[4,0] = V dest. */
2917
2918 int full = INSTR (30, 30);
2919 int second = INSTR (14, 14);
2920 unsigned vm = INSTR (20, 16);
2921 unsigned vn = INSTR (9, 5);
2922 unsigned vd = INSTR (4, 0);
2923 unsigned i;
2924
2925 NYI_assert (29, 24, 0x0E);
2926 NYI_assert (13, 10, 0xA);
2927
2928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2929 /* TRN1 interleaves the even-numbered elements of the two sources,
2930 TRN2 the odd-numbered ones.  Each pair is read into temporaries
2931 before writing, in case Vd aliases Vn or Vm. */
2932 switch (INSTR (23, 22))
2933 {
2934 case 0:
2935 for (i = 0; i < (full ? 8 : 4); i++)
2936 {
2937 uint8_t e1 = aarch64_get_vec_u8 (cpu, vn, i * 2 + second);
2938 uint8_t e2 = aarch64_get_vec_u8 (cpu, vm, i * 2 + second);
2939 aarch64_set_vec_u8 (cpu, vd, i * 2, e1);
2940 aarch64_set_vec_u8 (cpu, vd, i * 2 + 1, e2);
2941 }
2942 break;
2943
2944 case 1:
2945 for (i = 0; i < (full ? 4 : 2); i++)
2946 {
2947 uint16_t e1 = aarch64_get_vec_u16 (cpu, vn, i * 2 + second);
2948 uint16_t e2 = aarch64_get_vec_u16 (cpu, vm, i * 2 + second);
2949 aarch64_set_vec_u16 (cpu, vd, i * 2, e1);
2950 aarch64_set_vec_u16 (cpu, vd, i * 2 + 1, e2);
2951 }
2952 break;
2953
2954 case 2:
2955 for (i = 0; i < (full ? 2 : 1); i++)
2956 {
2957 uint32_t e1 = aarch64_get_vec_u32 (cpu, vn, i * 2 + second);
2958 uint32_t e2 = aarch64_get_vec_u32 (cpu, vm, i * 2 + second);
2959 aarch64_set_vec_u32 (cpu, vd, i * 2, e1);
2960 aarch64_set_vec_u32 (cpu, vd, i * 2 + 1, e2);
2961 }
2962 break;
2963
2964 case 3:
2965 if (! full)
2966 HALT_UNALLOC;
2967
2968 {
2969 uint64_t e1 = aarch64_get_vec_u64 (cpu, vn, second);
2970 uint64_t e2 = aarch64_get_vec_u64 (cpu, vm, second);
2971 aarch64_set_vec_u64 (cpu, vd, 0, e1);
2972 aarch64_set_vec_u64 (cpu, vd, 1, e2);
2973 }
2974 break;
2975 }
2976 }
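/* Example: for byte-sized TRN1 (second == 0) with vn holding bytes
   n0..n7 and vm holding m0..m7, the low half of vd becomes
   n0 m0 n2 m2 n4 m4 n6 m6; TRN2 picks the odd-numbered elements
   instead. */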
2977
2978 static void
2979 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2980 {
2981 /* instr[31] = 0
2982 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2983 [must be 1 for 64-bit xfer]
2984 instr[29,20] = 00 1110 0000
2985 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2986 0100=> 32-bits. 1000=>64-bits
2987 instr[15,10] = 0000 11
2988 instr[9,5] = W source
2989 instr[4,0] = V dest. */
2990
2991 unsigned i;
2992 unsigned Vd = INSTR (4, 0);
2993 unsigned Rs = INSTR (9, 5);
2994 int both = INSTR (30, 30);
2995
2996 NYI_assert (29, 20, 0x0E0);
2997 NYI_assert (15, 10, 0x03);
2998
2999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3000 switch (INSTR (19, 16))
3001 {
3002 case 1:
3003 for (i = 0; i < (both ? 16 : 8); i++)
3004 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3005 break;
3006
3007 case 2:
3008 for (i = 0; i < (both ? 8 : 4); i++)
3009 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3010 break;
3011
3012 case 4:
3013 for (i = 0; i < (both ? 4 : 2); i++)
3014 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3015 break;
3016
3017 case 8:
3018 if (!both)
3019 HALT_NYI;
3020 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3021 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3022 break;
3023
3024 default:
3025 HALT_NYI;
3026 }
3027 }
3028
3029 static void
3030 do_vec_UZP (sim_cpu *cpu)
3031 {
3032 /* instr[31] = 0
3033 instr[30] = half(0)/full(1)
3034 instr[29,24] = 00 1110
3035 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3036 instr[21] = 0
3037 instr[20,16] = Vm
3038 instr[15] = 0
3039 instr[14] = lower (0) / upper (1)
3040 instr[13,10] = 0110
3041 instr[9,5] = Vn
3042 instr[4,0] = Vd. */
3043
3044 int full = INSTR (30, 30);
3045 int upper = INSTR (14, 14);
3046
3047 unsigned vm = INSTR (20, 16);
3048 unsigned vn = INSTR (9, 5);
3049 unsigned vd = INSTR (4, 0);
3050
3051 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3052 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3053 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3054 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3055
3056 uint64_t val1;
3057 uint64_t val2;
3058
3059 uint64_t input2 = full ? val_n2 : val_m1;
3060
3061 NYI_assert (29, 24, 0x0E);
3062 NYI_assert (21, 21, 0);
3063 NYI_assert (15, 15, 0);
3064 NYI_assert (13, 10, 6);
3065
3066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3067 switch (INSTR (23, 22))
3068 {
3069 case 0:
3070 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3071 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3072 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3073 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3074
3075 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3076 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3077 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3078 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3079
3080 if (full)
3081 {
3082 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3083 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3084 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3085 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3086
3087 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3088 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3089 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3090 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3091 }
3092 break;
3093
3094 case 1:
3095 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3096 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3097
3098 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3099 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3100
3101 if (full)
3102 {
3103 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3104 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3105
3106 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3107 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3108 }
3109 break;
3110
3111 case 2:
3112 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3113 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3114
3115 if (full)
3116 {
3117 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3118 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3119 }
3120 break;
3121
3122 case 3:
3123 if (! full)
3124 HALT_UNALLOC;
3125
3126 val1 = upper ? val_n2 : val_n1;
3127 val2 = upper ? val_m2 : val_m1;
3128 break;
3129 }
3130
3131 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3132 if (full)
3133 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3134 }
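/* Example: for byte-sized UZP1 (upper == 0) on Vd.8B, val1 packs the
   even-numbered elements: vn bytes 0 2 4 6 followed by vm bytes
   0 2 4 6; UZP2 (upper == 1) packs the odd-numbered elements. */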
3135
3136 static void
3137 do_vec_ZIP (sim_cpu *cpu)
3138 {
3139 /* instr[31] = 0
3140 instr[30] = half(0)/full(1)
3141 instr[29,24] = 00 1110
3142 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3143 instr[21] = 0
3144 instr[20,16] = Vm
3145 instr[15] = 0
3146 instr[14] = lower (0) / upper (1)
3147 instr[13,10] = 1110
3148 instr[9,5] = Vn
3149 instr[4,0] = Vd. */
3150
3151 int full = INSTR (30, 30);
3152 int upper = INSTR (14, 14);
3153
3154 unsigned vm = INSTR (20, 16);
3155 unsigned vn = INSTR (9, 5);
3156 unsigned vd = INSTR (4, 0);
3157
3158 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3159 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3160 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3161 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3162
3163 uint64_t val1 = 0;
3164 uint64_t val2 = 0;
3165
3166 uint64_t input1 = upper ? val_n2 : val_n1;
3167 uint64_t input2 = upper ? val_m2 : val_m1;
3168
3169 NYI_assert (29, 24, 0x0E);
3170 NYI_assert (21, 21, 0);
3171 NYI_assert (15, 15, 0);
3172 NYI_assert (13, 10, 0xE);
3173
3174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3175 switch (INSTR (23, 22))
3176 {
3177 case 0:
3178 val1 =
3179 ((input1 << 0) & (0xFF << 0))
3180 | ((input2 << 8) & (0xFF << 8))
3181 | ((input1 << 8) & (0xFF << 16))
3182 | ((input2 << 16) & (0xFFULL << 24))
3183 | ((input1 << 16) & (0xFFULL << 32))
3184 | ((input2 << 24) & (0xFFULL << 40))
3185 | ((input1 << 24) & (0xFFULL << 48))
3186 | ((input2 << 32) & (0xFFULL << 56));
3187
3188 val2 =
3189 ((input1 >> 32) & (0xFF << 0))
3190 | ((input2 >> 24) & (0xFF << 8))
3191 | ((input1 >> 24) & (0xFF << 16))
3192 | ((input2 >> 16) & (0xFFULL << 24))
3193 | ((input1 >> 16) & (0xFFULL << 32))
3194 | ((input2 >> 8) & (0xFFULL << 40))
3195 | ((input1 >> 8) & (0xFFULL << 48))
3196 | ((input2 >> 0) & (0xFFULL << 56));
3197 break;
3198
3199 case 1:
3200 val1 =
3201 ((input1 << 0) & (0xFFFF << 0))
3202 | ((input2 << 16) & (0xFFFFULL << 16))
3203 | ((input1 << 16) & (0xFFFFULL << 32))
3204 | ((input2 << 32) & (0xFFFFULL << 48));
3205
3206 val2 =
3207 ((input1 >> 32) & (0xFFFF << 0))
3208 | ((input2 >> 16) & (0xFFFFULL << 16))
3209 | ((input1 >> 16) & (0xFFFFULL << 32))
3210 | ((input2 >> 0) & (0xFFFFULL << 48));
3211 break;
3212
3213 case 2:
3214 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3215 val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3216 break;
3217
3218 case 3:
3219 val1 = input1;
3220 val2 = input2;
3221 break;
3222 }
3223
3224 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3225 if (full)
3226 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3227 }
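/* Example: for byte-sized ZIP1 on Vd.16B, val1/val2 interleave the
   low halves of the sources element by element:
   n0 m0 n1 m1 ... n7 m7; ZIP2 does the same with the upper halves. */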
3228
3229 /* Floating point immediates are encoded in 8 bits.
3230 fpimm[7] = sign bit.
3231 fpimm[6:4] = signed exponent.
3232 fpimm[3:0] = fraction (assuming leading 1).
3233 i.e. F = s * 1.f * 2^(e - b). */
3234
3235 static float
3236 fp_immediate_for_encoding_32 (uint32_t imm8)
3237 {
3238 float u;
3239 uint32_t s, e, f, i;
3240
3241 s = (imm8 >> 7) & 0x1;
3242 e = (imm8 >> 4) & 0x7;
3243 f = imm8 & 0xf;
3244
3245 /* The fp value is (-1)^s * (16+f)/16 * 2^e, with e the signed 3-bit exponent. */
3246 u = (16.0 + f) / 16.0;
3247
3248 /* N.B. exponent is signed. */
3249 if (e < 4)
3250 {
3251 int epos = e;
3252
3253 for (i = 0; i <= epos; i++)
3254 u *= 2.0;
3255 }
3256 else
3257 {
3258 int eneg = 7 - e;
3259
3260 for (i = 0; i < eneg; i++)
3261 u /= 2.0;
3262 }
3263
3264 if (s)
3265 u = - u;
3266
3267 return u;
3268 }
3269
3270 static double
3271 fp_immediate_for_encoding_64 (uint32_t imm8)
3272 {
3273 double u;
3274 uint32_t s, e, f, i;
3275
3276 s = (imm8 >> 7) & 0x1;
3277 e = (imm8 >> 4) & 0x7;
3278 f = imm8 & 0xf;
3279
3280 /* The fp value is (-1)^s * (16+f)/16 * 2^e, with e the signed 3-bit exponent. */
3281 u = (16.0 + f) / 16.0;
3282
3283 /* N.B. exponent is signed. */
3284 if (e < 4)
3285 {
3286 int epos = e;
3287
3288 for (i = 0; i <= epos; i++)
3289 u *= 2.0;
3290 }
3291 else
3292 {
3293 int eneg = 7 - e;
3294
3295 for (i = 0; i < eneg; i++)
3296 u /= 2.0;
3297 }
3298
3299 if (s)
3300 u = - u;
3301
3302 return u;
3303 }
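/* Example encodings: imm8 == 0x70 (s == 0, e == 7, f == 0) decodes to
   16/16 * 2^0 == 1.0, and imm8 == 0x00 decodes to 16/16 * 2^1 == 2.0. */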
3304
3305 static void
3306 do_vec_MOV_immediate (sim_cpu *cpu)
3307 {
3308 /* instr[31] = 0
3309 instr[30] = full/half selector
3310 instr[29,19] = 00111100000
3311 instr[18,16] = high 3 bits of uimm8
3312 instr[15,12] = size & shift:
3313 0000 => 32-bit
3314 0010 => 32-bit + LSL#8
3315 0100 => 32-bit + LSL#16
3316 0110 => 32-bit + LSL#24
3317 1010 => 16-bit + LSL#8
3318 1000 => 16-bit
3319 1101 => 32-bit + MSL#16
3320 1100 => 32-bit + MSL#8
3321 1110 => 8-bit
3322 1111 => double
3323 instr[11,10] = 01
3324 instr[9,5] = low 5-bits of uimm8
3325 instr[4,0] = Vd. */
3326
3327 int full = INSTR (30, 30);
3328 unsigned vd = INSTR (4, 0);
3329 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3330 unsigned i;
3331
3332 NYI_assert (29, 19, 0x1E0);
3333 NYI_assert (11, 10, 1);
3334
3335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3336 switch (INSTR (15, 12))
3337 {
3338 case 0x0: /* 32-bit, no shift. */
3339 case 0x2: /* 32-bit, shift by 8. */
3340 case 0x4: /* 32-bit, shift by 16. */
3341 case 0x6: /* 32-bit, shift by 24. */
3342 val <<= (8 * INSTR (14, 13));
3343 for (i = 0; i < (full ? 4 : 2); i++)
3344 aarch64_set_vec_u32 (cpu, vd, i, val);
3345 break;
3346
3347 case 0xa: /* 16-bit, shift by 8. */
3348 val <<= 8;
3349 ATTRIBUTE_FALLTHROUGH;
3350 case 0x8: /* 16-bit, no shift. */
3351 for (i = 0; i < (full ? 8 : 4); i++)
3352 aarch64_set_vec_u16 (cpu, vd, i, val);
3353 break;
3354
3355 case 0xd: /* 32-bit, mask shift by 16. */
3356 val <<= 8;
3357 val |= 0xFF;
3358 ATTRIBUTE_FALLTHROUGH;
3359 case 0xc: /* 32-bit, mask shift by 8. */
3360 val <<= 8;
3361 val |= 0xFF;
3362 for (i = 0; i < (full ? 4 : 2); i++)
3363 aarch64_set_vec_u32 (cpu, vd, i, val);
3364 break;
3365
3366 case 0xe: /* 8-bit, no shift. */
3367 for (i = 0; i < (full ? 16 : 8); i++)
3368 aarch64_set_vec_u8 (cpu, vd, i, val);
3369 break;
3370
3371 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3372 {
3373 float u = fp_immediate_for_encoding_32 (val);
3374 for (i = 0; i < (full ? 4 : 2); i++)
3375 aarch64_set_vec_float (cpu, vd, i, u);
3376 break;
3377 }
3378
3379 default:
3380 HALT_NYI;
3381 }
3382 }
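/* Example: with uimm8 == 0xAB the MSL#16 form (selector 0xd) expands
   the immediate to 0x00ABFFFF before it is replicated across the
   vector. */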
3383
3384 static void
3385 do_vec_MVNI (sim_cpu *cpu)
3386 {
3387 /* instr[31] = 0
3388 instr[30] = full/half selector
3389 instr[29,19] = 10111100000
3390 instr[18,16] = high 3 bits of uimm8
3391 instr[15,12] = selector
3392 instr[11,10] = 01
3393 instr[9,5] = low 5-bits of uimm8
3394 instr[4,0] = Vd. */
3395
3396 int full = INSTR (30, 30);
3397 unsigned vd = INSTR (4, 0);
3398 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3399 unsigned i;
3400
3401 NYI_assert (29, 19, 0x5E0);
3402 NYI_assert (11, 10, 1);
3403
3404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3405 switch (INSTR (15, 12))
3406 {
3407 case 0x0: /* 32-bit, no shift. */
3408 case 0x2: /* 32-bit, shift by 8. */
3409 case 0x4: /* 32-bit, shift by 16. */
3410 case 0x6: /* 32-bit, shift by 24. */
3411 val <<= (8 * INSTR (14, 13));
3412 val = ~ val;
3413 for (i = 0; i < (full ? 4 : 2); i++)
3414 aarch64_set_vec_u32 (cpu, vd, i, val);
3415 return;
3416
3417 case 0xa: /* 16-bit, 8 bit shift. */
3418 val <<= 8;
3419 ATTRIBUTE_FALLTHROUGH;
3420 case 0x8: /* 16-bit, no shift. */
3421 val = ~ val;
3422 for (i = 0; i < (full ? 8 : 4); i++)
3423 aarch64_set_vec_u16 (cpu, vd, i, val);
3424 return;
3425
3426 case 0xd: /* 32-bit, mask shift by 16. */
3427 val <<= 8;
3428 val |= 0xFF;
3429 ATTRIBUTE_FALLTHROUGH;
3430 case 0xc: /* 32-bit, mask shift by 8. */
3431 val <<= 8;
3432 val |= 0xFF;
3433 val = ~ val;
3434 for (i = 0; i < (full ? 4 : 2); i++)
3435 aarch64_set_vec_u32 (cpu, vd, i, val);
3436 return;
3437
3438 case 0xE: /* MOVI Dn, #mask64 */
3439 {
3440 uint64_t mask = 0;
3441
3442 for (i = 0; i < 8; i++)
3443 if (val & (1 << i))
3444 mask |= (0xFFULL << (i * 8)); /* ULL: i * 8 can reach 56, wider than a 32-bit long. */
3445 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3446 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3447 return;
3448 }
3449
3450 case 0xf: /* FMOV Vd.2D, #fpimm. */
3451 {
3452 double u = fp_immediate_for_encoding_64 (val);
3453
3454 if (! full)
3455 HALT_UNALLOC;
3456
3457 aarch64_set_vec_double (cpu, vd, 0, u);
3458 aarch64_set_vec_double (cpu, vd, 1, u);
3459 return;
3460 }
3461
3462 default:
3463 HALT_NYI;
3464 }
3465 }
3466
3467 #define ABS(A) ((A) < 0 ? - (A) : (A))
3468
3469 static void
3470 do_vec_ABS (sim_cpu *cpu)
3471 {
3472 /* instr[31] = 0
3473 instr[30] = half(0)/full(1)
3474 instr[29,24] = 00 1110
3475 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3476 instr[21,10] = 10 0000 1011 10
3477 instr[9,5] = Vn
3478 instr[4,0] = Vd. */
3479
3480 unsigned vn = INSTR (9, 5);
3481 unsigned vd = INSTR (4, 0);
3482 unsigned full = INSTR (30, 30);
3483 unsigned i;
3484
3485 NYI_assert (29, 24, 0x0E);
3486 NYI_assert (21, 10, 0x82E);
3487
3488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3489 switch (INSTR (23, 22))
3490 {
3491 case 0:
3492 for (i = 0; i < (full ? 16 : 8); i++)
3493 aarch64_set_vec_s8 (cpu, vd, i,
3494 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3495 break;
3496
3497 case 1:
3498 for (i = 0; i < (full ? 8 : 4); i++)
3499 aarch64_set_vec_s16 (cpu, vd, i,
3500 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3501 break;
3502
3503 case 2:
3504 for (i = 0; i < (full ? 4 : 2); i++)
3505 aarch64_set_vec_s32 (cpu, vd, i,
3506 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3507 break;
3508
3509 case 3:
3510 if (! full)
3511 HALT_NYI;
3512 for (i = 0; i < 2; i++)
3513 aarch64_set_vec_s64 (cpu, vd, i,
3514 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3515 break;
3516 }
3517 }
3518
3519 static void
3520 do_vec_ADDV (sim_cpu *cpu)
3521 {
3522 /* instr[31] = 0
3523 instr[30] = full/half selector
3524 instr[29,24] = 00 1110
3525 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3526 instr[21,10] = 11 0001 1011 10
3527 instr[9,5] = Vm
3528 instr[4,0] = Rd. */
3529
3530 unsigned vm = INSTR (9, 5);
3531 unsigned rd = INSTR (4, 0);
3532 unsigned i;
3533 int full = INSTR (30, 30);
3534
3535 NYI_assert (29, 24, 0x0E);
3536 NYI_assert (21, 10, 0xC6E);
3537
3538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3539 switch (INSTR (23, 22))
3540 {
3541 case 0:
3542 {
3543 uint8_t val = 0;
3544 for (i = 0; i < (full ? 16 : 8); i++)
3545 val += aarch64_get_vec_u8 (cpu, vm, i);
3546 aarch64_set_vec_u64 (cpu, rd, 0, val);
3547 return;
3548 }
3549
3550 case 1:
3551 {
3552 uint16_t val = 0;
3553 for (i = 0; i < (full ? 8 : 4); i++)
3554 val += aarch64_get_vec_u16 (cpu, vm, i);
3555 aarch64_set_vec_u64 (cpu, rd, 0, val);
3556 return;
3557 }
3558
3559 case 2:
3560 {
3561 uint32_t val = 0;
3562 if (! full)
3563 HALT_UNALLOC;
3564 for (i = 0; i < 4; i++)
3565 val += aarch64_get_vec_u32 (cpu, vm, i);
3566 aarch64_set_vec_u64 (cpu, rd, 0, val);
3567 return;
3568 }
3569
3570 case 3:
3571 HALT_UNALLOC;
3572 }
3573 }
3574
3575 static void
3576 do_vec_ins_2 (sim_cpu *cpu)
3577 {
3578 /* instr[31,21] = 01001110000
3579 instr[20,18] = size & element selector
3580 instr[17,14] = 0000
3581 instr[13] = direction: to vec(0), from vec (1)
3582 instr[12,10] = 111
3583 instr[9,5] = Vm
3584 instr[4,0] = Vd. */
3585
3586 unsigned elem;
3587 unsigned vm = INSTR (9, 5);
3588 unsigned vd = INSTR (4, 0);
3589
3590 NYI_assert (31, 21, 0x270);
3591 NYI_assert (17, 14, 0);
3592 NYI_assert (12, 10, 7);
3593
3594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3595 if (INSTR (13, 13) == 1)
3596 {
3597 if (INSTR (18, 18) == 1)
3598 {
3599 /* 32-bit moves. */
3600 elem = INSTR (20, 19);
3601 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3602 aarch64_get_vec_u32 (cpu, vm, elem));
3603 }
3604 else
3605 {
3606 /* 64-bit moves. */
3607 if (INSTR (19, 19) != 1)
3608 HALT_NYI;
3609
3610 elem = INSTR (20, 20);
3611 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3612 aarch64_get_vec_u64 (cpu, vm, elem));
3613 }
3614 }
3615 else
3616 {
3617 if (INSTR (18, 18) == 1)
3618 {
3619 /* 32-bit moves. */
3620 elem = INSTR (20, 19);
3621 aarch64_set_vec_u32 (cpu, vd, elem,
3622 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3623 }
3624 else
3625 {
3626 /* 64-bit moves. */
3627 if (INSTR (19, 19) != 1)
3628 HALT_NYI;
3629
3630 elem = INSTR (20, 20);
3631 aarch64_set_vec_u64 (cpu, vd, elem,
3632 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3633 }
3634 }
3635 }
3636
3637 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3638 do \
3639 { \
3640 DST_TYPE a[N], b[N]; \
3641 \
3642 for (i = 0; i < (N); i++) \
3643 { \
3644 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3645 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3646 } \
3647 for (i = 0; i < (N); i++) \
3648 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3649 } \
3650 while (0)
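/* For instance, DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16) reads eight
   byte elements from vn and vm (offset by bias, which selects the
   upper half for the "2" variants), multiplies them pairwise and
   writes the products as eight 16-bit elements of vd; the values are
   buffered in a[] and b[] first so that vd may alias a source
   register. */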
3651
3652 static void
3653 do_vec_mull (sim_cpu *cpu)
3654 {
3655 /* instr[31] = 0
3656 instr[30] = lower(0)/upper(1) selector
3657 instr[29] = signed(0)/unsigned(1)
3658 instr[28,24] = 0 1110
3659 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3660 instr[21] = 1
3661 instr[20,16] = Vm
3662 instr[15,10] = 11 0000
3663 instr[9,5] = Vn
3664 instr[4,0] = Vd. */
3665
3666 int unsign = INSTR (29, 29);
3667 int bias = INSTR (30, 30);
3668 unsigned vm = INSTR (20, 16);
3669 unsigned vn = INSTR ( 9, 5);
3670 unsigned vd = INSTR ( 4, 0);
3671 unsigned i;
3672
3673 NYI_assert (28, 24, 0x0E);
3674 NYI_assert (15, 10, 0x30);
3675
3676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3677 /* NB: Read source values before writing results, in case
3678 the source and destination vectors are the same. */
3679 switch (INSTR (23, 22))
3680 {
3681 case 0:
3682 if (bias)
3683 bias = 8;
3684 if (unsign)
3685 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3686 else
3687 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3688 return;
3689
3690 case 1:
3691 if (bias)
3692 bias = 4;
3693 if (unsign)
3694 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3695 else
3696 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3697 return;
3698
3699 case 2:
3700 if (bias)
3701 bias = 2;
3702 if (unsign)
3703 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3704 else
3705 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3706 return;
3707
3708 case 3:
3709 HALT_NYI;
3710 }
3711 }
3712
3713 static void
3714 do_vec_fadd (sim_cpu *cpu)
3715 {
3716 /* instr[31] = 0
3717 instr[30] = half(0)/full(1)
3718 instr[29,24] = 001110
3719 instr[23] = FADD(0)/FSUB(1)
3720 instr[22] = float (0)/double(1)
3721 instr[21] = 1
3722 instr[20,16] = Vm
3723 instr[15,10] = 110101
3724 instr[9,5] = Vn
3725 instr[4,0] = Vd. */
3726
3727 unsigned vm = INSTR (20, 16);
3728 unsigned vn = INSTR (9, 5);
3729 unsigned vd = INSTR (4, 0);
3730 unsigned i;
3731 int full = INSTR (30, 30);
3732
3733 NYI_assert (29, 24, 0x0E);
3734 NYI_assert (21, 21, 1);
3735 NYI_assert (15, 10, 0x35);
3736
3737 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3738 if (INSTR (23, 23))
3739 {
3740 if (INSTR (22, 22))
3741 {
3742 if (! full)
3743 HALT_NYI;
3744
3745 for (i = 0; i < 2; i++)
3746 aarch64_set_vec_double (cpu, vd, i,
3747 aarch64_get_vec_double (cpu, vn, i)
3748 - aarch64_get_vec_double (cpu, vm, i));
3749 }
3750 else
3751 {
3752 for (i = 0; i < (full ? 4 : 2); i++)
3753 aarch64_set_vec_float (cpu, vd, i,
3754 aarch64_get_vec_float (cpu, vn, i)
3755 - aarch64_get_vec_float (cpu, vm, i));
3756 }
3757 }
3758 else
3759 {
3760 if (INSTR (22, 22))
3761 {
3762 if (! full)
3763 HALT_NYI;
3764
3765 for (i = 0; i < 2; i++)
3766 aarch64_set_vec_double (cpu, vd, i,
3767 aarch64_get_vec_double (cpu, vm, i)
3768 + aarch64_get_vec_double (cpu, vn, i));
3769 }
3770 else
3771 {
3772 for (i = 0; i < (full ? 4 : 2); i++)
3773 aarch64_set_vec_float (cpu, vd, i,
3774 aarch64_get_vec_float (cpu, vm, i)
3775 + aarch64_get_vec_float (cpu, vn, i));
3776 }
3777 }
3778 }
3779
3780 static void
3781 do_vec_add (sim_cpu *cpu)
3782 {
3783 /* instr[31] = 0
3784 instr[30] = full/half selector
3785 instr[29,24] = 001110
3786 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3787 instr[21] = 1
3788 instr[20,16] = Vm
3789 instr[15,10] = 100001
3790 instr[9,5] = Vn
3791 instr[4,0] = Vd. */
3792
3793 unsigned vm = INSTR (20, 16);
3794 unsigned vn = INSTR (9, 5);
3795 unsigned vd = INSTR (4, 0);
3796 unsigned i;
3797 int full = INSTR (30, 30);
3798
3799 NYI_assert (29, 24, 0x0E);
3800 NYI_assert (21, 21, 1);
3801 NYI_assert (15, 10, 0x21);
3802
3803 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3804 switch (INSTR (23, 22))
3805 {
3806 case 0:
3807 for (i = 0; i < (full ? 16 : 8); i++)
3808 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3809 + aarch64_get_vec_u8 (cpu, vm, i));
3810 return;
3811
3812 case 1:
3813 for (i = 0; i < (full ? 8 : 4); i++)
3814 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3815 + aarch64_get_vec_u16 (cpu, vm, i));
3816 return;
3817
3818 case 2:
3819 for (i = 0; i < (full ? 4 : 2); i++)
3820 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3821 + aarch64_get_vec_u32 (cpu, vm, i));
3822 return;
3823
3824 case 3:
3825 if (! full)
3826 HALT_UNALLOC;
3827 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3828 + aarch64_get_vec_u64 (cpu, vm, 0));
3829 aarch64_set_vec_u64 (cpu, vd, 1,
3830 aarch64_get_vec_u64 (cpu, vn, 1)
3831 + aarch64_get_vec_u64 (cpu, vm, 1));
3832 return;
3833 }
3834 }
3835
3836 static void
3837 do_vec_mul (sim_cpu *cpu)
3838 {
3839 /* instr[31] = 0
3840 instr[30] = full/half selector
3841 instr[29,24] = 00 1110
3842 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3843 instr[21] = 1
3844 instr[20,16] = Vm
3845 instr[15,10] = 10 0111
3846 instr[9,5] = Vn
3847 instr[4,0] = Vd. */
3848
3849 unsigned vm = INSTR (20, 16);
3850 unsigned vn = INSTR (9, 5);
3851 unsigned vd = INSTR (4, 0);
3852 unsigned i;
3853 int full = INSTR (30, 30);
3854 int bias = 0;
3855
3856 NYI_assert (29, 24, 0x0E);
3857 NYI_assert (21, 21, 1);
3858 NYI_assert (15, 10, 0x27);
3859
3860 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3861 switch (INSTR (23, 22))
3862 {
3863 case 0:
3864 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3865 return;
3866
3867 case 1:
3868 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3869 return;
3870
3871 case 2:
3872 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3873 return;
3874
3875 case 3:
3876 HALT_UNALLOC;
3877 }
3878 }
3879
3880 static void
3881 do_vec_MLA (sim_cpu *cpu)
3882 {
3883 /* instr[31] = 0
3884 instr[30] = full/half selector
3885 instr[29,24] = 00 1110
3886 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3887 instr[21] = 1
3888 instr[20,16] = Vm
3889 instr[15,10] = 1001 01
3890 instr[9,5] = Vn
3891 instr[4,0] = Vd. */
3892
3893 unsigned vm = INSTR (20, 16);
3894 unsigned vn = INSTR (9, 5);
3895 unsigned vd = INSTR (4, 0);
3896 unsigned i;
3897 int full = INSTR (30, 30);
3898
3899 NYI_assert (29, 24, 0x0E);
3900 NYI_assert (21, 21, 1);
3901 NYI_assert (15, 10, 0x25);
3902
3903 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3904 switch (INSTR (23, 22))
3905 {
3906 case 0:
3907 for (i = 0; i < (full ? 16 : 8); i++)
3908 aarch64_set_vec_u8 (cpu, vd, i,
3909 aarch64_get_vec_u8 (cpu, vd, i)
3910 + (aarch64_get_vec_u8 (cpu, vn, i)
3911 * aarch64_get_vec_u8 (cpu, vm, i)));
3912 return;
3913
3914 case 1:
3915 for (i = 0; i < (full ? 8 : 4); i++)
3916 aarch64_set_vec_u16 (cpu, vd, i,
3917 aarch64_get_vec_u16 (cpu, vd, i)
3918 + (aarch64_get_vec_u16 (cpu, vn, i)
3919 * aarch64_get_vec_u16 (cpu, vm, i)));
3920 return;
3921
3922 case 2:
3923 for (i = 0; i < (full ? 4 : 2); i++)
3924 aarch64_set_vec_u32 (cpu, vd, i,
3925 aarch64_get_vec_u32 (cpu, vd, i)
3926 + (aarch64_get_vec_u32 (cpu, vn, i)
3927 * aarch64_get_vec_u32 (cpu, vm, i)));
3928 return;
3929
3930 default:
3931 HALT_UNALLOC;
3932 }
3933 }
3934
3935 static float
3936 fmaxnm (float a, float b)
3937 {
3938 if (! isnan (a))
3939 {
3940 if (! isnan (b))
3941 return a > b ? a : b;
3942 return a;
3943 }
3944 else if (! isnan (b))
3945 return b;
3946 return a;
3947 }
3948
3949 static float
3950 fminnm (float a, float b)
3951 {
3952 if (! isnan (a))
3953 {
3954 if (! isnan (b))
3955 return a < b ? a : b;
3956 return a;
3957 }
3958 else if (! isnan (b))
3959 return b;
3960 return a;
3961 }
3962
3963 static double
3964 dmaxnm (double a, double b)
3965 {
3966 if (! isnan (a))
3967 {
3968 if (! isnan (b))
3969 return a > b ? a : b;
3970 return a;
3971 }
3972 else if (! isnan (b))
3973 return b;
3974 return a;
3975 }
3976
3977 static double
3978 dminnm (double a, double b)
3979 {
3980 if (! isnan (a))
3981 {
3982 if (! isnan (b))
3983 return a < b ? a : b;
3984 return a;
3985 }
3986 else if (! isnan (b))
3987 return b;
3988 return a;
3989 }
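
/* Note: fmaxnm/fminnm and the double variants above implement the
   IEEE 754-2008 maxNum/minNum behaviour used by FMAXNM/FMINNM: a quiet
   NaN in one operand is ignored in favour of the numeric operand.
   Illustrative values (not part of the simulator):

     fmaxnm (NAN, 1.0f)   == 1.0f    NaN operand discarded
     fmaxnm (NAN, NAN)    is NaN     both NaN, NaN returned
     fmaxnm (2.0f, 1.0f)  == 2.0f    ordinary maximum otherwise  */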
3990
3991 static void
3992 do_vec_FminmaxNMP (sim_cpu *cpu)
3993 {
3994 /* instr [31] = 0
3995 instr [30] = half (0)/full (1)
3996 instr [29,24] = 10 1110
3997 instr [23] = max(0)/min(1)
3998 instr [22] = float (0)/double (1)
3999 instr [21] = 1
4000 instr [20,16] = Vm
4001 instr [15,10] = 1100 01
4002 instr [9,5] = Vn
4003 instr [4,0] = Vd. */
4004
4005 unsigned vm = INSTR (20, 16);
4006 unsigned vn = INSTR (9, 5);
4007 unsigned vd = INSTR (4, 0);
4008 int full = INSTR (30, 30);
4009
4010 NYI_assert (29, 24, 0x2E);
4011 NYI_assert (21, 21, 1);
4012 NYI_assert (15, 10, 0x31);
4013
4014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4015 if (INSTR (22, 22))
4016 {
4017 double (* fn)(double, double) = INSTR (23, 23)
4018 ? dminnm : dmaxnm;
4019
4020 if (! full)
4021 HALT_NYI;
4022 aarch64_set_vec_double (cpu, vd, 0,
4023 fn (aarch64_get_vec_double (cpu, vn, 0),
4024 aarch64_get_vec_double (cpu, vn, 1)));
4025 aarch64_set_vec_double (cpu, vd, 1,
4026 fn (aarch64_get_vec_double (cpu, vm, 0),
4027 aarch64_get_vec_double (cpu, vm, 1)));
4028 }
4029 else
4030 {
4031 float (* fn)(float, float) = INSTR (23, 23)
4032 ? fminnm : fmaxnm;
4033
4034 aarch64_set_vec_float (cpu, vd, 0,
4035 fn (aarch64_get_vec_float (cpu, vn, 0),
4036 aarch64_get_vec_float (cpu, vn, 1)));
4037 if (full)
4038 aarch64_set_vec_float (cpu, vd, 1,
4039 fn (aarch64_get_vec_float (cpu, vn, 2),
4040 aarch64_get_vec_float (cpu, vn, 3)));
4041
4042 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4043 fn (aarch64_get_vec_float (cpu, vm, 0),
4044 aarch64_get_vec_float (cpu, vm, 1)));
4045 if (full)
4046 aarch64_set_vec_float (cpu, vd, 3,
4047 fn (aarch64_get_vec_float (cpu, vm, 2),
4048 aarch64_get_vec_float (cpu, vm, 3)));
4049 }
4050 }
4051
4052 static void
4053 do_vec_AND (sim_cpu *cpu)
4054 {
4055 /* instr[31] = 0
4056 instr[30] = half (0)/full (1)
4057 instr[29,21] = 001110001
4058 instr[20,16] = Vm
4059 instr[15,10] = 000111
4060 instr[9,5] = Vn
4061 instr[4,0] = Vd. */
4062
4063 unsigned vm = INSTR (20, 16);
4064 unsigned vn = INSTR (9, 5);
4065 unsigned vd = INSTR (4, 0);
4066 unsigned i;
4067 int full = INSTR (30, 30);
4068
4069 NYI_assert (29, 21, 0x071);
4070 NYI_assert (15, 10, 0x07);
4071
4072 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4073 for (i = 0; i < (full ? 4 : 2); i++)
4074 aarch64_set_vec_u32 (cpu, vd, i,
4075 aarch64_get_vec_u32 (cpu, vn, i)
4076 & aarch64_get_vec_u32 (cpu, vm, i));
4077 }
4078
4079 static void
4080 do_vec_BSL (sim_cpu *cpu)
4081 {
4082 /* instr[31] = 0
4083 instr[30] = half (0)/full (1)
4084 instr[29,21] = 101110011
4085 instr[20,16] = Vm
4086 instr[15,10] = 000111
4087 instr[9,5] = Vn
4088 instr[4,0] = Vd. */
4089
4090 unsigned vm = INSTR (20, 16);
4091 unsigned vn = INSTR (9, 5);
4092 unsigned vd = INSTR (4, 0);
4093 unsigned i;
4094 int full = INSTR (30, 30);
4095
4096 NYI_assert (29, 21, 0x173);
4097 NYI_assert (15, 10, 0x07);
4098
4099 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4100 for (i = 0; i < (full ? 16 : 8); i++)
4101 aarch64_set_vec_u8 (cpu, vd, i,
4102 ( aarch64_get_vec_u8 (cpu, vd, i)
4103 & aarch64_get_vec_u8 (cpu, vn, i))
4104 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4105 & aarch64_get_vec_u8 (cpu, vm, i)));
4106 }
4107
4108 static void
4109 do_vec_EOR (sim_cpu *cpu)
4110 {
4111 /* instr[31] = 0
4112 instr[30] = half (0)/full (1)
4113 instr[29,21] = 10 1110 001
4114 instr[20,16] = Vm
4115 instr[15,10] = 000111
4116 instr[9,5] = Vn
4117 instr[4,0] = Vd. */
4118
4119 unsigned vm = INSTR (20, 16);
4120 unsigned vn = INSTR (9, 5);
4121 unsigned vd = INSTR (4, 0);
4122 unsigned i;
4123 int full = INSTR (30, 30);
4124
4125 NYI_assert (29, 21, 0x171);
4126 NYI_assert (15, 10, 0x07);
4127
4128 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4129 for (i = 0; i < (full ? 4 : 2); i++)
4130 aarch64_set_vec_u32 (cpu, vd, i,
4131 aarch64_get_vec_u32 (cpu, vn, i)
4132 ^ aarch64_get_vec_u32 (cpu, vm, i));
4133 }
4134
4135 static void
4136 do_vec_bit (sim_cpu *cpu)
4137 {
4138 /* instr[31] = 0
4139 instr[30] = half (0)/full (1)
4140 instr[29,23] = 10 1110 1
4141 instr[22] = BIT (0) / BIF (1)
4142 instr[21] = 1
4143 instr[20,16] = Vm
4144 instr[15,10] = 0001 11
4145 instr[9,5] = Vn
4146 instr[4,0] = Vd. */
4147
4148 unsigned vm = INSTR (20, 16);
4149 unsigned vn = INSTR (9, 5);
4150 unsigned vd = INSTR (4, 0);
4151 unsigned full = INSTR (30, 30);
4152 unsigned test_false = INSTR (22, 22);
4153 unsigned i;
4154
4155 NYI_assert (29, 23, 0x5D);
4156 NYI_assert (21, 21, 1);
4157 NYI_assert (15, 10, 0x07);
4158
4159 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4160 for (i = 0; i < (full ? 4 : 2); i++)
4161 {
4162 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4163 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4164 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4165 if (test_false)
4166 aarch64_set_vec_u32 (cpu, vd, i,
4167 (vd_val & vm_val) | (vn_val & ~vm_val));
4168 else
4169 aarch64_set_vec_u32 (cpu, vd, i,
4170 (vd_val & ~vm_val) | (vn_val & vm_val));
4171 }
4172 }
4173
4174 static void
4175 do_vec_ORN (sim_cpu *cpu)
4176 {
4177 /* instr[31] = 0
4178 instr[30] = half (0)/full (1)
4179 instr[29,21] = 00 1110 111
4180 instr[20,16] = Vm
4181 instr[15,10] = 00 0111
4182 instr[9,5] = Vn
4183 instr[4,0] = Vd. */
4184
4185 unsigned vm = INSTR (20, 16);
4186 unsigned vn = INSTR (9, 5);
4187 unsigned vd = INSTR (4, 0);
4188 unsigned i;
4189 int full = INSTR (30, 30);
4190
4191 NYI_assert (29, 21, 0x077);
4192 NYI_assert (15, 10, 0x07);
4193
4194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4195 for (i = 0; i < (full ? 16 : 8); i++)
4196 aarch64_set_vec_u8 (cpu, vd, i,
4197 aarch64_get_vec_u8 (cpu, vn, i)
4198 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4199 }
4200
4201 static void
4202 do_vec_ORR (sim_cpu *cpu)
4203 {
4204 /* instr[31] = 0
4205 instr[30] = half (0)/full (1)
4206 instr[29,21] = 00 1110 101
4207 instr[20,16] = Vm
4208 instr[15,10] = 0001 11
4209 instr[9,5] = Vn
4210 instr[4,0] = Vd. */
4211
4212 unsigned vm = INSTR (20, 16);
4213 unsigned vn = INSTR (9, 5);
4214 unsigned vd = INSTR (4, 0);
4215 unsigned i;
4216 int full = INSTR (30, 30);
4217
4218 NYI_assert (29, 21, 0x075);
4219 NYI_assert (15, 10, 0x07);
4220
4221 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4222 for (i = 0; i < (full ? 16 : 8); i++)
4223 aarch64_set_vec_u8 (cpu, vd, i,
4224 aarch64_get_vec_u8 (cpu, vn, i)
4225 | aarch64_get_vec_u8 (cpu, vm, i));
4226 }
4227
4228 static void
4229 do_vec_BIC (sim_cpu *cpu)
4230 {
4231 /* instr[31] = 0
4232 instr[30] = half (0)/full (1)
4233 instr[29,21] = 00 1110 011
4234 instr[20,16] = Vm
4235 instr[15,10] = 00 0111
4236 instr[9,5] = Vn
4237 instr[4,0] = Vd. */
4238
4239 unsigned vm = INSTR (20, 16);
4240 unsigned vn = INSTR (9, 5);
4241 unsigned vd = INSTR (4, 0);
4242 unsigned i;
4243 int full = INSTR (30, 30);
4244
4245 NYI_assert (29, 21, 0x073);
4246 NYI_assert (15, 10, 0x07);
4247
4248 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4249 for (i = 0; i < (full ? 16 : 8); i++)
4250 aarch64_set_vec_u8 (cpu, vd, i,
4251 aarch64_get_vec_u8 (cpu, vn, i)
4252 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4253 }
4254
4255 static void
4256 do_vec_XTN (sim_cpu *cpu)
4257 {
4258 /* instr[31] = 0
4259 instr[30] = first part (0)/ second part (1)
4260 instr[29,24] = 00 1110
4261 instr[23,22] = size: byte(00), half(01), word (10)
4262 instr[21,10] = 1000 0100 1010
4263 instr[9,5] = Vs
4264 instr[4,0] = Vd. */
4265
4266 unsigned vs = INSTR (9, 5);
4267 unsigned vd = INSTR (4, 0);
4268 unsigned bias = INSTR (30, 30);
4269 unsigned i;
4270
4271 NYI_assert (29, 24, 0x0E);
4272 NYI_assert (21, 10, 0x84A);
4273
4274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4275 switch (INSTR (23, 22))
4276 {
4277 case 0:
4278 for (i = 0; i < 8; i++)
4279 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4280 aarch64_get_vec_u16 (cpu, vs, i));
4281 return;
4282
4283 case 1:
4284 for (i = 0; i < 4; i++)
4285 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4286 aarch64_get_vec_u32 (cpu, vs, i));
4287 return;
4288
4289 case 2:
4290 for (i = 0; i < 2; i++)
4291 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4292 aarch64_get_vec_u64 (cpu, vs, i));
4293 return;
4294 }
4295 }
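
/* Worked example for the narrowing above: XTN writes the lower half of
   Vd, while XTN2 (bias == 1) writes the upper half and leaves the lower
   half intact.  With hypothetical 16-bit source lanes:

     Vs.8h = { 0x0102, 0x0304, ... }  =>  Vd.8b = { 0x02, 0x04, ... }

   i.e. each lane is truncated to its low byte.  */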
4296
4297 /* Return the number of bits set in the input value. */
4298 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4299 # define popcount __builtin_popcount
4300 #else
4301 static int
4302 popcount (unsigned char x)
4303 {
4304 static const unsigned char popcnt[16] =
4305 {
4306 0, 1, 1, 2,
4307 1, 2, 2, 3,
4308 1, 2, 2, 3,
4309 2, 3, 3, 4
4310 };
4311
4312 /* Only counts the low 8 bits of the input as that is all we need. */
4313 return popcnt[x % 16] + popcnt[x / 16];
4314 }
4315 #endif
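
/* Example: popcount (0xA5) == 4, since 0xa5 == 10100101 binary.  The
   table-driven fallback splits the byte into two nibbles and sums the
   precomputed counts: popcnt[0xA] + popcnt[0x5] == 2 + 2.  */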
4316
4317 static void
4318 do_vec_CNT (sim_cpu *cpu)
4319 {
4320 /* instr[31] = 0
4321 instr[30] = half (0)/ full (1)
4322 instr[29,24] = 00 1110
4323 instr[23,22] = size: byte(00)
4324 instr[21,10] = 1000 0001 0110
4325 instr[9,5] = Vs
4326 instr[4,0] = Vd. */
4327
4328 unsigned vs = INSTR (9, 5);
4329 unsigned vd = INSTR (4, 0);
4330 int full = INSTR (30, 30);
4331 int size = INSTR (23, 22);
4332 int i;
4333
4334 NYI_assert (29, 24, 0x0E);
4335 NYI_assert (21, 10, 0x816);
4336
4337 if (size != 0)
4338 HALT_UNALLOC;
4339
4340 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4341
4342 for (i = 0; i < (full ? 16 : 8); i++)
4343 aarch64_set_vec_u8 (cpu, vd, i,
4344 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4345 }
4346
4347 static void
4348 do_vec_maxv (sim_cpu *cpu)
4349 {
4350 /* instr[31] = 0
4351 instr[30] = half(0)/full(1)
4352 instr[29] = signed (0)/unsigned(1)
4353 instr[28,24] = 0 1110
4354 instr[23,22] = size: byte(00), half(01), word (10)
4355 instr[21] = 1
4356 instr[20,17] = 1 000
4357 instr[16] = max(0)/min(1)
4358 instr[15,10] = 1010 10
4359 instr[9,5] = V source
4360 instr[4,0] = R dest. */
4361
4362 unsigned vs = INSTR (9, 5);
4363 unsigned rd = INSTR (4, 0);
4364 unsigned full = INSTR (30, 30);
4365 unsigned i;
4366
4367 NYI_assert (28, 24, 0x0E);
4368 NYI_assert (21, 21, 1);
4369 NYI_assert (20, 17, 8);
4370 NYI_assert (15, 10, 0x2A);
4371
4372 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4373 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4374 {
4375 case 0: /* SMAXV. */
4376 {
4377 int64_t smax;
4378 switch (INSTR (23, 22))
4379 {
4380 case 0:
4381 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4382 for (i = 1; i < (full ? 16 : 8); i++)
4383 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4384 break;
4385 case 1:
4386 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4387 for (i = 1; i < (full ? 8 : 4); i++)
4388 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4389 break;
4390 case 2:
4391 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4392 for (i = 1; i < (full ? 4 : 2); i++)
4393 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4394 break;
4395 case 3:
4396 HALT_UNALLOC;
4397 }
4398 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4399 return;
4400 }
4401
4402 case 1: /* SMINV. */
4403 {
4404 int64_t smin;
4405 switch (INSTR (23, 22))
4406 {
4407 case 0:
4408 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4409 for (i = 1; i < (full ? 16 : 8); i++)
4410 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4411 break;
4412 case 1:
4413 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4414 for (i = 1; i < (full ? 8 : 4); i++)
4415 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4416 break;
4417 case 2:
4418 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4419 for (i = 1; i < (full ? 4 : 2); i++)
4420 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4421 break;
4422
4423 case 3:
4424 HALT_UNALLOC;
4425 }
4426 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4427 return;
4428 }
4429
4430 case 2: /* UMAXV. */
4431 {
4432 uint64_t umax;
4433 switch (INSTR (23, 22))
4434 {
4435 case 0:
4436 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4437 for (i = 1; i < (full ? 16 : 8); i++)
4438 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4439 break;
4440 case 1:
4441 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4442 for (i = 1; i < (full ? 8 : 4); i++)
4443 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4444 break;
4445 case 2:
4446 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4447 for (i = 1; i < (full ? 4 : 2); i++)
4448 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4449 break;
4450
4451 case 3:
4452 HALT_UNALLOC;
4453 }
4454 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4455 return;
4456 }
4457
4458 case 3: /* UMINV. */
4459 {
4460 uint64_t umin;
4461 switch (INSTR (23, 22))
4462 {
4463 case 0:
4464 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4465 for (i = 1; i < (full ? 16 : 8); i++)
4466 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4467 break;
4468 case 1:
4469 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4470 for (i = 1; i < (full ? 8 : 4); i++)
4471 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4472 break;
4473 case 2:
4474 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4475 for (i = 1; i < (full ? 4 : 2); i++)
4476 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4477 break;
4478
4479 case 3:
4480 HALT_UNALLOC;
4481 }
4482 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4483 return;
4484 }
4485 }
4486 }
4487
4488 static void
4489 do_vec_fminmaxV (sim_cpu *cpu)
4490 {
4491 /* instr[31,24] = 0110 1110
4492 instr[23] = max(0)/min(1)
4493 instr[22,14] = 011 0000 11
4494 instr[13,12] = nm(00)/normal(11)
4495 instr[11,10] = 10
4496 instr[9,5] = V source
4497 instr[4,0] = R dest. */
4498
4499 unsigned vs = INSTR (9, 5);
4500 unsigned rd = INSTR (4, 0);
4501 unsigned i;
4502 float res = aarch64_get_vec_float (cpu, vs, 0);
4503
4504 NYI_assert (31, 24, 0x6E);
4505 NYI_assert (22, 14, 0x0C3);
4506 NYI_assert (11, 10, 2);
4507
4508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4509 if (INSTR (23, 23))
4510 {
4511 switch (INSTR (13, 12))
4512 {
4513 case 0: /* FMINNMV. */
4514 for (i = 1; i < 4; i++)
4515 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4516 break;
4517
4518 case 3: /* FMINV. */
4519 for (i = 1; i < 4; i++)
4520 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4521 break;
4522
4523 default:
4524 HALT_NYI;
4525 }
4526 }
4527 else
4528 {
4529 switch (INSTR (13, 12))
4530 {
4531 case 0: /* FMAXNMV. */
4532 for (i = 1; i < 4; i++)
4533 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4534 break;
4535
4536 case 3: /* FMAXV. */
4537 for (i = 1; i < 4; i++)
4538 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4539 break;
4540
4541 default:
4542 HALT_NYI;
4543 }
4544 }
4545
4546 aarch64_set_FP_float (cpu, rd, res);
4547 }
4548
4549 static void
4550 do_vec_Fminmax (sim_cpu *cpu)
4551 {
4552 /* instr[31] = 0
4553 instr[30] = half(0)/full(1)
4554 instr[29,24] = 00 1110
4555 instr[23] = max(0)/min(1)
4556 instr[22] = float(0)/double(1)
4557 instr[21] = 1
4558 instr[20,16] = Vm
4559 instr[15,14] = 11
4560 instr[13,12] = nm(00)/normal(11)
4561 instr[11,10] = 01
4562 instr[9,5] = Vn
4563 instr[4,0] = Vd. */
4564
4565 unsigned vm = INSTR (20, 16);
4566 unsigned vn = INSTR (9, 5);
4567 unsigned vd = INSTR (4, 0);
4568 unsigned full = INSTR (30, 30);
4569 unsigned min = INSTR (23, 23);
4570 unsigned i;
4571
4572 NYI_assert (29, 24, 0x0E);
4573 NYI_assert (21, 21, 1);
4574 NYI_assert (15, 14, 3);
4575 NYI_assert (11, 10, 1);
4576
4577 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4578 if (INSTR (22, 22))
4579 {
4580 double (* func)(double, double);
4581
4582 if (! full)
4583 HALT_NYI;
4584
4585 if (INSTR (13, 12) == 0)
4586 func = min ? dminnm : dmaxnm;
4587 else if (INSTR (13, 12) == 3)
4588 func = min ? fmin : fmax;
4589 else
4590 HALT_NYI;
4591
4592 for (i = 0; i < 2; i++)
4593 aarch64_set_vec_double (cpu, vd, i,
4594 func (aarch64_get_vec_double (cpu, vn, i),
4595 aarch64_get_vec_double (cpu, vm, i)));
4596 }
4597 else
4598 {
4599 float (* func)(float, float);
4600
4601 if (INSTR (13, 12) == 0)
4602 func = min ? fminnm : fmaxnm;
4603 else if (INSTR (13, 12) == 3)
4604 func = min ? fminf : fmaxf;
4605 else
4606 HALT_NYI;
4607
4608 for (i = 0; i < (full ? 4 : 2); i++)
4609 aarch64_set_vec_float (cpu, vd, i,
4610 func (aarch64_get_vec_float (cpu, vn, i),
4611 aarch64_get_vec_float (cpu, vm, i)));
4612 }
4613 }
4614
4615 static void
4616 do_vec_SCVTF (sim_cpu *cpu)
4617 {
4618 /* instr[31] = 0
4619 instr[30] = Q
4620 instr[29,23] = 00 1110 0
4621 instr[22] = float(0)/double(1)
4622 instr[21,10] = 10 0001 1101 10
4623 instr[9,5] = Vn
4624 instr[4,0] = Vd. */
4625
4626 unsigned vn = INSTR (9, 5);
4627 unsigned vd = INSTR (4, 0);
4628 unsigned full = INSTR (30, 30);
4629 unsigned size = INSTR (22, 22);
4630 unsigned i;
4631
4632 NYI_assert (29, 23, 0x1C);
4633 NYI_assert (21, 10, 0x876);
4634
4635 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4636 if (size)
4637 {
4638 if (! full)
4639 HALT_UNALLOC;
4640
4641 for (i = 0; i < 2; i++)
4642 {
4643 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4644 aarch64_set_vec_double (cpu, vd, i, val);
4645 }
4646 }
4647 else
4648 {
4649 for (i = 0; i < (full ? 4 : 2); i++)
4650 {
4651 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4652 aarch64_set_vec_float (cpu, vd, i, val);
4653 }
4654 }
4655 }
4656
4657 #define VEC_CMP(SOURCE, CMP) \
4658 do \
4659 { \
4660 switch (size) \
4661 { \
4662 case 0: \
4663 for (i = 0; i < (full ? 16 : 8); i++) \
4664 aarch64_set_vec_u8 (cpu, vd, i, \
4665 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4666 CMP \
4667 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4668 ? -1 : 0); \
4669 return; \
4670 case 1: \
4671 for (i = 0; i < (full ? 8 : 4); i++) \
4672 aarch64_set_vec_u16 (cpu, vd, i, \
4673 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4674 CMP \
4675 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4676 ? -1 : 0); \
4677 return; \
4678 case 2: \
4679 for (i = 0; i < (full ? 4 : 2); i++) \
4680 aarch64_set_vec_u32 (cpu, vd, i, \
4681 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4682 CMP \
4683 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4684 ? -1 : 0); \
4685 return; \
4686 case 3: \
4687 if (! full) \
4688 HALT_UNALLOC; \
4689 for (i = 0; i < 2; i++) \
4690 aarch64_set_vec_u64 (cpu, vd, i, \
4691 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4692 CMP \
4693 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4694 ? -1ULL : 0); \
4695 return; \
4696 default: \
4697 HALT_UNALLOC; \
4698 } \
4699 } \
4700 while (0)
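
/* The VEC_CMP* macros write an all-ones element when the predicate
   holds (-1 converts to 0xff..ff, the architectural "true" mask) and
   zero otherwise.  For instance, VEC_CMP (s, >) expands, for the byte
   case, to roughly:

     aarch64_set_vec_u8 (cpu, vd, i,
                         aarch64_get_vec_s8 (cpu, vn, i)
                         > aarch64_get_vec_s8 (cpu, vm, i) ? -1 : 0);
*/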
4701
4702 #define VEC_CMP0(SOURCE, CMP) \
4703 do \
4704 { \
4705 switch (size) \
4706 { \
4707 case 0: \
4708 for (i = 0; i < (full ? 16 : 8); i++) \
4709 aarch64_set_vec_u8 (cpu, vd, i, \
4710 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4711 CMP 0 ? -1 : 0); \
4712 return; \
4713 case 1: \
4714 for (i = 0; i < (full ? 8 : 4); i++) \
4715 aarch64_set_vec_u16 (cpu, vd, i, \
4716 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4717 CMP 0 ? -1 : 0); \
4718 return; \
4719 case 2: \
4720 for (i = 0; i < (full ? 4 : 2); i++) \
4721 aarch64_set_vec_u32 (cpu, vd, i, \
4722 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4723 CMP 0 ? -1 : 0); \
4724 return; \
4725 case 3: \
4726 if (! full) \
4727 HALT_UNALLOC; \
4728 for (i = 0; i < 2; i++) \
4729 aarch64_set_vec_u64 (cpu, vd, i, \
4730 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4731 CMP 0 ? -1ULL : 0); \
4732 return; \
4733 default: \
4734 HALT_UNALLOC; \
4735 } \
4736 } \
4737 while (0)
4738
4739 #define VEC_FCMP0(CMP) \
4740 do \
4741 { \
4742 if (vm != 0) \
4743 HALT_NYI; \
4744 if (INSTR (22, 22)) \
4745 { \
4746 if (! full) \
4747 HALT_NYI; \
4748 for (i = 0; i < 2; i++) \
4749 aarch64_set_vec_u64 (cpu, vd, i, \
4750 aarch64_get_vec_double (cpu, vn, i) \
4751 CMP 0.0 ? -1 : 0); \
4752 } \
4753 else \
4754 { \
4755 for (i = 0; i < (full ? 4 : 2); i++) \
4756 aarch64_set_vec_u32 (cpu, vd, i, \
4757 aarch64_get_vec_float (cpu, vn, i) \
4758 CMP 0.0 ? -1 : 0); \
4759 } \
4760 return; \
4761 } \
4762 while (0)
4763
4764 #define VEC_FCMP(CMP) \
4765 do \
4766 { \
4767 if (INSTR (22, 22)) \
4768 { \
4769 if (! full) \
4770 HALT_NYI; \
4771 for (i = 0; i < 2; i++) \
4772 aarch64_set_vec_u64 (cpu, vd, i, \
4773 aarch64_get_vec_double (cpu, vn, i) \
4774 CMP \
4775 aarch64_get_vec_double (cpu, vm, i) \
4776 ? -1 : 0); \
4777 } \
4778 else \
4779 { \
4780 for (i = 0; i < (full ? 4 : 2); i++) \
4781 aarch64_set_vec_u32 (cpu, vd, i, \
4782 aarch64_get_vec_float (cpu, vn, i) \
4783 CMP \
4784 aarch64_get_vec_float (cpu, vm, i) \
4785 ? -1 : 0); \
4786 } \
4787 return; \
4788 } \
4789 while (0)
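
/* The float compare macros rely on C's ordered comparisons, which are
   false whenever an operand is a NaN, so a NaN lane yields a zero
   mask; this matches the FCMEQ/FCMGE/FCMGT definitions.  E.g. with a
   NaN in vn: (NaN > x) and (NaN == x) are both false => mask 0.  */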
4790
4791 static void
4792 do_vec_compare (sim_cpu *cpu)
4793 {
4794 /* instr[31] = 0
4795 instr[30] = half(0)/full(1)
4796 instr[29] = part-of-comparison-type
4797 instr[28,24] = 0 1110
4798 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4799 type of float compares: single (-0) / double (-1)
4800 instr[21] = 1
4801 instr[20,16] = Vm or 00000 (compare vs 0)
4802 instr[15,10] = part-of-comparison-type
4803 instr[9,5] = Vn
4804 instr[4,0] = Vd. */
4805
4806 int full = INSTR (30, 30);
4807 int size = INSTR (23, 22);
4808 unsigned vm = INSTR (20, 16);
4809 unsigned vn = INSTR (9, 5);
4810 unsigned vd = INSTR (4, 0);
4811 unsigned i;
4812
4813 NYI_assert (28, 24, 0x0E);
4814 NYI_assert (21, 21, 1);
4815
4816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4817 if ((INSTR (11, 11)
4818 && INSTR (14, 14))
4819 || ((INSTR (11, 11) == 0
4820 && INSTR (10, 10) == 0)))
4821 {
4822 /* A compare vs 0. */
4823 if (vm != 0)
4824 {
4825 if (INSTR (15, 10) == 0x2A)
4826 do_vec_maxv (cpu);
4827 else if (INSTR (15, 10) == 0x32
4828 || INSTR (15, 10) == 0x3E)
4829 do_vec_fminmaxV (cpu);
4830 else if (INSTR (29, 23) == 0x1C
4831 && INSTR (21, 10) == 0x876)
4832 do_vec_SCVTF (cpu);
4833 else
4834 HALT_NYI;
4835 return;
4836 }
4837 }
4838
4839 if (INSTR (14, 14))
4840 {
4841 /* A floating point compare. */
4842 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4843 | INSTR (13, 10);
4844
4845 NYI_assert (15, 15, 1);
4846
4847 switch (decode)
4848 {
4849 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4850 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4851 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4852 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4853 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4854 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4855 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4856 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4857
4858 default:
4859 HALT_NYI;
4860 }
4861 }
4862 else
4863 {
4864 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4865
4866 switch (decode)
4867 {
4868 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4869 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4870 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4871 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4872 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4873 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4874 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4875 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4876 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4877 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4878 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4879 default:
4880 if (vm == 0)
4881 HALT_NYI;
4882 do_vec_maxv (cpu);
4883 }
4884 }
4885 }
4886
4887 static void
4888 do_vec_SSHL (sim_cpu *cpu)
4889 {
4890 /* instr[31] = 0
4891 instr[30] = first part (0)/ second part (1)
4892 instr[29,24] = 00 1110
4893 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4894 instr[21] = 1
4895 instr[20,16] = Vm
4896 instr[15,10] = 0100 01
4897 instr[9,5] = Vn
4898 instr[4,0] = Vd. */
4899
4900 unsigned full = INSTR (30, 30);
4901 unsigned vm = INSTR (20, 16);
4902 unsigned vn = INSTR (9, 5);
4903 unsigned vd = INSTR (4, 0);
4904 unsigned i;
4905 signed int shift;
4906
4907 NYI_assert (29, 24, 0x0E);
4908 NYI_assert (21, 21, 1);
4909 NYI_assert (15, 10, 0x11);
4910
4911 /* SSHL: the low byte of each Vm element supplies a signed shift count; a non-negative count shifts left, a negative count shifts (arithmetically) right. */
4912
4913 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4914 switch (INSTR (23, 22))
4915 {
4916 case 0:
4917 for (i = 0; i < (full ? 16 : 8); i++)
4918 {
4919 shift = aarch64_get_vec_s8 (cpu, vm, i);
4920 if (shift >= 0)
4921 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4922 << shift);
4923 else
4924 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4925 >> - shift);
4926 }
4927 return;
4928
4929 case 1:
4930 for (i = 0; i < (full ? 8 : 4); i++)
4931 {
4932 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4933 if (shift >= 0)
4934 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4935 << shift);
4936 else
4937 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4938 >> - shift);
4939 }
4940 return;
4941
4942 case 2:
4943 for (i = 0; i < (full ? 4 : 2); i++)
4944 {
4945 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4946 if (shift >= 0)
4947 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4948 << shift);
4949 else
4950 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4951 >> - shift);
4952 }
4953 return;
4954
4955 case 3:
4956 if (! full)
4957 HALT_UNALLOC;
4958 for (i = 0; i < 2; i++)
4959 {
4960 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4961 if (shift >= 0)
4962 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4963 << shift);
4964 else
4965 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4966 >> - shift);
4967 }
4968 return;
4969 }
4970 }
4971
4972 static void
4973 do_vec_USHL (sim_cpu *cpu)
4974 {
4975 /* instr[31] = 0
4976 instr[30] = first part (0)/ second part (1)
4977 instr[29,24] = 10 1110
4978 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4979 instr[21] = 1
4980 instr[20,16] = Vm
4981 instr[15,10] = 0100 01
4982 instr[9,5] = Vn
4983 instr[4,0] = Vd */
4984
4985 unsigned full = INSTR (30, 30);
4986 unsigned vm = INSTR (20, 16);
4987 unsigned vn = INSTR (9, 5);
4988 unsigned vd = INSTR (4, 0);
4989 unsigned i;
4990 signed int shift;
4991
4992 NYI_assert (29, 24, 0x2E);
4993 NYI_assert (15, 10, 0x11);
4994
4995 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4996 switch (INSTR (23, 22))
4997 {
4998 case 0:
4999 for (i = 0; i < (full ? 16 : 8); i++)
5000 {
5001 shift = aarch64_get_vec_s8 (cpu, vm, i);
5002 if (shift >= 0)
5003 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5004 << shift);
5005 else
5006 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5007 >> - shift);
5008 }
5009 return;
5010
5011 case 1:
5012 for (i = 0; i < (full ? 8 : 4); i++)
5013 {
5014 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5015 if (shift >= 0)
5016 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5017 << shift);
5018 else
5019 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5020 >> - shift);
5021 }
5022 return;
5023
5024 case 2:
5025 for (i = 0; i < (full ? 4 : 2); i++)
5026 {
5027 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5028 if (shift >= 0)
5029 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5030 << shift);
5031 else
5032 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5033 >> - shift);
5034 }
5035 return;
5036
5037 case 3:
5038 if (! full)
5039 HALT_UNALLOC;
5040 for (i = 0; i < 2; i++)
5041 {
5042 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5043 if (shift >= 0)
5044 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5045 << shift);
5046 else
5047 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5048 >> - shift);
5049 }
5050 return;
5051 }
5052 }
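
/* Shift-count convention for SSHL/USHL above: the count is the signed
   value in the least significant byte of each Vm element, so negative
   counts shift right.  Hypothetical byte-lane values:

     vn.b[0] = 0x80, vm.b[0] =  1  =>  USHL gives 0x00 (left by 1)
     vn.b[0] = 0x80, vm.b[0] = -4  =>  USHL gives 0x08 (right by 4)  */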
5053
5054 static void
5055 do_vec_FMLA (sim_cpu *cpu)
5056 {
5057 /* instr[31] = 0
5058 instr[30] = full/half selector
5059 instr[29,23] = 0011100
5060 instr[22] = size: 0=>float, 1=>double
5061 instr[21] = 1
5062 instr[20,16] = Vm
5063 instr[15,10] = 1100 11
5064 instr[9,5] = Vn
5065 instr[4,0] = Vd. */
5066
5067 unsigned vm = INSTR (20, 16);
5068 unsigned vn = INSTR (9, 5);
5069 unsigned vd = INSTR (4, 0);
5070 unsigned i;
5071 int full = INSTR (30, 30);
5072
5073 NYI_assert (29, 23, 0x1C);
5074 NYI_assert (21, 21, 1);
5075 NYI_assert (15, 10, 0x33);
5076
5077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5078 if (INSTR (22, 22))
5079 {
5080 if (! full)
5081 HALT_UNALLOC;
5082 for (i = 0; i < 2; i++)
5083 aarch64_set_vec_double (cpu, vd, i,
5084 aarch64_get_vec_double (cpu, vn, i) *
5085 aarch64_get_vec_double (cpu, vm, i) +
5086 aarch64_get_vec_double (cpu, vd, i));
5087 }
5088 else
5089 {
5090 for (i = 0; i < (full ? 4 : 2); i++)
5091 aarch64_set_vec_float (cpu, vd, i,
5092 aarch64_get_vec_float (cpu, vn, i) *
5093 aarch64_get_vec_float (cpu, vm, i) +
5094 aarch64_get_vec_float (cpu, vd, i));
5095 }
5096 }
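
/* The FMLA above is modelled with a separate multiply and add, i.e.
   two roundings, whereas the architectural instruction is fused (one
   rounding).  If that distinction ever matters, a closer model would
   use the C99 fma routines - a sketch only, not currently used:

     aarch64_set_vec_float (cpu, vd, i,
                            fmaf (aarch64_get_vec_float (cpu, vn, i),
                                  aarch64_get_vec_float (cpu, vm, i),
                                  aarch64_get_vec_float (cpu, vd, i)));
*/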
5097
5098 static void
5099 do_vec_max (sim_cpu *cpu)
5100 {
5101 /* instr[31] = 0
5102 instr[30] = full/half selector
5103 instr[29] = SMAX (0) / UMAX (1)
5104 instr[28,24] = 0 1110
5105 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5106 instr[21] = 1
5107 instr[20,16] = Vm
5108 instr[15,10] = 0110 01
5109 instr[9,5] = Vn
5110 instr[4,0] = Vd. */
5111
5112 unsigned vm = INSTR (20, 16);
5113 unsigned vn = INSTR (9, 5);
5114 unsigned vd = INSTR (4, 0);
5115 unsigned i;
5116 int full = INSTR (30, 30);
5117
5118 NYI_assert (28, 24, 0x0E);
5119 NYI_assert (21, 21, 1);
5120 NYI_assert (15, 10, 0x19);
5121
5122 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5123 if (INSTR (29, 29))
5124 {
5125 switch (INSTR (23, 22))
5126 {
5127 case 0:
5128 for (i = 0; i < (full ? 16 : 8); i++)
5129 aarch64_set_vec_u8 (cpu, vd, i,
5130 aarch64_get_vec_u8 (cpu, vn, i)
5131 > aarch64_get_vec_u8 (cpu, vm, i)
5132 ? aarch64_get_vec_u8 (cpu, vn, i)
5133 : aarch64_get_vec_u8 (cpu, vm, i));
5134 return;
5135
5136 case 1:
5137 for (i = 0; i < (full ? 8 : 4); i++)
5138 aarch64_set_vec_u16 (cpu, vd, i,
5139 aarch64_get_vec_u16 (cpu, vn, i)
5140 > aarch64_get_vec_u16 (cpu, vm, i)
5141 ? aarch64_get_vec_u16 (cpu, vn, i)
5142 : aarch64_get_vec_u16 (cpu, vm, i));
5143 return;
5144
5145 case 2:
5146 for (i = 0; i < (full ? 4 : 2); i++)
5147 aarch64_set_vec_u32 (cpu, vd, i,
5148 aarch64_get_vec_u32 (cpu, vn, i)
5149 > aarch64_get_vec_u32 (cpu, vm, i)
5150 ? aarch64_get_vec_u32 (cpu, vn, i)
5151 : aarch64_get_vec_u32 (cpu, vm, i));
5152 return;
5153
5154 case 3:
5155 HALT_UNALLOC;
5156 }
5157 }
5158 else
5159 {
5160 switch (INSTR (23, 22))
5161 {
5162 case 0:
5163 for (i = 0; i < (full ? 16 : 8); i++)
5164 aarch64_set_vec_s8 (cpu, vd, i,
5165 aarch64_get_vec_s8 (cpu, vn, i)
5166 > aarch64_get_vec_s8 (cpu, vm, i)
5167 ? aarch64_get_vec_s8 (cpu, vn, i)
5168 : aarch64_get_vec_s8 (cpu, vm, i));
5169 return;
5170
5171 case 1:
5172 for (i = 0; i < (full ? 8 : 4); i++)
5173 aarch64_set_vec_s16 (cpu, vd, i,
5174 aarch64_get_vec_s16 (cpu, vn, i)
5175 > aarch64_get_vec_s16 (cpu, vm, i)
5176 ? aarch64_get_vec_s16 (cpu, vn, i)
5177 : aarch64_get_vec_s16 (cpu, vm, i));
5178 return;
5179
5180 case 2:
5181 for (i = 0; i < (full ? 4 : 2); i++)
5182 aarch64_set_vec_s32 (cpu, vd, i,
5183 aarch64_get_vec_s32 (cpu, vn, i)
5184 > aarch64_get_vec_s32 (cpu, vm, i)
5185 ? aarch64_get_vec_s32 (cpu, vn, i)
5186 : aarch64_get_vec_s32 (cpu, vm, i));
5187 return;
5188
5189 case 3:
5190 HALT_UNALLOC;
5191 }
5192 }
5193 }
5194
5195 static void
5196 do_vec_min (sim_cpu *cpu)
5197 {
5198 /* instr[31] = 0
5199 instr[30] = full/half selector
5200 instr[29] = SMIN (0) / UMIN (1)
5201 instr[28,24] = 0 1110
5202 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5203 instr[21] = 1
5204 instr[20,16] = Vm
5205 instr[15,10] = 0110 11
5206 instr[9,5] = Vn
5207 instr[4,0] = Vd. */
5208
5209 unsigned vm = INSTR (20, 16);
5210 unsigned vn = INSTR (9, 5);
5211 unsigned vd = INSTR (4, 0);
5212 unsigned i;
5213 int full = INSTR (30, 30);
5214
5215 NYI_assert (28, 24, 0x0E);
5216 NYI_assert (21, 21, 1);
5217 NYI_assert (15, 10, 0x1B);
5218
5219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5220 if (INSTR (29, 29))
5221 {
5222 switch (INSTR (23, 22))
5223 {
5224 case 0:
5225 for (i = 0; i < (full ? 16 : 8); i++)
5226 aarch64_set_vec_u8 (cpu, vd, i,
5227 aarch64_get_vec_u8 (cpu, vn, i)
5228 < aarch64_get_vec_u8 (cpu, vm, i)
5229 ? aarch64_get_vec_u8 (cpu, vn, i)
5230 : aarch64_get_vec_u8 (cpu, vm, i));
5231 return;
5232
5233 case 1:
5234 for (i = 0; i < (full ? 8 : 4); i++)
5235 aarch64_set_vec_u16 (cpu, vd, i,
5236 aarch64_get_vec_u16 (cpu, vn, i)
5237 < aarch64_get_vec_u16 (cpu, vm, i)
5238 ? aarch64_get_vec_u16 (cpu, vn, i)
5239 : aarch64_get_vec_u16 (cpu, vm, i));
5240 return;
5241
5242 case 2:
5243 for (i = 0; i < (full ? 4 : 2); i++)
5244 aarch64_set_vec_u32 (cpu, vd, i,
5245 aarch64_get_vec_u32 (cpu, vn, i)
5246 < aarch64_get_vec_u32 (cpu, vm, i)
5247 ? aarch64_get_vec_u32 (cpu, vn, i)
5248 : aarch64_get_vec_u32 (cpu, vm, i));
5249 return;
5250
5251 case 3:
5252 HALT_UNALLOC;
5253 }
5254 }
5255 else
5256 {
5257 switch (INSTR (23, 22))
5258 {
5259 case 0:
5260 for (i = 0; i < (full ? 16 : 8); i++)
5261 aarch64_set_vec_s8 (cpu, vd, i,
5262 aarch64_get_vec_s8 (cpu, vn, i)
5263 < aarch64_get_vec_s8 (cpu, vm, i)
5264 ? aarch64_get_vec_s8 (cpu, vn, i)
5265 : aarch64_get_vec_s8 (cpu, vm, i));
5266 return;
5267
5268 case 1:
5269 for (i = 0; i < (full ? 8 : 4); i++)
5270 aarch64_set_vec_s16 (cpu, vd, i,
5271 aarch64_get_vec_s16 (cpu, vn, i)
5272 < aarch64_get_vec_s16 (cpu, vm, i)
5273 ? aarch64_get_vec_s16 (cpu, vn, i)
5274 : aarch64_get_vec_s16 (cpu, vm, i));
5275 return;
5276
5277 case 2:
5278 for (i = 0; i < (full ? 4 : 2); i++)
5279 aarch64_set_vec_s32 (cpu, vd, i,
5280 aarch64_get_vec_s32 (cpu, vn, i)
5281 < aarch64_get_vec_s32 (cpu, vm, i)
5282 ? aarch64_get_vec_s32 (cpu, vn, i)
5283 : aarch64_get_vec_s32 (cpu, vm, i));
5284 return;
5285
5286 case 3:
5287 HALT_UNALLOC;
5288 }
5289 }
5290 }
5291
5292 static void
5293 do_vec_sub_long (sim_cpu *cpu)
5294 {
5295 /* instr[31] = 0
5296 instr[30] = lower (0) / upper (1)
5297 instr[29] = signed (0) / unsigned (1)
5298 instr[28,24] = 0 1110
5299 instr[23,22] = size: bytes (00), half (01), word (10)
5300 instr[21] = 1
5301 instr[20,16] = Vm
5302 instr[15,10] = 0010 00
5303 instr[9,5] = Vn
5304 instr[4,0] = V dest. */
5305
5306 unsigned size = INSTR (23, 22);
5307 unsigned vm = INSTR (20, 16);
5308 unsigned vn = INSTR (9, 5);
5309 unsigned vd = INSTR (4, 0);
5310 unsigned bias = 0;
5311 unsigned i;
5312
5313 NYI_assert (28, 24, 0x0E);
5314 NYI_assert (21, 21, 1);
5315 NYI_assert (15, 10, 0x08);
5316
5317 if (size == 3)
5318 HALT_UNALLOC;
5319
5320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5321 switch (INSTR (30, 29))
5322 {
5323 case 2: /* SSUBL2. */
5324 bias = 2;
5325 ATTRIBUTE_FALLTHROUGH;
5326 case 0: /* SSUBL. */
5327 switch (size)
5328 {
5329 case 0:
5330 bias *= 4;
5331 for (i = 0; i < 8; i++)
5332 aarch64_set_vec_s16 (cpu, vd, i,
5333 aarch64_get_vec_s8 (cpu, vn, i + bias)
5334 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5335 break;
5336
5337 case 1:
5338 bias *= 2;
5339 for (i = 0; i < 4; i++)
5340 aarch64_set_vec_s32 (cpu, vd, i,
5341 aarch64_get_vec_s16 (cpu, vn, i + bias)
5342 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5343 break;
5344
5345 case 2:
5346 for (i = 0; i < 2; i++)
5347 aarch64_set_vec_s64 (cpu, vd, i,
5348 aarch64_get_vec_s32 (cpu, vn, i + bias)
5349 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5350 break;
5351
5352 default:
5353 HALT_UNALLOC;
5354 }
5355 break;
5356
5357 case 3: /* USUBL2. */
5358 bias = 2;
5359 ATTRIBUTE_FALLTHROUGH;
5360 case 1: /* USUBL. */
5361 switch (size)
5362 {
5363 case 0:
5364 bias *= 4;
5365 for (i = 0; i < 8; i++)
5366 aarch64_set_vec_u16 (cpu, vd, i,
5367 aarch64_get_vec_u8 (cpu, vn, i + bias)
5368 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5369 break;
5370
5371 case 1:
5372 bias *= 2;
5373 for (i = 0; i < 4; i++)
5374 aarch64_set_vec_u32 (cpu, vd, i,
5375 aarch64_get_vec_u16 (cpu, vn, i + bias)
5376 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5377 break;
5378
5379 case 2:
5380 for (i = 0; i < 2; i++)
5381 aarch64_set_vec_u64 (cpu, vd, i,
5382 aarch64_get_vec_u32 (cpu, vn, i + bias)
5383 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5384 break;
5385
5386 default:
5387 HALT_UNALLOC;
5388 }
5389 break;
5390 }
5391 }
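
/* Example of the upper-half ("2") variants above: for SSUBL2 with byte
   elements, bias is 2 and is scaled to 8, so the loop reads source
   lanes 8..15 - the upper 64 bits of Vn/Vm - and widens each
   difference into one of the eight 16-bit lanes of Vd.  */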
5392
5393 static void
5394 do_vec_ADDP (sim_cpu *cpu)
5395 {
5396 /* instr[31] = 0
5397 instr[30] = half(0)/full(1)
5398 instr[29,24] = 00 1110
5399 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5400 instr[21] = 1
5401 instr[20,16] = Vm
5402 instr[15,10] = 1011 11
5403 instr[9,5] = Vn
5404 instr[4,0] = V dest. */
5405
5406 struct aarch64_sim_cpu *aarch64_cpu = AARCH64_SIM_CPU (cpu);
5407 FRegister copy_vn;
5408 FRegister copy_vm;
5409 unsigned full = INSTR (30, 30);
5410 unsigned size = INSTR (23, 22);
5411 unsigned vm = INSTR (20, 16);
5412 unsigned vn = INSTR (9, 5);
5413 unsigned vd = INSTR (4, 0);
5414 unsigned i, range;
5415
5416 NYI_assert (29, 24, 0x0E);
5417 NYI_assert (21, 21, 1);
5418 NYI_assert (15, 10, 0x2F);
5419
5420 /* Make copies of the source registers in case vd == vn/vm. */
5421 copy_vn = aarch64_cpu->fr[vn];
5422 copy_vm = aarch64_cpu->fr[vm];
5423
5424 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5425 switch (size)
5426 {
5427 case 0:
5428 range = full ? 8 : 4;
5429 for (i = 0; i < range; i++)
5430 {
5431 aarch64_set_vec_u8 (cpu, vd, i,
5432 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5433 aarch64_set_vec_u8 (cpu, vd, i + range,
5434 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5435 }
5436 return;
5437
5438 case 1:
5439 range = full ? 4 : 2;
5440 for (i = 0; i < range; i++)
5441 {
5442 aarch64_set_vec_u16 (cpu, vd, i,
5443 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5444 aarch64_set_vec_u16 (cpu, vd, i + range,
5445 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5446 }
5447 return;
5448
5449 case 2:
5450 range = full ? 2 : 1;
5451 for (i = 0; i < range; i++)
5452 {
5453 aarch64_set_vec_u32 (cpu, vd, i,
5454 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5455 aarch64_set_vec_u32 (cpu, vd, i + range,
5456 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5457 }
5458 return;
5459
5460 case 3:
5461 if (! full)
5462 HALT_UNALLOC;
5463 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5464 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5465 return;
5466 }
5467 }
5468
5469 /* Floating point vector convert to longer (higher precision). */
5470 static void
5471 do_vec_FCVTL (sim_cpu *cpu)
5472 {
5473 /* instr[31] = 0
5474 instr[30] = half (0) / all (1)
5475 instr[29,23] = 00 1110 0
5476 instr[22] = single (0) / double (1)
5477 instr[21,10] = 10 0001 0111 10
5478 instr[9,5] = Rn
5479 instr[4,0] = Rd. */
5480
5481 unsigned rn = INSTR (9, 5);
5482 unsigned rd = INSTR (4, 0);
5483 unsigned full = INSTR (30, 30);
5484 unsigned i;
5485
5486 NYI_assert (31, 31, 0);
5487 NYI_assert (29, 23, 0x1C);
5488 NYI_assert (21, 10, 0x85E);
5489
5490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5491 if (INSTR (22, 22))
5492 {
5493 for (i = 0; i < 2; i++)
5494 aarch64_set_vec_double (cpu, rd, i,
5495 aarch64_get_vec_float (cpu, rn, i + 2*full));
5496 }
5497 else
5498 {
5499 HALT_NYI;
5500
5501 #if 0
5502 /* TODO: Implement missing half-float support. */
5503 for (i = 0; i < 4; i++)
5504 aarch64_set_vec_float (cpu, rd, i,
5505 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5506 #endif
5507 }
5508 }
5509
5510 static void
5511 do_vec_FABS (sim_cpu *cpu)
5512 {
5513 /* instr[31] = 0
5514 instr[30] = half(0)/full(1)
5515 instr[29,23] = 00 1110 1
5516 instr[22] = float(0)/double(1)
5517 instr[21,16] = 10 0000
5518 instr[15,10] = 1111 10
5519 instr[9,5] = Vn
5520 instr[4,0] = Vd. */
5521
5522 unsigned vn = INSTR (9, 5);
5523 unsigned vd = INSTR (4, 0);
5524 unsigned full = INSTR (30, 30);
5525 unsigned i;
5526
5527 NYI_assert (29, 23, 0x1D);
5528 NYI_assert (21, 10, 0x83E);
5529
5530 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5531 if (INSTR (22, 22))
5532 {
5533 if (! full)
5534 HALT_NYI;
5535
5536 for (i = 0; i < 2; i++)
5537 aarch64_set_vec_double (cpu, vd, i,
5538 fabs (aarch64_get_vec_double (cpu, vn, i)));
5539 }
5540 else
5541 {
5542 for (i = 0; i < (full ? 4 : 2); i++)
5543 aarch64_set_vec_float (cpu, vd, i,
5544 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5545 }
5546 }
5547
5548 static void
5549 do_vec_FCVTZS (sim_cpu *cpu)
5550 {
5551 /* instr[31] = 0
5552 instr[30] = half (0) / all (1)
5553 instr[29,23] = 00 1110 1
5554 instr[22] = single (0) / double (1)
5555 instr[21,10] = 10 0001 1011 10
5556 instr[9,5] = Rn
5557 instr[4,0] = Rd. */
5558
5559 unsigned rn = INSTR (9, 5);
5560 unsigned rd = INSTR (4, 0);
5561 unsigned full = INSTR (30, 30);
5562 unsigned i;
5563
5564 NYI_assert (31, 31, 0);
5565 NYI_assert (29, 23, 0x1D);
5566 NYI_assert (21, 10, 0x86E);
5567
5568 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5569 if (INSTR (22, 22))
5570 {
5571 if (! full)
5572 HALT_UNALLOC;
5573
5574 for (i = 0; i < 2; i++)
5575 aarch64_set_vec_s64 (cpu, rd, i,
5576 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5577 }
5578 else
5579 for (i = 0; i < (full ? 4 : 2); i++)
5580 aarch64_set_vec_s32 (cpu, rd, i,
5581 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5582 }
5583
5584 static void
5585 do_vec_REV64 (sim_cpu *cpu)
5586 {
5587 /* instr[31] = 0
5588 instr[30] = full/half
5589 instr[29,24] = 00 1110
5590 instr[23,22] = size
5591 instr[21,10] = 10 0000 0000 10
5592 instr[9,5] = Rn
5593 instr[4,0] = Rd. */
5594
5595 unsigned rn = INSTR (9, 5);
5596 unsigned rd = INSTR (4, 0);
5597 unsigned size = INSTR (23, 22);
5598 unsigned full = INSTR (30, 30);
5599 unsigned i;
5600 FRegister val;
5601
5602 NYI_assert (29, 24, 0x0E);
5603 NYI_assert (21, 10, 0x802);
5604
5605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5606 switch (size)
5607 {
5608 case 0:
5609 for (i = 0; i < (full ? 16 : 8); i++)
5610 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5611 break;
5612
5613 case 1:
5614 for (i = 0; i < (full ? 8 : 4); i++)
5615 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5616 break;
5617
5618 case 2:
5619 for (i = 0; i < (full ? 4 : 2); i++)
5620 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5621 break;
5622
5623 case 3:
5624 HALT_UNALLOC;
5625 }
5626
5627 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5628 if (full)
5629 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5630 }
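
/* The XOR trick above reverses lanes within each 64-bit container:
   for bytes, i ^ 7 maps lanes 0..7 to 7..0 (and 8..15 to 15..8); for
   halfwords i ^ 3 reverses each group of four; for words i ^ 1 swaps
   each pair.  E.g. byte lane 2 lands in lane 5 (2 ^ 7 == 5).  */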
5631
5632 static void
5633 do_vec_REV16 (sim_cpu *cpu)
5634 {
5635 /* instr[31] = 0
5636 instr[30] = full/half
5637 instr[29,24] = 00 1110
5638 instr[23,22] = size
5639 instr[21,10] = 10 0000 0001 10
5640 instr[9,5] = Rn
5641 instr[4,0] = Rd. */
5642
5643 unsigned rn = INSTR (9, 5);
5644 unsigned rd = INSTR (4, 0);
5645 unsigned size = INSTR (23, 22);
5646 unsigned full = INSTR (30, 30);
5647 unsigned i;
5648 FRegister val;
5649
5650 NYI_assert (29, 24, 0x0E);
5651 NYI_assert (21, 10, 0x806);
5652
5653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5654 switch (size)
5655 {
5656 case 0:
5657 for (i = 0; i < (full ? 16 : 8); i++)
5658 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5659 break;
5660
5661 default:
5662 HALT_UNALLOC;
5663 }
5664
5665 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5666 if (full)
5667 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5668 }
5669
5670 static void
5671 do_vec_op1 (sim_cpu *cpu)
5672 {
5673 /* instr[31] = 0
5674 instr[30] = half/full
5675 instr[29,24] = 00 1110
5676 instr[23,21] = ???
5677 instr[20,16] = Vm
5678 instr[15,10] = sub-opcode
5679 instr[9,5] = Vn
5680 instr[4,0] = Vd */
5681 NYI_assert (29, 24, 0x0E);
5682
5683 if (INSTR (21, 21) == 0)
5684 {
5685 if (INSTR (23, 22) == 0)
5686 {
5687 if (INSTR (30, 30) == 1
5688 && INSTR (17, 14) == 0
5689 && INSTR (12, 10) == 7)
5690 return do_vec_ins_2 (cpu);
5691
5692 switch (INSTR (15, 10))
5693 {
5694 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5695 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5696 case 0x07: do_vec_INS (cpu); return;
5697 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5698 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5699
5700 case 0x00:
5701 case 0x08:
5702 case 0x10:
5703 case 0x18:
5704 do_vec_TBL (cpu); return;
5705
5706 case 0x06:
5707 case 0x16:
5708 do_vec_UZP (cpu); return;
5709
5710 case 0x0A: do_vec_TRN (cpu); return;
5711
5712 case 0x0E:
5713 case 0x1E:
5714 do_vec_ZIP (cpu); return;
5715
5716 default:
5717 HALT_NYI;
5718 }
5719 }
5720
5721 switch (INSTR (13, 10))
5722 {
5723 case 0x6: do_vec_UZP (cpu); return;
5724 case 0xE: do_vec_ZIP (cpu); return;
5725 case 0xA: do_vec_TRN (cpu); return;
5726 default: HALT_NYI;
5727 }
5728 }
5729
5730 switch (INSTR (15, 10))
5731 {
5732 case 0x02: do_vec_REV64 (cpu); return;
5733 case 0x06: do_vec_REV16 (cpu); return;
5734
5735 case 0x07:
5736 switch (INSTR (23, 21))
5737 {
5738 case 1: do_vec_AND (cpu); return;
5739 case 3: do_vec_BIC (cpu); return;
5740 case 5: do_vec_ORR (cpu); return;
5741 case 7: do_vec_ORN (cpu); return;
5742 default: HALT_NYI;
5743 }
5744
5745 case 0x08: do_vec_sub_long (cpu); return;
5746 case 0x0a: do_vec_XTN (cpu); return;
5747 case 0x11: do_vec_SSHL (cpu); return;
5748 case 0x16: do_vec_CNT (cpu); return;
5749 case 0x19: do_vec_max (cpu); return;
5750 case 0x1B: do_vec_min (cpu); return;
5751 case 0x21: do_vec_add (cpu); return;
5752 case 0x25: do_vec_MLA (cpu); return;
5753 case 0x27: do_vec_mul (cpu); return;
5754 case 0x2F: do_vec_ADDP (cpu); return;
5755 case 0x30: do_vec_mull (cpu); return;
5756 case 0x33: do_vec_FMLA (cpu); return;
5757 case 0x35: do_vec_fadd (cpu); return;
5758
5759 case 0x1E:
5760 switch (INSTR (20, 16))
5761 {
5762 case 0x01: do_vec_FCVTL (cpu); return;
5763 default: HALT_NYI;
5764 }
5765
5766 case 0x2E:
5767 switch (INSTR (20, 16))
5768 {
5769 case 0x00: do_vec_ABS (cpu); return;
5770 case 0x01: do_vec_FCVTZS (cpu); return;
5771 case 0x11: do_vec_ADDV (cpu); return;
5772 default: HALT_NYI;
5773 }
5774
5775 case 0x31:
5776 case 0x3B:
5777 do_vec_Fminmax (cpu); return;
5778
5779 case 0x0D:
5780 case 0x0F:
5781 case 0x22:
5782 case 0x23:
5783 case 0x26:
5784 case 0x2A:
5785 case 0x32:
5786 case 0x36:
5787 case 0x39:
5788 case 0x3A:
5789 do_vec_compare (cpu); return;
5790
5791 case 0x3E:
5792 do_vec_FABS (cpu); return;
5793
5794 default:
5795 HALT_NYI;
5796 }
5797 }
5798
5799 static void
5800 do_vec_xtl (sim_cpu *cpu)
5801 {
5802 /* instr[31] = 0
5803 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5804 instr[28,22] = 0 1111 00
5805 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5806 instr[15,10] = 1010 01
5807 instr[9,5] = V source
5808 instr[4,0] = V dest. */
5809
5810 unsigned vs = INSTR (9, 5);
5811 unsigned vd = INSTR (4, 0);
5812 unsigned i, shift, bias = 0;
5813
5814 NYI_assert (28, 22, 0x3C);
5815 NYI_assert (15, 10, 0x29);
5816
5817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5818 switch (INSTR (30, 29))
5819 {
5820 case 2: /* SXTL2, SSHLL2. */
5821 bias = 2;
5822 ATTRIBUTE_FALLTHROUGH;
5823 case 0: /* SXTL, SSHLL. */
5824 if (INSTR (21, 21))
5825 {
5826 int64_t val1, val2;
5827
5828 shift = INSTR (20, 16);
5829 /* Get the source values before setting the destination values
5830 in case the source and destination are the same. */
5831 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5832 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5833 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5834 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5835 }
5836 else if (INSTR (20, 20))
5837 {
5838 int32_t v[4];
5839
5840 shift = INSTR (19, 16);
5841 bias *= 2;
5842 for (i = 0; i < 4; i++)
5843 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5844 for (i = 0; i < 4; i++)
5845 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5846 }
5847 else
5848 {
5849 int16_t v[8];
5850 NYI_assert (19, 19, 1);
5851
5852 shift = INSTR (18, 16);
5853 bias *= 4;
5854 for (i = 0; i < 8; i++)
5855 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5856 for (i = 0; i < 8; i++)
5857 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5858 }
5859 return;
5860
5861 case 3: /* UXTL2, USHLL2. */
5862 bias = 2;
5863 ATTRIBUTE_FALLTHROUGH;
5864 case 1: /* UXTL, USHLL. */
5865 if (INSTR (21, 21))
5866 {
5867 uint64_t v1, v2;
5868 shift = INSTR (20, 16);
5869 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5870 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5871 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5872 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5873 }
5874 else if (INSTR (20, 20))
5875 {
5876 uint32_t v[4];
5877 shift = INSTR (19, 16);
5878 bias *= 2;
5879 for (i = 0; i < 4; i++)
5880 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5881 for (i = 0; i < 4; i++)
5882 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5883 }
5884 else
5885 {
5886 uint16_t v[8];
5887 NYI_assert (19, 19, 1);
5888
5889 shift = INSTR (18, 16);
5890 bias *= 4;
5891 for (i = 0; i < 8; i++)
5892 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5893 for (i = 0; i < 8; i++)
5894 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5895 }
5896 return;
5897 }
5898 }
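
/* The size/shift field decodes as immh:immb - the position of the
   leading 1 bit of immh selects the element size and the remaining low
   bits hold the shift.  Worked example (SSHLL, byte elements):

     instr[21,16] = 001 010  =>  bit 19 set selects bytes,
                                 shift = instr[18,16] = 2,

   so each signed byte is widened to 16 bits and shifted left by 2.  */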
5899
5900 static void
5901 do_vec_SHL (sim_cpu *cpu)
5902 {
5903 /* instr [31] = 0
5904 instr [30] = half(0)/full(1)
5905 instr [29,23] = 001 1110
5906 instr [22,16] = size and shift amount
5907 instr [15,10] = 01 0101
5908 instr [9, 5] = Vs
5909 instr [4, 0] = Vd. */
5910
5911 int shift;
5912 int full = INSTR (30, 30);
5913 unsigned vs = INSTR (9, 5);
5914 unsigned vd = INSTR (4, 0);
5915 unsigned i;
5916
5917 NYI_assert (29, 23, 0x1E);
5918 NYI_assert (15, 10, 0x15);
5919
5920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5921 if (INSTR (22, 22))
5922 {
5923 shift = INSTR (21, 16);
5924
5925 if (full == 0)
5926 HALT_UNALLOC;
5927
5928 for (i = 0; i < 2; i++)
5929 {
5930 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5931 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5932 }
5933
5934 return;
5935 }
5936
5937 if (INSTR (21, 21))
5938 {
5939 shift = INSTR (20, 16);
5940
5941 for (i = 0; i < (full ? 4 : 2); i++)
5942 {
5943 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5944 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5945 }
5946
5947 return;
5948 }
5949
5950 if (INSTR (20, 20))
5951 {
5952 shift = INSTR (19, 16);
5953
5954 for (i = 0; i < (full ? 8 : 4); i++)
5955 {
5956 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5957 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5958 }
5959
5960 return;
5961 }
5962
5963 if (INSTR (19, 19) == 0)
5964 HALT_UNALLOC;
5965
5966 shift = INSTR (18, 16);
5967
5968 for (i = 0; i < (full ? 16 : 8); i++)
5969 {
5970 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5971 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5972 }
5973 }
5974
5975 static void
5976 do_vec_SSHR_USHR (sim_cpu *cpu)
5977 {
5978 /* instr [31] = 0
5979 instr [30] = half(0)/full(1)
5980 instr [29] = signed(0)/unsigned(1)
5981 instr [28,23] = 0 1111 0
5982 instr [22,16] = size and shift amount
5983 instr [15,10] = 0000 01
5984 instr [9, 5] = Vs
5985 instr [4, 0] = Vd. */
5986
5987 int full = INSTR (30, 30);
5988 int sign = ! INSTR (29, 29);
5989 unsigned shift = INSTR (22, 16);
5990 unsigned vs = INSTR (9, 5);
5991 unsigned vd = INSTR (4, 0);
5992 unsigned i;
5993
5994 NYI_assert (28, 23, 0x1E);
5995 NYI_assert (15, 10, 0x01);
5996
5997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5998 if (INSTR (22, 22))
5999 {
6000 shift = 128 - shift;
6001
6002 if (full == 0)
6003 HALT_UNALLOC;
6004
6005 if (sign)
6006 for (i = 0; i < 2; i++)
6007 {
6008 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6009 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6010 }
6011 else
6012 for (i = 0; i < 2; i++)
6013 {
6014 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6015 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6016 }
6017
6018 return;
6019 }
6020
6021 if (INSTR (21, 21))
6022 {
6023 shift = 64 - shift;
6024
6025 if (sign)
6026 for (i = 0; i < (full ? 4 : 2); i++)
6027 {
6028 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6029 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6030 }
6031 else
6032 for (i = 0; i < (full ? 4 : 2); i++)
6033 {
6034 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6035 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6036 }
6037
6038 return;
6039 }
6040
6041 if (INSTR (20, 20))
6042 {
6043 shift = 32 - shift;
6044
6045 if (sign)
6046 for (i = 0; i < (full ? 8 : 4); i++)
6047 {
6048 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6049 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6050 }
6051 else
6052 for (i = 0; i < (full ? 8 : 4); i++)
6053 {
6054 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6055 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6056 }
6057
6058 return;
6059 }
6060
6061 if (INSTR (19, 19) == 0)
6062 HALT_UNALLOC;
6063
6064 shift = 16 - shift;
6065
6066 if (sign)
6067 for (i = 0; i < (full ? 16 : 8); i++)
6068 {
6069 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6070 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6071 }
6072 else
6073 for (i = 0; i < (full ? 16 : 8); i++)
6074 {
6075 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6076 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6077 }
6078 }
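
/* For these right shifts the encoded immediate is subtracted from
   twice the element size: shift = 2 * esize - immh:immb.  Example for
   64-bit lanes (instr[22] set): an encoded value of 120 gives
   128 - 120 = 8, i.e. SSHR Vd.2D, Vs.2D, #8.  */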
6079
6080 static void
6081 do_vec_MUL_by_element (sim_cpu *cpu)
6082 {
6083 /* instr[31] = 0
6084 instr[30] = half/full
6085 instr[29,24] = 00 1111
6086 instr[23,22] = size
6087 instr[21] = L
6088 instr[20] = M
6089 instr[19,16] = m
6090 instr[15,12] = 1000
6091 instr[11] = H
6092 instr[10] = 0
6093 instr[9,5] = Vn
6094 instr[4,0] = Vd */
6095
6096 unsigned full = INSTR (30, 30);
6097 unsigned L = INSTR (21, 21);
6098 unsigned H = INSTR (11, 11);
6099 unsigned vn = INSTR (9, 5);
6100 unsigned vd = INSTR (4, 0);
6101 unsigned size = INSTR (23, 22);
6102 unsigned index;
6103 unsigned vm;
6104 unsigned e;
6105
6106 NYI_assert (29, 24, 0x0F);
6107 NYI_assert (15, 12, 0x8);
6108 NYI_assert (10, 10, 0);
6109
6110 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6111 switch (size)
6112 {
6113 case 1:
6114 {
6115 /* 16 bit products. */
6116 uint16_t product;
6117 uint16_t element1;
6118 uint16_t element2;
6119
6120 index = (H << 2) | (L << 1) | INSTR (20, 20);
6121 vm = INSTR (19, 16);
6122 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6123
6124 for (e = 0; e < (full ? 8 : 4); e ++)
6125 {
6126 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6127 product = element1 * element2;
6128 aarch64_set_vec_u16 (cpu, vd, e, product);
6129 }
6130 }
6131 break;
6132
6133 case 2:
6134 {
6135 /* 32 bit products. */
6136 uint32_t product;
6137 uint32_t element1;
6138 uint32_t element2;
6139
6140 index = (H << 1) | L;
6141 vm = INSTR (20, 16);
6142 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6143
6144 for (e = 0; e < (full ? 4 : 2); e ++)
6145 {
6146 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6147 product = element1 * element2;
6148 aarch64_set_vec_u32 (cpu, vd, e, product);
6149 }
6150 }
6151 break;
6152
6153 default:
6154 HALT_UNALLOC;
6155 }
6156 }
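
/* The element index above is built from the H:L:M bits: 16-bit lanes
   use index = H:L:M (0..7) with the register number reduced to
   instr[19,16], while 32-bit lanes use index = H:L (0..3) with the
   full instr[20,16] field.  E.g. H=1, L=0, M=1 selects vm.h[5] in the
   16-bit case.  */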
6157
6158 static void
6159 do_FMLA_by_element (sim_cpu *cpu)
6160 {
6161 /* instr[31] = 0
6162 instr[30] = half/full
6163 instr[29,23] = 00 1111 1
6164 instr[22] = size
6165 instr[21] = L
6166 instr[20,16] = m
6167 instr[15,12] = 0001
6168 instr[11] = H
6169 instr[10] = 0
6170 instr[9,5] = Vn
6171 instr[4,0] = Vd */
6172
6173 unsigned full = INSTR (30, 30);
6174 unsigned size = INSTR (22, 22);
6175 unsigned L = INSTR (21, 21);
6176 unsigned vm = INSTR (20, 16);
6177 unsigned H = INSTR (11, 11);
6178 unsigned vn = INSTR (9, 5);
6179 unsigned vd = INSTR (4, 0);
6180 unsigned e;
6181
6182 NYI_assert (29, 23, 0x1F);
6183 NYI_assert (15, 12, 0x1);
6184 NYI_assert (10, 10, 0);
6185
6186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6187 if (size)
6188 {
6189 double element1, element2;
6190
6191 if (! full || L)
6192 HALT_UNALLOC;
6193
6194 element2 = aarch64_get_vec_double (cpu, vm, H);
6195
6196 for (e = 0; e < 2; e++)
6197 {
6198 element1 = aarch64_get_vec_double (cpu, vn, e);
6199 element1 *= element2;
6200 element1 += aarch64_get_vec_double (cpu, vd, e);
6201 aarch64_set_vec_double (cpu, vd, e, element1);
6202 }
6203 }
6204 else
6205 {
6206 float element1;
6207 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6208
6209 for (e = 0; e < (full ? 4 : 2); e++)
6210 {
6211 element1 = aarch64_get_vec_float (cpu, vn, e);
6212 element1 *= element2;
6213 element1 += aarch64_get_vec_float (cpu, vd, e);
6214 aarch64_set_vec_float (cpu, vd, e, element1);
6215 }
6216 }
6217 }
6218
6219 static void
6220 do_vec_op2 (sim_cpu *cpu)
6221 {
6222 /* instr[31] = 0
6223 instr[30] = half/full
6224 instr[29,24] = 00 1111
6225 instr[23] = ?
6226 instr[22,16] = element size & index
6227 instr[15,10] = sub-opcode
6228    instr[9,5] = Vn
6229 instr[4,0] = Vd */
6230
6231 NYI_assert (29, 24, 0x0F);
6232
6233 if (INSTR (23, 23) != 0)
6234 {
6235 switch (INSTR (15, 10))
6236 {
6237 case 0x04:
6238 case 0x06:
6239 do_FMLA_by_element (cpu);
6240 return;
6241
6242 case 0x20:
6243 case 0x22:
6244 do_vec_MUL_by_element (cpu);
6245 return;
6246
6247 default:
6248 HALT_NYI;
6249 }
6250 }
6251 else
6252 {
6253 switch (INSTR (15, 10))
6254 {
6255 case 0x01: do_vec_SSHR_USHR (cpu); return;
6256 case 0x15: do_vec_SHL (cpu); return;
6257 case 0x20:
6258 case 0x22: do_vec_MUL_by_element (cpu); return;
6259 case 0x29: do_vec_xtl (cpu); return;
6260 default: HALT_NYI;
6261 }
6262 }
6263 }
6264
6265 static void
6266 do_vec_neg (sim_cpu *cpu)
6267 {
6268 /* instr[31] = 0
6269 instr[30] = full(1)/half(0)
6270 instr[29,24] = 10 1110
6271 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6272 instr[21,10] = 1000 0010 1110
6273 instr[9,5] = Vs
6274 instr[4,0] = Vd */
6275
6276 int full = INSTR (30, 30);
6277 unsigned vs = INSTR (9, 5);
6278 unsigned vd = INSTR (4, 0);
6279 unsigned i;
6280
6281 NYI_assert (29, 24, 0x2E);
6282 NYI_assert (21, 10, 0x82E);
6283
6284 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6285 switch (INSTR (23, 22))
6286 {
6287 case 0:
6288 for (i = 0; i < (full ? 16 : 8); i++)
6289 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6290 return;
6291
6292 case 1:
6293 for (i = 0; i < (full ? 8 : 4); i++)
6294 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6295 return;
6296
6297 case 2:
6298 for (i = 0; i < (full ? 4 : 2); i++)
6299 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6300 return;
6301
6302 case 3:
6303 if (! full)
6304 	HALT_UNALLOC;
6305 for (i = 0; i < 2; i++)
6306 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6307 return;
6308 }
6309 }
6310
6311 static void
6312 do_vec_sqrt (sim_cpu *cpu)
6313 {
6314 /* instr[31] = 0
6315 instr[30] = full(1)/half(0)
6316 instr[29,23] = 101 1101
6317 instr[22] = single(0)/double(1)
6318 instr[21,10] = 1000 0111 1110
6319 instr[9,5] = Vs
6320 instr[4,0] = Vd. */
6321
6322 int full = INSTR (30, 30);
6323 unsigned vs = INSTR (9, 5);
6324 unsigned vd = INSTR (4, 0);
6325 unsigned i;
6326
6327 NYI_assert (29, 23, 0x5B);
6328 NYI_assert (21, 10, 0x87E);
6329
6330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6331 if (INSTR (22, 22) == 0)
6332 for (i = 0; i < (full ? 4 : 2); i++)
6333 aarch64_set_vec_float (cpu, vd, i,
6334 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6335 else
6336 for (i = 0; i < 2; i++)
6337 aarch64_set_vec_double (cpu, vd, i,
6338 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6339 }
6340
6341 static void
6342 do_vec_mls_indexed (sim_cpu *cpu)
6343 {
6344 /* instr[31] = 0
6345 instr[30] = half(0)/full(1)
6346 instr[29,24] = 10 1111
6347 instr[23,22] = 16-bit(01)/32-bit(10)
6348 instr[21,20+11] = index (if 16-bit)
6349 instr[21+11] = index (if 32-bit)
6350 instr[20,16] = Vm
6351 instr[15,12] = 0100
6352 instr[11] = part of index
6353 instr[10] = 0
6354 instr[9,5] = Vs
6355 instr[4,0] = Vd. */
6356
6357 int full = INSTR (30, 30);
6358 unsigned vs = INSTR (9, 5);
6359 unsigned vd = INSTR (4, 0);
6360 unsigned vm = INSTR (20, 16);
6361 unsigned i;
6362
6363 NYI_assert (15, 12, 4);
6364 NYI_assert (10, 10, 0);
6365
6366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6367 switch (INSTR (23, 22))
6368 {
6369 case 1:
6370 {
6371 unsigned elem;
6372 uint32_t val;
6373
6374 if (vm > 15)
6375 HALT_NYI;
6376
6377 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6378 val = aarch64_get_vec_u16 (cpu, vm, elem);
6379
6380 for (i = 0; i < (full ? 8 : 4); i++)
6381 	  aarch64_set_vec_u16 (cpu, vd, i,
6382 			       aarch64_get_vec_u16 (cpu, vd, i)
6383 			       - (aarch64_get_vec_u16 (cpu, vs, i) * val));
6384 return;
6385 }
6386
6387 case 2:
6388 {
6389 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6390 	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6391
6392 for (i = 0; i < (full ? 4 : 2); i++)
6393 	  aarch64_set_vec_u32 (cpu, vd, i,
6394 			       aarch64_get_vec_u32 (cpu, vd, i)
6395 			       - (aarch64_get_vec_u32 (cpu, vs, i) * val));
6396 return;
6397 }
6398
6399 case 0:
6400 case 3:
6401 default:
6402 HALT_NYI;
6403 }
6404 }
6405
6406 static void
6407 do_vec_SUB (sim_cpu *cpu)
6408 {
6409 /* instr [31] = 0
6410 instr [30] = half(0)/full(1)
6411 instr [29,24] = 10 1110
6412    instr [23,22] = size: byte(00), half(01), word(10), long(11)
6413 instr [21] = 1
6414 instr [20,16] = Vm
6415 instr [15,10] = 10 0001
6416 instr [9, 5] = Vn
6417 instr [4, 0] = Vd. */
6418
6419 unsigned full = INSTR (30, 30);
6420 unsigned vm = INSTR (20, 16);
6421 unsigned vn = INSTR (9, 5);
6422 unsigned vd = INSTR (4, 0);
6423 unsigned i;
6424
6425 NYI_assert (29, 24, 0x2E);
6426 NYI_assert (21, 21, 1);
6427 NYI_assert (15, 10, 0x21);
6428
6429 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6430 switch (INSTR (23, 22))
6431 {
6432 case 0:
6433 for (i = 0; i < (full ? 16 : 8); i++)
6434 aarch64_set_vec_s8 (cpu, vd, i,
6435 aarch64_get_vec_s8 (cpu, vn, i)
6436 - aarch64_get_vec_s8 (cpu, vm, i));
6437 return;
6438
6439 case 1:
6440 for (i = 0; i < (full ? 8 : 4); i++)
6441 aarch64_set_vec_s16 (cpu, vd, i,
6442 aarch64_get_vec_s16 (cpu, vn, i)
6443 - aarch64_get_vec_s16 (cpu, vm, i));
6444 return;
6445
6446 case 2:
6447 for (i = 0; i < (full ? 4 : 2); i++)
6448 aarch64_set_vec_s32 (cpu, vd, i,
6449 aarch64_get_vec_s32 (cpu, vn, i)
6450 - aarch64_get_vec_s32 (cpu, vm, i));
6451 return;
6452
6453 case 3:
6454 if (full == 0)
6455 HALT_UNALLOC;
6456
6457 for (i = 0; i < 2; i++)
6458 aarch64_set_vec_s64 (cpu, vd, i,
6459 aarch64_get_vec_s64 (cpu, vn, i)
6460 - aarch64_get_vec_s64 (cpu, vm, i));
6461 return;
6462 }
6463 }
6464
6465 static void
6466 do_vec_MLS (sim_cpu *cpu)
6467 {
6468 /* instr [31] = 0
6469 instr [30] = half(0)/full(1)
6470 instr [29,24] = 10 1110
6471    instr [23,22] = size: byte(00), half(01), word(10)
6472 instr [21] = 1
6473 instr [20,16] = Vm
6474 instr [15,10] = 10 0101
6475 instr [9, 5] = Vn
6476 instr [4, 0] = Vd. */
6477
6478 unsigned full = INSTR (30, 30);
6479 unsigned vm = INSTR (20, 16);
6480 unsigned vn = INSTR (9, 5);
6481 unsigned vd = INSTR (4, 0);
6482 unsigned i;
6483
6484 NYI_assert (29, 24, 0x2E);
6485 NYI_assert (21, 21, 1);
6486 NYI_assert (15, 10, 0x25);
6487
6488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6489 switch (INSTR (23, 22))
6490 {
6491 case 0:
6492 for (i = 0; i < (full ? 16 : 8); i++)
6493 aarch64_set_vec_u8 (cpu, vd, i,
6494 aarch64_get_vec_u8 (cpu, vd, i)
6495 - (aarch64_get_vec_u8 (cpu, vn, i)
6496 * aarch64_get_vec_u8 (cpu, vm, i)));
6497 return;
6498
6499 case 1:
6500 for (i = 0; i < (full ? 8 : 4); i++)
6501 aarch64_set_vec_u16 (cpu, vd, i,
6502 aarch64_get_vec_u16 (cpu, vd, i)
6503 - (aarch64_get_vec_u16 (cpu, vn, i)
6504 * aarch64_get_vec_u16 (cpu, vm, i)));
6505 return;
6506
6507 case 2:
6508 for (i = 0; i < (full ? 4 : 2); i++)
6509 aarch64_set_vec_u32 (cpu, vd, i,
6510 aarch64_get_vec_u32 (cpu, vd, i)
6511 - (aarch64_get_vec_u32 (cpu, vn, i)
6512 * aarch64_get_vec_u32 (cpu, vm, i)));
6513 return;
6514
6515 default:
6516 HALT_UNALLOC;
6517 }
6518 }
6519
6520 static void
6521 do_vec_FDIV (sim_cpu *cpu)
6522 {
6523 /* instr [31] = 0
6524 instr [30] = half(0)/full(1)
6525 instr [29,23] = 10 1110 0
6526    instr [22]    = float(0)/double(1)
6527 instr [21] = 1
6528 instr [20,16] = Vm
6529 instr [15,10] = 1111 11
6530 instr [9, 5] = Vn
6531 instr [4, 0] = Vd. */
6532
6533 unsigned full = INSTR (30, 30);
6534 unsigned vm = INSTR (20, 16);
6535 unsigned vn = INSTR (9, 5);
6536 unsigned vd = INSTR (4, 0);
6537 unsigned i;
6538
6539 NYI_assert (29, 23, 0x5C);
6540 NYI_assert (21, 21, 1);
6541 NYI_assert (15, 10, 0x3F);
6542
6543 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6544 if (INSTR (22, 22))
6545 {
6546 if (! full)
6547 HALT_UNALLOC;
6548
6549 for (i = 0; i < 2; i++)
6550 aarch64_set_vec_double (cpu, vd, i,
6551 aarch64_get_vec_double (cpu, vn, i)
6552 / aarch64_get_vec_double (cpu, vm, i));
6553 }
6554 else
6555 for (i = 0; i < (full ? 4 : 2); i++)
6556 aarch64_set_vec_float (cpu, vd, i,
6557 aarch64_get_vec_float (cpu, vn, i)
6558 / aarch64_get_vec_float (cpu, vm, i));
6559 }
6560
6561 static void
6562 do_vec_FMUL (sim_cpu *cpu)
6563 {
6564 /* instr [31] = 0
6565 instr [30] = half(0)/full(1)
6566 instr [29,23] = 10 1110 0
6567 instr [22] = float(0)/double(1)
6568 instr [21] = 1
6569 instr [20,16] = Vm
6570 instr [15,10] = 1101 11
6571 instr [9, 5] = Vn
6572 instr [4, 0] = Vd. */
6573
6574 unsigned full = INSTR (30, 30);
6575 unsigned vm = INSTR (20, 16);
6576 unsigned vn = INSTR (9, 5);
6577 unsigned vd = INSTR (4, 0);
6578 unsigned i;
6579
6580 NYI_assert (29, 23, 0x5C);
6581 NYI_assert (21, 21, 1);
6582 NYI_assert (15, 10, 0x37);
6583
6584 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6585 if (INSTR (22, 22))
6586 {
6587 if (! full)
6588 HALT_UNALLOC;
6589
6590 for (i = 0; i < 2; i++)
6591 aarch64_set_vec_double (cpu, vd, i,
6592 aarch64_get_vec_double (cpu, vn, i)
6593 * aarch64_get_vec_double (cpu, vm, i));
6594 }
6595 else
6596 for (i = 0; i < (full ? 4 : 2); i++)
6597 aarch64_set_vec_float (cpu, vd, i,
6598 aarch64_get_vec_float (cpu, vn, i)
6599 * aarch64_get_vec_float (cpu, vm, i));
6600 }
6601
6602 static void
6603 do_vec_FADDP (sim_cpu *cpu)
6604 {
6605 /* instr [31] = 0
6606 instr [30] = half(0)/full(1)
6607 instr [29,23] = 10 1110 0
6608 instr [22] = float(0)/double(1)
6609 instr [21] = 1
6610 instr [20,16] = Vm
6611 instr [15,10] = 1101 01
6612 instr [9, 5] = Vn
6613 instr [4, 0] = Vd. */
6614
6615 unsigned full = INSTR (30, 30);
6616 unsigned vm = INSTR (20, 16);
6617 unsigned vn = INSTR (9, 5);
6618 unsigned vd = INSTR (4, 0);
6619
6620 NYI_assert (29, 23, 0x5C);
6621 NYI_assert (21, 21, 1);
6622 NYI_assert (15, 10, 0x35);
6623
6624 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6625 if (INSTR (22, 22))
6626 {
6627       /* Extract values before adding them in case vd == vn/vm.  */
6628 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6629 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6630 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6631 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6632
6633 if (! full)
6634 HALT_UNALLOC;
6635
6636 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6637 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6638 }
6639 else
6640 {
6641       /* Extract values before adding them in case vd == vn/vm.  */
6642 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6643 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6644 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6645 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6646
6647 if (full)
6648 {
6649 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6650 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6651 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6652 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6653
6654 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6655 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6656 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6657 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6658 }
6659 else
6660 {
6661 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6662 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6663 }
6664 }
6665 }
6666
6667 static void
6668 do_vec_FSQRT (sim_cpu *cpu)
6669 {
6670 /* instr[31] = 0
6671 instr[30] = half(0)/full(1)
6672 instr[29,23] = 10 1110 1
6673 instr[22] = single(0)/double(1)
6674 instr[21,10] = 10 0001 1111 10
6675 instr[9,5] = Vsrc
6676 instr[4,0] = Vdest. */
6677
6678 unsigned vn = INSTR (9, 5);
6679 unsigned vd = INSTR (4, 0);
6680 unsigned full = INSTR (30, 30);
6681 int i;
6682
6683 NYI_assert (29, 23, 0x5D);
6684 NYI_assert (21, 10, 0x87E);
6685
6686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6687 if (INSTR (22, 22))
6688 {
6689 if (! full)
6690 HALT_UNALLOC;
6691
6692 for (i = 0; i < 2; i++)
6693 aarch64_set_vec_double (cpu, vd, i,
6694 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6695 }
6696 else
6697 {
6698 for (i = 0; i < (full ? 4 : 2); i++)
6699 aarch64_set_vec_float (cpu, vd, i,
6700 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6701 }
6702 }
6703
6704 static void
6705 do_vec_FNEG (sim_cpu *cpu)
6706 {
6707 /* instr[31] = 0
6708 instr[30] = half (0)/full (1)
6709 instr[29,23] = 10 1110 1
6710 instr[22] = single (0)/double (1)
6711 instr[21,10] = 10 0000 1111 10
6712 instr[9,5] = Vsrc
6713 instr[4,0] = Vdest. */
6714
6715 unsigned vn = INSTR (9, 5);
6716 unsigned vd = INSTR (4, 0);
6717 unsigned full = INSTR (30, 30);
6718 int i;
6719
6720 NYI_assert (29, 23, 0x5D);
6721 NYI_assert (21, 10, 0x83E);
6722
6723 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6724 if (INSTR (22, 22))
6725 {
6726 if (! full)
6727 HALT_UNALLOC;
6728
6729 for (i = 0; i < 2; i++)
6730 aarch64_set_vec_double (cpu, vd, i,
6731 - aarch64_get_vec_double (cpu, vn, i));
6732 }
6733 else
6734 {
6735 for (i = 0; i < (full ? 4 : 2); i++)
6736 aarch64_set_vec_float (cpu, vd, i,
6737 - aarch64_get_vec_float (cpu, vn, i));
6738 }
6739 }
6740
6741 static void
6742 do_vec_NOT (sim_cpu *cpu)
6743 {
6744 /* instr[31] = 0
6745 instr[30] = half (0)/full (1)
6746 instr[29,10] = 10 1110 0010 0000 0101 10
6747 instr[9,5] = Vn
6748      instr[4,0]   = Vd.  */
6749
6750 unsigned vn = INSTR (9, 5);
6751 unsigned vd = INSTR (4, 0);
6752 unsigned i;
6753 int full = INSTR (30, 30);
6754
6755 NYI_assert (29, 10, 0xB8816);
6756
6757 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6758 for (i = 0; i < (full ? 16 : 8); i++)
6759 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6760 }
6761
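/* Return the number of leading zero bits in VAL, treating it as a
   SIZE-bit value.  Returns SIZE when VAL is zero.  */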
6762 static unsigned int
6763 clz (uint64_t val, unsigned size)
6764 {
6765 uint64_t mask = 1;
6766 int count;
6767
6768 mask <<= (size - 1);
6769 count = 0;
6770 do
6771 {
6772 if (val & mask)
6773 break;
6774 mask >>= 1;
6775 count ++;
6776 }
6777 while (mask);
6778
6779 return count;
6780 }
6781
6782 static void
6783 do_vec_CLZ (sim_cpu *cpu)
6784 {
6785 /* instr[31] = 0
6786 instr[30] = half (0)/full (1)
6787 instr[29,24] = 10 1110
6788 instr[23,22] = size
6789 instr[21,10] = 10 0000 0100 10
6790 instr[9,5] = Vn
6791      instr[4,0]   = Vd.  */
6792
6793 unsigned vn = INSTR (9, 5);
6794 unsigned vd = INSTR (4, 0);
6795 unsigned i;
6796 int full = INSTR (30,30);
6797
6798 NYI_assert (29, 24, 0x2E);
6799 NYI_assert (21, 10, 0x812);
6800
6801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6802 switch (INSTR (23, 22))
6803 {
6804 case 0:
6805 for (i = 0; i < (full ? 16 : 8); i++)
6806 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6807 break;
6808 case 1:
6809 for (i = 0; i < (full ? 8 : 4); i++)
6810 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6811 break;
6812 case 2:
6813 for (i = 0; i < (full ? 4 : 2); i++)
6814 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6815 break;
6816 case 3:
6817 if (! full)
6818 HALT_UNALLOC;
6819 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6820 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6821 break;
6822 }
6823 }
6824
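/* INS (element): copy a single element of Vs into an element of Vd.
   The position of the lowest set bit in instr[20,16] gives the element
   size; the bits above it give the destination index, and instr[14,11]
   hold the source index.  */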
6825 static void
6826 do_vec_MOV_element (sim_cpu *cpu)
6827 {
6828 /* instr[31,21] = 0110 1110 000
6829 instr[20,16] = size & dest index
6830 instr[15] = 0
6831 instr[14,11] = source index
6832 instr[10] = 1
6833 instr[9,5] = Vs
6834      instr[4,0]  = Vd.  */
6835
6836 unsigned vs = INSTR (9, 5);
6837 unsigned vd = INSTR (4, 0);
6838 unsigned src_index;
6839 unsigned dst_index;
6840
6841 NYI_assert (31, 21, 0x370);
6842 NYI_assert (15, 15, 0);
6843 NYI_assert (10, 10, 1);
6844
6845 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6846 if (INSTR (16, 16))
6847 {
6848 /* Move a byte. */
6849 src_index = INSTR (14, 11);
6850 dst_index = INSTR (20, 17);
6851 aarch64_set_vec_u8 (cpu, vd, dst_index,
6852 aarch64_get_vec_u8 (cpu, vs, src_index));
6853 }
6854 else if (INSTR (17, 17))
6855 {
6856 /* Move 16-bits. */
6857 NYI_assert (11, 11, 0);
6858 src_index = INSTR (14, 12);
6859 dst_index = INSTR (20, 18);
6860 aarch64_set_vec_u16 (cpu, vd, dst_index,
6861 aarch64_get_vec_u16 (cpu, vs, src_index));
6862 }
6863 else if (INSTR (18, 18))
6864 {
6865 /* Move 32-bits. */
6866 NYI_assert (12, 11, 0);
6867 src_index = INSTR (14, 13);
6868 dst_index = INSTR (20, 19);
6869 aarch64_set_vec_u32 (cpu, vd, dst_index,
6870 aarch64_get_vec_u32 (cpu, vs, src_index));
6871 }
6872 else
6873 {
6874 NYI_assert (19, 19, 1);
6875 NYI_assert (13, 11, 0);
6876 src_index = INSTR (14, 14);
6877 dst_index = INSTR (20, 20);
6878 aarch64_set_vec_u64 (cpu, vd, dst_index,
6879 aarch64_get_vec_u64 (cpu, vs, src_index));
6880 }
6881 }
6882
6883 static void
6884 do_vec_REV32 (sim_cpu *cpu)
6885 {
6886 /* instr[31] = 0
6887 instr[30] = full/half
6888 instr[29,24] = 10 1110
6889 instr[23,22] = size
6890 instr[21,10] = 10 0000 0000 10
6891 instr[9,5] = Rn
6892 instr[4,0] = Rd. */
6893
6894 unsigned rn = INSTR (9, 5);
6895 unsigned rd = INSTR (4, 0);
6896 unsigned size = INSTR (23, 22);
6897 unsigned full = INSTR (30, 30);
6898 unsigned i;
6899 FRegister val;
6900
6901 NYI_assert (29, 24, 0x2E);
6902 NYI_assert (21, 10, 0x802);
6903
6904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6905 switch (size)
6906 {
6907 case 0:
6908 for (i = 0; i < (full ? 16 : 8); i++)
6909 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6910 break;
6911
6912 case 1:
6913 for (i = 0; i < (full ? 8 : 4); i++)
6914 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6915 break;
6916
6917 default:
6918 HALT_UNALLOC;
6919 }
6920
6921 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6922 if (full)
6923 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6924 }
6925
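/* EXT: extract a vector from a pair of vectors.  The result is formed
   from the bytes of Vn starting at src_index, followed by the low
   bytes of Vm.  */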
6926 static void
6927 do_vec_EXT (sim_cpu *cpu)
6928 {
6929 /* instr[31] = 0
6930 instr[30] = full/half
6931 instr[29,21] = 10 1110 000
6932 instr[20,16] = Vm
6933 instr[15] = 0
6934 instr[14,11] = source index
6935 instr[10] = 0
6936 instr[9,5] = Vn
6937      instr[4,0]   = Vd.  */
6938
6939 unsigned vm = INSTR (20, 16);
6940 unsigned vn = INSTR (9, 5);
6941 unsigned vd = INSTR (4, 0);
6942 unsigned src_index = INSTR (14, 11);
6943 unsigned full = INSTR (30, 30);
6944 unsigned i;
6945 unsigned j;
6946 FRegister val;
6947
6948 NYI_assert (31, 21, 0x370);
6949 NYI_assert (15, 15, 0);
6950 NYI_assert (10, 10, 0);
6951
6952 if (!full && (src_index & 0x8))
6953 HALT_UNALLOC;
6954
6955 j = 0;
6956
6957 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6958 for (i = src_index; i < (full ? 16 : 8); i++)
6959 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6960 for (i = 0; i < src_index; i++)
6961 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6962
6963 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6964 if (full)
6965 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6966 }
6967
6968 static void
6969 dexAdvSIMD0 (sim_cpu *cpu)
6970 {
6971 /* instr [28,25] = 0 111. */
6972 if ( INSTR (15, 10) == 0x07
6973 && (INSTR (9, 5) ==
6974 INSTR (20, 16)))
6975 {
6976 if (INSTR (31, 21) == 0x075
6977 || INSTR (31, 21) == 0x275)
6978 {
6979 do_vec_MOV_whole_vector (cpu);
6980 return;
6981 }
6982 }
6983
6984 if (INSTR (29, 19) == 0x1E0)
6985 {
6986 do_vec_MOV_immediate (cpu);
6987 return;
6988 }
6989
6990 if (INSTR (29, 19) == 0x5E0)
6991 {
6992 do_vec_MVNI (cpu);
6993 return;
6994 }
6995
6996 if (INSTR (29, 19) == 0x1C0
6997 || INSTR (29, 19) == 0x1C1)
6998 {
6999 if (INSTR (15, 10) == 0x03)
7000 {
7001 do_vec_DUP_scalar_into_vector (cpu);
7002 return;
7003 }
7004 }
7005
7006 switch (INSTR (29, 24))
7007 {
7008 case 0x0E: do_vec_op1 (cpu); return;
7009 case 0x0F: do_vec_op2 (cpu); return;
7010
7011 case 0x2E:
7012 if (INSTR (21, 21) == 1)
7013 {
7014 switch (INSTR (15, 10))
7015 {
7016 case 0x02:
7017 do_vec_REV32 (cpu);
7018 return;
7019
7020 case 0x07:
7021 switch (INSTR (23, 22))
7022 {
7023 case 0: do_vec_EOR (cpu); return;
7024 case 1: do_vec_BSL (cpu); return;
7025 case 2:
7026 case 3: do_vec_bit (cpu); return;
7027 }
7028 break;
7029
7030 case 0x08: do_vec_sub_long (cpu); return;
7031 case 0x11: do_vec_USHL (cpu); return;
7032 case 0x12: do_vec_CLZ (cpu); return;
7033 case 0x16: do_vec_NOT (cpu); return;
7034 case 0x19: do_vec_max (cpu); return;
7035 case 0x1B: do_vec_min (cpu); return;
7036 case 0x21: do_vec_SUB (cpu); return;
7037 case 0x25: do_vec_MLS (cpu); return;
7038 case 0x31: do_vec_FminmaxNMP (cpu); return;
7039 case 0x35: do_vec_FADDP (cpu); return;
7040 case 0x37: do_vec_FMUL (cpu); return;
7041 case 0x3F: do_vec_FDIV (cpu); return;
7042
7043 case 0x3E:
7044 switch (INSTR (20, 16))
7045 {
7046 case 0x00: do_vec_FNEG (cpu); return;
7047 case 0x01: do_vec_FSQRT (cpu); return;
7048 default: HALT_NYI;
7049 }
7050
7051 case 0x0D:
7052 case 0x0F:
7053 case 0x22:
7054 case 0x23:
7055 case 0x26:
7056 case 0x2A:
7057 case 0x32:
7058 case 0x36:
7059 case 0x39:
7060 case 0x3A:
7061 do_vec_compare (cpu); return;
7062
7063 default:
7064 break;
7065 }
7066 }
7067
7068 if (INSTR (31, 21) == 0x370)
7069 {
7070 if (INSTR (10, 10))
7071 do_vec_MOV_element (cpu);
7072 else
7073 do_vec_EXT (cpu);
7074 return;
7075 }
7076
7077 switch (INSTR (21, 10))
7078 {
7079 case 0x82E: do_vec_neg (cpu); return;
7080 case 0x87E: do_vec_sqrt (cpu); return;
7081 default:
7082 if (INSTR (15, 10) == 0x30)
7083 {
7084 do_vec_mull (cpu);
7085 return;
7086 }
7087 break;
7088 }
7089 break;
7090
7091 case 0x2f:
7092 switch (INSTR (15, 10))
7093 {
7094 case 0x01: do_vec_SSHR_USHR (cpu); return;
7095 case 0x10:
7096 case 0x12: do_vec_mls_indexed (cpu); return;
7097 case 0x29: do_vec_xtl (cpu); return;
7098 default:
7099 HALT_NYI;
7100 }
7101
7102 default:
7103 break;
7104 }
7105
7106 HALT_NYI;
7107 }
7108
7109 /* 3 sources. */
7110
7111 /* Float multiply add. */
7112 static void
7113 fmadds (sim_cpu *cpu)
7114 {
7115 unsigned sa = INSTR (14, 10);
7116 unsigned sm = INSTR (20, 16);
7117 unsigned sn = INSTR ( 9, 5);
7118 unsigned sd = INSTR ( 4, 0);
7119
7120 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7121 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7122 + aarch64_get_FP_float (cpu, sn)
7123 * aarch64_get_FP_float (cpu, sm));
7124 }
7125
7126 /* Double multiply add. */
7127 static void
7128 fmaddd (sim_cpu *cpu)
7129 {
7130 unsigned sa = INSTR (14, 10);
7131 unsigned sm = INSTR (20, 16);
7132 unsigned sn = INSTR ( 9, 5);
7133 unsigned sd = INSTR ( 4, 0);
7134
7135 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7136 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7137 + aarch64_get_FP_double (cpu, sn)
7138 * aarch64_get_FP_double (cpu, sm));
7139 }
7140
7141 /* Float multiply subtract. */
7142 static void
7143 fmsubs (sim_cpu *cpu)
7144 {
7145 unsigned sa = INSTR (14, 10);
7146 unsigned sm = INSTR (20, 16);
7147 unsigned sn = INSTR ( 9, 5);
7148 unsigned sd = INSTR ( 4, 0);
7149
7150 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7151 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7152 - aarch64_get_FP_float (cpu, sn)
7153 * aarch64_get_FP_float (cpu, sm));
7154 }
7155
7156 /* Double multiply subtract. */
7157 static void
7158 fmsubd (sim_cpu *cpu)
7159 {
7160 unsigned sa = INSTR (14, 10);
7161 unsigned sm = INSTR (20, 16);
7162 unsigned sn = INSTR ( 9, 5);
7163 unsigned sd = INSTR ( 4, 0);
7164
7165 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7166 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7167 - aarch64_get_FP_double (cpu, sn)
7168 * aarch64_get_FP_double (cpu, sm));
7169 }
7170
7171 /* Float negative multiply add. */
7172 static void
7173 fnmadds (sim_cpu *cpu)
7174 {
7175 unsigned sa = INSTR (14, 10);
7176 unsigned sm = INSTR (20, 16);
7177 unsigned sn = INSTR ( 9, 5);
7178 unsigned sd = INSTR ( 4, 0);
7179
7180 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7181 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7182 + (- aarch64_get_FP_float (cpu, sn))
7183 * aarch64_get_FP_float (cpu, sm));
7184 }
7185
7186 /* Double negative multiply add. */
7187 static void
7188 fnmaddd (sim_cpu *cpu)
7189 {
7190 unsigned sa = INSTR (14, 10);
7191 unsigned sm = INSTR (20, 16);
7192 unsigned sn = INSTR ( 9, 5);
7193 unsigned sd = INSTR ( 4, 0);
7194
7195 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7196 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7197 + (- aarch64_get_FP_double (cpu, sn))
7198 * aarch64_get_FP_double (cpu, sm));
7199 }
7200
7201 /* Float negative multiply subtract. */
7202 static void
7203 fnmsubs (sim_cpu *cpu)
7204 {
7205 unsigned sa = INSTR (14, 10);
7206 unsigned sm = INSTR (20, 16);
7207 unsigned sn = INSTR ( 9, 5);
7208 unsigned sd = INSTR ( 4, 0);
7209
7210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7211 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7212 + aarch64_get_FP_float (cpu, sn)
7213 * aarch64_get_FP_float (cpu, sm));
7214 }
7215
7216 /* Double negative multiply subtract. */
7217 static void
7218 fnmsubd (sim_cpu *cpu)
7219 {
7220 unsigned sa = INSTR (14, 10);
7221 unsigned sm = INSTR (20, 16);
7222 unsigned sn = INSTR ( 9, 5);
7223 unsigned sd = INSTR ( 4, 0);
7224
7225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7226 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7227 + aarch64_get_FP_double (cpu, sn)
7228 * aarch64_get_FP_double (cpu, sm));
7229 }
7230
7231 static void
7232 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7233 {
7234 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7235 instr[30] = 0
7236 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7237 instr[28,25] = 1111
7238 instr[24] = 1
7239 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7240 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7241 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7242
7243 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7244 /* dispatch on combined type:o1:o2. */
7245 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7246
7247 if (M_S != 0)
7248 HALT_UNALLOC;
7249
7250 switch (dispatch)
7251 {
7252 case 0: fmadds (cpu); return;
7253 case 1: fmsubs (cpu); return;
7254 case 2: fnmadds (cpu); return;
7255 case 3: fnmsubs (cpu); return;
7256 case 4: fmaddd (cpu); return;
7257 case 5: fmsubd (cpu); return;
7258 case 6: fnmaddd (cpu); return;
7259 case 7: fnmsubd (cpu); return;
7260 default:
7261 /* type > 1 is currently unallocated. */
7262 HALT_UNALLOC;
7263 }
7264 }
7265
7266 static void
7267 dexSimpleFPFixedConvert (sim_cpu *cpu)
7268 {
7269 HALT_NYI;
7270 }
7271
7272 static void
7273 dexSimpleFPCondCompare (sim_cpu *cpu)
7274 {
7275 /* instr [31,23] = 0001 1110 0
7276 instr [22] = type
7277 instr [21] = 1
7278 instr [20,16] = Rm
7279 instr [15,12] = condition
7280 instr [11,10] = 01
7281 instr [9,5] = Rn
7282 instr [4] = 0
7283 instr [3,0] = nzcv */
7284
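/* FCCMP: if the condition passes, Rn and Rm are compared and the flags
   are set from the result; otherwise the immediate nzcv field is
   installed directly.  */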
7285 unsigned rm = INSTR (20, 16);
7286 unsigned rn = INSTR (9, 5);
7287
7288 NYI_assert (31, 23, 0x3C);
7289 NYI_assert (11, 10, 0x1);
7290 NYI_assert (4, 4, 0);
7291
7292 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7293 if (! testConditionCode (cpu, INSTR (15, 12)))
7294 {
7295 aarch64_set_CPSR (cpu, INSTR (3, 0));
7296 return;
7297 }
7298
7299 if (INSTR (22, 22))
7300 {
7301 /* Double precision. */
7302 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7303 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7304
7305 /* FIXME: Check for NaNs. */
7306 if (val1 == val2)
7307 aarch64_set_CPSR (cpu, (Z | C));
7308 else if (val1 < val2)
7309 aarch64_set_CPSR (cpu, N);
7310 else /* val1 > val2 */
7311 aarch64_set_CPSR (cpu, C);
7312 }
7313 else
7314 {
7315 /* Single precision. */
7316 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7317 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7318
7319 /* FIXME: Check for NaNs. */
7320 if (val1 == val2)
7321 aarch64_set_CPSR (cpu, (Z | C));
7322 else if (val1 < val2)
7323 aarch64_set_CPSR (cpu, N);
7324 else /* val1 > val2 */
7325 aarch64_set_CPSR (cpu, C);
7326 }
7327 }
7328
7329 /* 2 sources. */
7330
7331 /* Float add. */
7332 static void
7333 fadds (sim_cpu *cpu)
7334 {
7335 unsigned sm = INSTR (20, 16);
7336 unsigned sn = INSTR ( 9, 5);
7337 unsigned sd = INSTR ( 4, 0);
7338
7339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7340 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7341 + aarch64_get_FP_float (cpu, sm));
7342 }
7343
7344 /* Double add. */
7345 static void
7346 faddd (sim_cpu *cpu)
7347 {
7348 unsigned sm = INSTR (20, 16);
7349 unsigned sn = INSTR ( 9, 5);
7350 unsigned sd = INSTR ( 4, 0);
7351
7352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7353 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7354 + aarch64_get_FP_double (cpu, sm));
7355 }
7356
7357 /* Float divide. */
7358 static void
7359 fdivs (sim_cpu *cpu)
7360 {
7361 unsigned sm = INSTR (20, 16);
7362 unsigned sn = INSTR ( 9, 5);
7363 unsigned sd = INSTR ( 4, 0);
7364
7365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7366 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7367 / aarch64_get_FP_float (cpu, sm));
7368 }
7369
7370 /* Double divide. */
7371 static void
7372 fdivd (sim_cpu *cpu)
7373 {
7374 unsigned sm = INSTR (20, 16);
7375 unsigned sn = INSTR ( 9, 5);
7376 unsigned sd = INSTR ( 4, 0);
7377
7378 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7379 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7380 / aarch64_get_FP_double (cpu, sm));
7381 }
7382
7383 /* Float multiply. */
7384 static void
7385 fmuls (sim_cpu *cpu)
7386 {
7387 unsigned sm = INSTR (20, 16);
7388 unsigned sn = INSTR ( 9, 5);
7389 unsigned sd = INSTR ( 4, 0);
7390
7391 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7392 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7393 * aarch64_get_FP_float (cpu, sm));
7394 }
7395
7396 /* Double multiply. */
7397 static void
7398 fmuld (sim_cpu *cpu)
7399 {
7400 unsigned sm = INSTR (20, 16);
7401 unsigned sn = INSTR ( 9, 5);
7402 unsigned sd = INSTR ( 4, 0);
7403
7404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7405 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7406 * aarch64_get_FP_double (cpu, sm));
7407 }
7408
7409 /* Float negate and multiply. */
7410 static void
7411 fnmuls (sim_cpu *cpu)
7412 {
7413 unsigned sm = INSTR (20, 16);
7414 unsigned sn = INSTR ( 9, 5);
7415 unsigned sd = INSTR ( 4, 0);
7416
7417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7418 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7419 * aarch64_get_FP_float (cpu, sm)));
7420 }
7421
7422 /* Double negate and multiply. */
7423 static void
7424 fnmuld (sim_cpu *cpu)
7425 {
7426 unsigned sm = INSTR (20, 16);
7427 unsigned sn = INSTR ( 9, 5);
7428 unsigned sd = INSTR ( 4, 0);
7429
7430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7431 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7432 * aarch64_get_FP_double (cpu, sm)));
7433 }
7434
7435 /* Float subtract. */
7436 static void
7437 fsubs (sim_cpu *cpu)
7438 {
7439 unsigned sm = INSTR (20, 16);
7440 unsigned sn = INSTR ( 9, 5);
7441 unsigned sd = INSTR ( 4, 0);
7442
7443 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7444 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7445 - aarch64_get_FP_float (cpu, sm));
7446 }
7447
7448 /* Double subtract. */
7449 static void
7450 fsubd (sim_cpu *cpu)
7451 {
7452 unsigned sm = INSTR (20, 16);
7453 unsigned sn = INSTR ( 9, 5);
7454 unsigned sd = INSTR ( 4, 0);
7455
7456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7457 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7458 - aarch64_get_FP_double (cpu, sm));
7459 }
7460
7461 static void
7462 do_FMINNM (sim_cpu *cpu)
7463 {
7464 /* instr[31,23] = 0 0011 1100
7465 instr[22] = float(0)/double(1)
7466 instr[21] = 1
7467 instr[20,16] = Sm
7468 instr[15,10] = 01 1110
7469 instr[9,5] = Sn
7470      instr[4,0]   = Sd */
7471
7472 unsigned sm = INSTR (20, 16);
7473 unsigned sn = INSTR ( 9, 5);
7474 unsigned sd = INSTR ( 4, 0);
7475
7476 NYI_assert (31, 23, 0x03C);
7477 NYI_assert (15, 10, 0x1E);
7478
7479 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7480 if (INSTR (22, 22))
7481 aarch64_set_FP_double (cpu, sd,
7482 dminnm (aarch64_get_FP_double (cpu, sn),
7483 aarch64_get_FP_double (cpu, sm)));
7484 else
7485 aarch64_set_FP_float (cpu, sd,
7486 fminnm (aarch64_get_FP_float (cpu, sn),
7487 aarch64_get_FP_float (cpu, sm)));
7488 }
7489
7490 static void
7491 do_FMAXNM (sim_cpu *cpu)
7492 {
7493 /* instr[31,23] = 0 0011 1100
7494 instr[22] = float(0)/double(1)
7495 instr[21] = 1
7496 instr[20,16] = Sm
7497 instr[15,10] = 01 1010
7498 instr[9,5] = Sn
7499      instr[4,0]   = Sd */
7500
7501 unsigned sm = INSTR (20, 16);
7502 unsigned sn = INSTR ( 9, 5);
7503 unsigned sd = INSTR ( 4, 0);
7504
7505 NYI_assert (31, 23, 0x03C);
7506 NYI_assert (15, 10, 0x1A);
7507
7508 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7509 if (INSTR (22, 22))
7510 aarch64_set_FP_double (cpu, sd,
7511 dmaxnm (aarch64_get_FP_double (cpu, sn),
7512 aarch64_get_FP_double (cpu, sm)));
7513 else
7514 aarch64_set_FP_float (cpu, sd,
7515 fmaxnm (aarch64_get_FP_float (cpu, sn),
7516 aarch64_get_FP_float (cpu, sm)));
7517 }
7518
7519 static void
7520 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7521 {
7522 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7523 instr[30] = 0
7524 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7525 instr[28,25] = 1111
7526 instr[24] = 0
7527 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7528 instr[21] = 1
7529 instr[20,16] = Vm
7530 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7531 0010 ==> FADD, 0011 ==> FSUB,
7532 0100 ==> FMAX, 0101 ==> FMIN
7533 0110 ==> FMAXNM, 0111 ==> FMINNM
7534 1000 ==> FNMUL, ow ==> UNALLOC
7535 instr[11,10] = 10
7536 instr[9,5] = Vn
7537 instr[4,0] = Vd */
7538
7539 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7540 uint32_t type = INSTR (23, 22);
7541 /* Dispatch on opcode. */
7542 uint32_t dispatch = INSTR (15, 12);
7543
7544 if (type > 1)
7545 HALT_UNALLOC;
7546
7547 if (M_S != 0)
7548 HALT_UNALLOC;
7549
7550 if (type)
7551 switch (dispatch)
7552 {
7553 case 0: fmuld (cpu); return;
7554 case 1: fdivd (cpu); return;
7555 case 2: faddd (cpu); return;
7556 case 3: fsubd (cpu); return;
7557 case 6: do_FMAXNM (cpu); return;
7558 case 7: do_FMINNM (cpu); return;
7559 case 8: fnmuld (cpu); return;
7560
7561 /* Have not yet implemented fmax and fmin. */
7562 case 4:
7563 case 5:
7564 HALT_NYI;
7565
7566 default:
7567 HALT_UNALLOC;
7568 }
7569 else /* type == 0 => floats. */
7570 switch (dispatch)
7571 {
7572 case 0: fmuls (cpu); return;
7573 case 1: fdivs (cpu); return;
7574 case 2: fadds (cpu); return;
7575 case 3: fsubs (cpu); return;
7576 case 6: do_FMAXNM (cpu); return;
7577 case 7: do_FMINNM (cpu); return;
7578 case 8: fnmuls (cpu); return;
7579
7580 case 4:
7581 case 5:
7582 HALT_NYI;
7583
7584 default:
7585 HALT_UNALLOC;
7586 }
7587 }
7588
7589 static void
7590 dexSimpleFPCondSelect (sim_cpu *cpu)
7591 {
7592 /* FCSEL
7593 instr[31,23] = 0 0011 1100
7594 instr[22] = 0=>single 1=>double
7595 instr[21] = 1
7596 instr[20,16] = Sm
7597 instr[15,12] = cond
7598 instr[11,10] = 11
7599 instr[9,5] = Sn
7600      instr[4,0]   = Sd */
7601 unsigned sm = INSTR (20, 16);
7602 unsigned sn = INSTR ( 9, 5);
7603 unsigned sd = INSTR ( 4, 0);
7604 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7605
7606 NYI_assert (31, 23, 0x03C);
7607 NYI_assert (11, 10, 0x3);
7608
7609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7610 if (INSTR (22, 22))
7611 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7612 : aarch64_get_FP_double (cpu, sm)));
7613 else
7614 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7615 : aarch64_get_FP_float (cpu, sm)));
7616 }
7617
7618 /* Store 32 bit unscaled signed 9 bit. */
7619 static void
7620 fsturs (sim_cpu *cpu, int32_t offset)
7621 {
7622 unsigned int rn = INSTR (9, 5);
7623 unsigned int st = INSTR (4, 0);
7624
7625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7626   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7627 aarch64_get_vec_u32 (cpu, st, 0));
7628 }
7629
7630 /* Store 64 bit unscaled signed 9 bit. */
7631 static void
7632 fsturd (sim_cpu *cpu, int32_t offset)
7633 {
7634 unsigned int rn = INSTR (9, 5);
7635 unsigned int st = INSTR (4, 0);
7636
7637 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7638   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7639 aarch64_get_vec_u64 (cpu, st, 0));
7640 }
7641
7642 /* Store 128 bit unscaled signed 9 bit. */
7643 static void
7644 fsturq (sim_cpu *cpu, int32_t offset)
7645 {
7646 unsigned int rn = INSTR (9, 5);
7647 unsigned int st = INSTR (4, 0);
7648 FRegister a;
7649
7650 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7651 aarch64_get_FP_long_double (cpu, st, & a);
7652 aarch64_set_mem_long_double (cpu,
7653 			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7654 + offset, a);
7655 }
7656
7657 /* TODO FP move register. */
7658
7659 /* 32 bit fp to fp move register. */
7660 static void
7661 ffmovs (sim_cpu *cpu)
7662 {
7663 unsigned int rn = INSTR (9, 5);
7664 unsigned int st = INSTR (4, 0);
7665
7666 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7667 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7668 }
7669
7670 /* 64 bit fp to fp move register. */
7671 static void
7672 ffmovd (sim_cpu *cpu)
7673 {
7674 unsigned int rn = INSTR (9, 5);
7675 unsigned int st = INSTR (4, 0);
7676
7677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7678 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7679 }
7680
7681 /* 32 bit GReg to Vec move register. */
7682 static void
7683 fgmovs (sim_cpu *cpu)
7684 {
7685 unsigned int rn = INSTR (9, 5);
7686 unsigned int st = INSTR (4, 0);
7687
7688 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7689 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7690 }
7691
7692 /* 64 bit g to fp move register. */
7693 static void
7694 fgmovd (sim_cpu *cpu)
7695 {
7696 unsigned int rn = INSTR (9, 5);
7697 unsigned int st = INSTR (4, 0);
7698
7699 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7700 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7701 }
7702
7703 /* 32 bit fp to g move register. */
7704 static void
7705 gfmovs (sim_cpu *cpu)
7706 {
7707 unsigned int rn = INSTR (9, 5);
7708 unsigned int st = INSTR (4, 0);
7709
7710 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7711 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7712 }
7713
7714 /* 64 bit fp to g move register. */
7715 static void
7716 gfmovd (sim_cpu *cpu)
7717 {
7718 unsigned int rn = INSTR (9, 5);
7719 unsigned int st = INSTR (4, 0);
7720
7721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7722 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7723 }
7724
7725 /* FP move immediate
7726
7727 These install an immediate 8 bit value in the target register
7728 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7729 bit exponent. */
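/* For example, under this encoding imm8 == 0x70 expands to 1.0 and
   imm8 == 0x00 expands to 2.0 (see fp_immediate_for_encoding_32 and
   fp_immediate_for_encoding_64).  */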
7730
7731 static void
7732 fmovs (sim_cpu *cpu)
7733 {
7734 unsigned int sd = INSTR (4, 0);
7735 uint32_t imm = INSTR (20, 13);
7736 float f = fp_immediate_for_encoding_32 (imm);
7737
7738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7739 aarch64_set_FP_float (cpu, sd, f);
7740 }
7741
7742 static void
7743 fmovd (sim_cpu *cpu)
7744 {
7745 unsigned int sd = INSTR (4, 0);
7746 uint32_t imm = INSTR (20, 13);
7747 double d = fp_immediate_for_encoding_64 (imm);
7748
7749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7750 aarch64_set_FP_double (cpu, sd, d);
7751 }
7752
7753 static void
7754 dexSimpleFPImmediate (sim_cpu *cpu)
7755 {
7756 /* instr[31,23] == 00111100
7757 instr[22] == type : single(0)/double(1)
7758 instr[21] == 1
7759 instr[20,13] == imm8
7760 instr[12,10] == 100
7761      instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7762 instr[4,0] == Rd */
7763 uint32_t imm5 = INSTR (9, 5);
7764
7765 NYI_assert (31, 23, 0x3C);
7766
7767 if (imm5 != 0)
7768 HALT_UNALLOC;
7769
7770 if (INSTR (22, 22))
7771 fmovd (cpu);
7772 else
7773 fmovs (cpu);
7774 }
7775
7776 /* TODO specific decode and execute for group Load Store. */
7777
7778 /* TODO FP load/store single register (unscaled offset). */
7779
7780 /* TODO load 8 bit unscaled signed 9 bit. */
7781 /* TODO load 16 bit unscaled signed 9 bit. */
7782
7783 /* Load 32 bit unscaled signed 9 bit. */
7784 static void
7785 fldurs (sim_cpu *cpu, int32_t offset)
7786 {
7787 unsigned int rn = INSTR (9, 5);
7788 unsigned int st = INSTR (4, 0);
7789
7790 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7791 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7792 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7793 }
7794
7795 /* Load 64 bit unscaled signed 9 bit. */
7796 static void
7797 fldurd (sim_cpu *cpu, int32_t offset)
7798 {
7799 unsigned int rn = INSTR (9, 5);
7800 unsigned int st = INSTR (4, 0);
7801
7802 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7803 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7804 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7805 }
7806
7807 /* Load 128 bit unscaled signed 9 bit. */
7808 static void
7809 fldurq (sim_cpu *cpu, int32_t offset)
7810 {
7811 unsigned int rn = INSTR (9, 5);
7812 unsigned int st = INSTR (4, 0);
7813 FRegister a;
7814 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7815
7816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7817 aarch64_get_mem_long_double (cpu, addr, & a);
7818 aarch64_set_FP_long_double (cpu, st, a);
7819 }
7820
7821 /* TODO store 8 bit unscaled signed 9 bit. */
7822 /* TODO store 16 bit unscaled signed 9 bit. */
7823
7824
7825 /* 1 source. */
7826
7827 /* Float absolute value. */
7828 static void
7829 fabss (sim_cpu *cpu)
7830 {
7831 unsigned sn = INSTR (9, 5);
7832 unsigned sd = INSTR (4, 0);
7833 float value = aarch64_get_FP_float (cpu, sn);
7834
7835 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7836 aarch64_set_FP_float (cpu, sd, fabsf (value));
7837 }
7838
7839 /* Double absolute value. */
7840 static void
7841 fabsd (sim_cpu *cpu)
7842 {
7843 unsigned sn = INSTR (9, 5);
7844 unsigned sd = INSTR (4, 0);
7845 double value = aarch64_get_FP_double (cpu, sn);
7846
7847 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7848 aarch64_set_FP_double (cpu, sd, fabs (value));
7849 }
7850
7851 /* Float negative value. */
7852 static void
7853 fnegs (sim_cpu *cpu)
7854 {
7855 unsigned sn = INSTR (9, 5);
7856 unsigned sd = INSTR (4, 0);
7857
7858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7859 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7860 }
7861
7862 /* Double negative value. */
7863 static void
7864 fnegd (sim_cpu *cpu)
7865 {
7866 unsigned sn = INSTR (9, 5);
7867 unsigned sd = INSTR (4, 0);
7868
7869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7870 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7871 }
7872
7873 /* Float square root. */
7874 static void
7875 fsqrts (sim_cpu *cpu)
7876 {
7877 unsigned sn = INSTR (9, 5);
7878 unsigned sd = INSTR (4, 0);
7879
7880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7881 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7882 }
7883
7884 /* Double square root. */
7885 static void
7886 fsqrtd (sim_cpu *cpu)
7887 {
7888 unsigned sn = INSTR (9, 5);
7889 unsigned sd = INSTR (4, 0);
7890
7891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7892 aarch64_set_FP_double (cpu, sd,
7893 sqrt (aarch64_get_FP_double (cpu, sn)));
7894 }
7895
7896 /* Convert double to float. */
7897 static void
7898 fcvtds (sim_cpu *cpu)
7899 {
7900 unsigned sn = INSTR (9, 5);
7901 unsigned sd = INSTR (4, 0);
7902
7903 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7904 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7905 }
7906
7907 /* Convert float to double. */
7908 static void
7909 fcvtsd (sim_cpu *cpu)
7910 {
7911 unsigned sn = INSTR (9, 5);
7912 unsigned sd = INSTR (4, 0);
7913
7914 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7915 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7916 }
7917
7918 static void
7919 do_FRINT (sim_cpu *cpu)
7920 {
7921 /* instr[31,23] = 0001 1110 0
7922 instr[22] = single(0)/double(1)
7923 instr[21,18] = 1001
7924 instr[17,15] = rounding mode
7925 instr[14,10] = 10000
7926 instr[9,5] = source
7927 instr[4,0] = dest */
7928
7929 float val;
7930 unsigned rs = INSTR (9, 5);
7931 unsigned rd = INSTR (4, 0);
7932 unsigned int rmode = INSTR (17, 15);
7933
7934 NYI_assert (31, 23, 0x03C);
7935 NYI_assert (21, 18, 0x9);
7936 NYI_assert (14, 10, 0x10);
7937
7938 if (rmode == 6 || rmode == 7)
7939 /* FIXME: Add support for rmode == 6 exactness check. */
7940 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7941
7942 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7943 if (INSTR (22, 22))
7944 {
7945 double dval = aarch64_get_FP_double (cpu, rs);
7946
7947 switch (rmode)
7948 {
7949 case 0: /* mode N: nearest or even. */
7950 {
7951 double rval = round (dval);
7952
7953 	    /* round () rounds halfway cases away from zero, so step the
7954 	       result back towards zero when the tie landed on an odd
7955 	       integer.  */
7956 	    if (fabs (dval - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7957 	      rval -= copysign (1.0, rval);
7958
7959 	    aarch64_set_FP_double (cpu, rd, rval);
7960 return;
7961 }
7962
7963 case 1: /* mode P: towards +inf. */
7964 if (dval < 0.0)
7965 aarch64_set_FP_double (cpu, rd, trunc (dval));
7966 else
7967 	    aarch64_set_FP_double (cpu, rd, ceil (dval));
7968 return;
7969
7970 case 2: /* mode M: towards -inf. */
7971 if (dval < 0.0)
7972 	    aarch64_set_FP_double (cpu, rd, floor (dval));
7973 else
7974 aarch64_set_FP_double (cpu, rd, trunc (dval));
7975 return;
7976
7977 case 3: /* mode Z: towards 0. */
7978 aarch64_set_FP_double (cpu, rd, trunc (dval));
7979 return;
7980
7981 case 4: /* mode A: away from 0. */
7982 aarch64_set_FP_double (cpu, rd, round (dval));
7983 return;
7984
7985 case 6: /* mode X: use FPCR with exactness check. */
7986 case 7: /* mode I: use FPCR mode. */
7987 HALT_NYI;
7988
7989 default:
7990 HALT_UNALLOC;
7991 }
7992 }
7993
7994 val = aarch64_get_FP_float (cpu, rs);
7995
7996 switch (rmode)
7997 {
7998 case 0: /* mode N: nearest or even. */
7999 {
8000 float rval = roundf (val);
8001
8002 	  /* roundf () rounds halfway cases away from zero, so step the
8003 	     result back towards zero when the tie landed on an odd
8004 	     integer.  */
8005 	  if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
8006 	    rval -= copysignf (1.0f, rval);
8007
8008 aarch64_set_FP_float (cpu, rd, rval);
8009 return;
8010 }
8011
8012 case 1: /* mode P: towards +inf. */
8013 if (val < 0.0)
8014 aarch64_set_FP_float (cpu, rd, truncf (val));
8015 else
8016 	    aarch64_set_FP_float (cpu, rd, ceilf (val));
8017 return;
8018
8019 case 2: /* mode M: towards -inf. */
8020 if (val < 0.0)
8021 	    aarch64_set_FP_float (cpu, rd, floorf (val));
8022 	  else
8023 	    aarch64_set_FP_float (cpu, rd, truncf (val));
8024 return;
8025
8026 case 3: /* mode Z: towards 0. */
8027 aarch64_set_FP_float (cpu, rd, truncf (val));
8028 return;
8029
8030 case 4: /* mode A: away from 0. */
8031 aarch64_set_FP_float (cpu, rd, roundf (val));
8032 return;
8033
8034 case 6: /* mode X: use FPCR with exactness check. */
8035 case 7: /* mode I: use FPCR mode. */
8036 HALT_NYI;
8037
8038 default:
8039 HALT_UNALLOC;
8040 }
8041 }
8042
8043 /* Convert half to float. */
8044 static void
8045 do_FCVT_half_to_single (sim_cpu *cpu)
8046 {
8047 unsigned rn = INSTR (9, 5);
8048 unsigned rd = INSTR (4, 0);
8049
8050 NYI_assert (31, 10, 0x7B890);
8051
8052 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8053 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8054 }
8055
8056 /* Convert half to double. */
8057 static void
8058 do_FCVT_half_to_double (sim_cpu *cpu)
8059 {
8060 unsigned rn = INSTR (9, 5);
8061 unsigned rd = INSTR (4, 0);
8062
8063 NYI_assert (31, 10, 0x7B8B0);
8064
8065 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8066 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8067 }
8068
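/* Convert single to half.  */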
8069 static void
8070 do_FCVT_single_to_half (sim_cpu *cpu)
8071 {
8072 unsigned rn = INSTR (9, 5);
8073 unsigned rd = INSTR (4, 0);
8074
8075 NYI_assert (31, 10, 0x788F0);
8076
8077 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8078 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8079 }
8080
8081 /* Convert double to half. */
8082 static void
8083 do_FCVT_double_to_half (sim_cpu *cpu)
8084 {
8085 unsigned rn = INSTR (9, 5);
8086 unsigned rd = INSTR (4, 0);
8087
8088 NYI_assert (31, 10, 0x798F0);
8089
8090 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8091 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8092 }
8093
8094 static void
8095 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8096 {
8097 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8098 instr[30] = 0
8099 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8100 instr[28,25] = 1111
8101 instr[24] = 0
8102 instr[23,22] ==> type : 00 ==> source is single,
8103 01 ==> source is double
8104 10 ==> UNALLOC
8105 11 ==> UNALLOC or source is half
8106 instr[21] = 1
8107 instr[20,15] ==> opcode : with type 00 or 01
8108 000000 ==> FMOV, 000001 ==> FABS,
8109 000010 ==> FNEG, 000011 ==> FSQRT,
8110 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8111 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8112 001000 ==> FRINTN, 001001 ==> FRINTP,
8113 001010 ==> FRINTM, 001011 ==> FRINTZ,
8114 001100 ==> FRINTA, 001101 ==> UNALLOC
8115 001110 ==> FRINTX, 001111 ==> FRINTI
8116 with type 11
8117 000100 ==> FCVT (half-to-single)
8118 000101 ==> FCVT (half-to-double)
8119 instr[14,10] = 10000. */
8120
8121 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8122 uint32_t type = INSTR (23, 22);
8123 uint32_t opcode = INSTR (20, 15);
8124
8125 if (M_S != 0)
8126 HALT_UNALLOC;
8127
8128 if (type == 3)
8129 {
8130 if (opcode == 4)
8131 do_FCVT_half_to_single (cpu);
8132 else if (opcode == 5)
8133 do_FCVT_half_to_double (cpu);
8134 else
8135 HALT_UNALLOC;
8136 return;
8137 }
8138
8139 if (type == 2)
8140 HALT_UNALLOC;
8141
8142 switch (opcode)
8143 {
8144 case 0:
8145 if (type)
8146 ffmovd (cpu);
8147 else
8148 ffmovs (cpu);
8149 return;
8150
8151 case 1:
8152 if (type)
8153 	fabsd (cpu);
8154 else
8155 fabss (cpu);
8156 return;
8157
8158 case 2:
8159 if (type)
8160 fnegd (cpu);
8161 else
8162 fnegs (cpu);
8163 return;
8164
8165 case 3:
8166 if (type)
8167 fsqrtd (cpu);
8168 else
8169 fsqrts (cpu);
8170 return;
8171
8172 case 4:
8173 if (type)
8174 fcvtds (cpu);
8175 else
8176 HALT_UNALLOC;
8177 return;
8178
8179 case 5:
8180 if (type)
8181 HALT_UNALLOC;
8182       fcvtsd (cpu);
8183 return;
8184
8185 case 8: /* FRINTN etc. */
8186 case 9:
8187 case 10:
8188 case 11:
8189 case 12:
8190 case 14:
8191 case 15:
8192 do_FRINT (cpu);
8193 return;
8194
8195 case 7:
8196 if (INSTR (22, 22))
8197 do_FCVT_double_to_half (cpu);
8198 else
8199 do_FCVT_single_to_half (cpu);
8200 return;
8201
8202 case 13:
8203 HALT_NYI;
8204
8205 default:
8206 HALT_UNALLOC;
8207 }
8208 }
8209
8210 /* 32 bit signed int to float. */
8211 static void
8212 scvtf32 (sim_cpu *cpu)
8213 {
8214 unsigned rn = INSTR (9, 5);
8215 unsigned sd = INSTR (4, 0);
8216
8217 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8218 aarch64_set_FP_float
8219 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8220 }
8221
8222 /* signed int to float. */
8223 static void
8224 scvtf (sim_cpu *cpu)
8225 {
8226 unsigned rn = INSTR (9, 5);
8227 unsigned sd = INSTR (4, 0);
8228
8229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8230 aarch64_set_FP_float
8231 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8232 }
8233
8234 /* 32 bit signed int to double. */
8235 static void
8236 scvtd32 (sim_cpu *cpu)
8237 {
8238 unsigned rn = INSTR (9, 5);
8239 unsigned sd = INSTR (4, 0);
8240
8241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8242 aarch64_set_FP_double
8243 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8244 }
8245
8246 /* signed int to double. */
8247 static void
8248 scvtd (sim_cpu *cpu)
8249 {
8250 unsigned rn = INSTR (9, 5);
8251 unsigned sd = INSTR (4, 0);
8252
8253 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8254 aarch64_set_FP_double
8255 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8256 }
8257
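/* Saturation bounds for the FP-to-integer conversions below.  Note
   that some of these round when converted: e.g. (float) INT_MAX is
   2^31 exactly, which is why the RAISE_EXCEPTIONS macro saturates
   using >= and <= rather than > and <.  */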
8258 static const float FLOAT_INT_MAX = (float) INT_MAX;
8259 static const float FLOAT_INT_MIN = (float) INT_MIN;
8260 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8261 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8262 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8263 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8264 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8265 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8266
8267 #define UINT_MIN 0
8268 #define ULONG_MIN 0
8269 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8270 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8271 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8272 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8273 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8274 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8275 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8276 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8277
8278 /* Check for FP exception conditions:
8279 NaN raises IO
8280 Infinity raises IO
8281 Out of Range raises IO and IX and saturates value
8282 Denormal raises ID and IX and sets to zero. */
8283 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8284 do \
8285 { \
8286 switch (fpclassify (F)) \
8287 { \
8288 case FP_INFINITE: \
8289 case FP_NAN: \
8290 aarch64_set_FPSR (cpu, IO); \
8291 	if (signbit (F))						\
8292 	  VALUE = ITYPE##_MIN;						\
8293 	else								\
8294 	  VALUE = ITYPE##_MAX;						\
8295 break; \
8296 \
8297 case FP_NORMAL: \
8298 if (F >= FTYPE##_##ITYPE##_MAX) \
8299 { \
8300 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8301 VALUE = ITYPE##_MAX; \
8302 } \
8303 else if (F <= FTYPE##_##ITYPE##_MIN) \
8304 { \
8305 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8306 VALUE = ITYPE##_MIN; \
8307 } \
8308 break; \
8309 \
8310 case FP_SUBNORMAL: \
8311 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8312 VALUE = 0; \
8313 break; \
8314 \
8315 default: \
8316 case FP_ZERO: \
8317 VALUE = 0; \
8318 break; \
8319 } \
8320 } \
8321 while (0)
8322
8323 /* 32 bit convert float to signed int truncate towards zero. */
8324 static void
8325 fcvtszs32 (sim_cpu *cpu)
8326 {
8327 unsigned sn = INSTR (9, 5);
8328 unsigned rd = INSTR (4, 0);
8329 /* TODO : check that this rounds toward zero. */
8330 float f = aarch64_get_FP_float (cpu, sn);
8331 int32_t value = (int32_t) f;
8332
8333 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8334
8335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8336 /* Avoid sign extension to 64 bit. */
8337 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8338 }
8339
8340 /* 64 bit convert float to signed int truncate towards zero. */
8341 static void
8342 fcvtszs (sim_cpu *cpu)
8343 {
8344 unsigned sn = INSTR (9, 5);
8345 unsigned rd = INSTR (4, 0);
8346 float f = aarch64_get_FP_float (cpu, sn);
8347 int64_t value = (int64_t) f;
8348
8349 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8350
8351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8352 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8353 }
8354
8355 /* 32 bit convert double to signed int truncate towards zero. */
8356 static void
8357 fcvtszd32 (sim_cpu *cpu)
8358 {
8359 unsigned sn = INSTR (9, 5);
8360 unsigned rd = INSTR (4, 0);
8361 /* TODO : check that this rounds toward zero. */
8362 double d = aarch64_get_FP_double (cpu, sn);
8363 int32_t value = (int32_t) d;
8364
8365 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8366
8367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8368 /* Avoid sign extension to 64 bit. */
8369 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8370 }
8371
8372 /* 64 bit convert double to signed int truncate towards zero. */
8373 static void
8374 fcvtszd (sim_cpu *cpu)
8375 {
8376 unsigned sn = INSTR (9, 5);
8377 unsigned rd = INSTR (4, 0);
8378 /* TODO : check that this rounds toward zero. */
8379 double d = aarch64_get_FP_double (cpu, sn);
8380 int64_t value;
8381
8382 value = (int64_t) d;
8383
8384 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8385
8386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8387 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8388 }
8389
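/* FCVTZU: convert the float or double in Rs to an unsigned 32-bit or
   64-bit integer in Rd, rounding towards zero and saturating on
   overflow.  */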
8390 static void
8391 do_fcvtzu (sim_cpu *cpu)
8392 {
8393 /* instr[31] = size: 32-bit (0), 64-bit (1)
8394 instr[30,23] = 00111100
8395 instr[22] = type: single (0)/ double (1)
8396      instr[21]    = integer conversion (1)/fixed-point conversion (0)
8397 instr[20,16] = 11001
8398 instr[15,10] = precision
8399 instr[9,5] = Rs
8400 instr[4,0] = Rd. */
8401
8402 unsigned rs = INSTR (9, 5);
8403 unsigned rd = INSTR (4, 0);
8404
8405 NYI_assert (30, 23, 0x3C);
8406 NYI_assert (20, 16, 0x19);
8407
8408 if (INSTR (21, 21) != 1)
8409 /* Convert to fixed point. */
8410 HALT_NYI;
8411
8412 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8413 if (INSTR (31, 31))
8414 {
8415 /* Convert to unsigned 64-bit integer. */
8416 if (INSTR (22, 22))
8417 {
8418 double d = aarch64_get_FP_double (cpu, rs);
8419 uint64_t value = (uint64_t) d;
8420
8421 /* Do not raise an exception if the cast saturated at 1 << 63. */
8422 if (value != (1ULL << 63))
8423 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8424
8425 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8426 }
8427 else
8428 {
8429 float f = aarch64_get_FP_float (cpu, rs);
8430 uint64_t value = (uint64_t) f;
8431
8432 /* Do not raise an exception if the cast saturated at 1 << 63. */
8433 if (value != (1ULL << 63))
8434 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8435
8436 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8437 }
8438 }
8439 else
8440 {
8441 uint32_t value;
8442
8443 /* Convert to unsigned 32-bit integer. */
8444 if (INSTR (22, 22))
8445 {
8446 double d = aarch64_get_FP_double (cpu, rs);
8447
8448 value = (uint32_t) d;
8449 /* Do not raise an exception if the cast saturated at 1 << 31. */
8450 if (value != (1UL << 31))
8451 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8452 }
8453 else
8454 {
8455 float f = aarch64_get_FP_float (cpu, rs);
8456
8457 value = (uint32_t) f;
8458 /* Do not raise an exception if the cast saturated at 1 << 31. */
8459 if (value != (1UL << 31))
8460 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8461 }
8462
8463 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8464 }
8465 }
8466
8467 static void
8468 do_UCVTF (sim_cpu *cpu)
8469 {
8470 /* instr[31] = size: 32-bit (0), 64-bit (1)
8471 instr[30,23] = 001 1110 0
8472 instr[22] = type: single (0)/ double (1)
8473 instr[21] = 0 ==> fixed point (uses precision), 1 ==> integer
8474 instr[20,16] = 0 0011
8475 instr[15,10] = precision
8476 instr[9,5] = Rs
8477 instr[4,0] = Rd. */
8478
8479 unsigned rs = INSTR (9, 5);
8480 unsigned rd = INSTR (4, 0);
8481
8482 NYI_assert (30, 23, 0x3C);
8483 NYI_assert (20, 16, 0x03);
8484
8485 if (INSTR (21, 21) != 1)
8486 HALT_NYI;
8487
8488 /* FIXME: Add exception raising. */
8489 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8490 if (INSTR (31, 31))
8491 {
8492 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8493
8494 if (INSTR (22, 22))
8495 aarch64_set_FP_double (cpu, rd, (double) value);
8496 else
8497 aarch64_set_FP_float (cpu, rd, (float) value);
8498 }
8499 else
8500 {
8501 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8502
8503 if (INSTR (22, 22))
8504 aarch64_set_FP_double (cpu, rd, (double) value);
8505 else
8506 aarch64_set_FP_float (cpu, rd, (float) value);
8507 }
8508 }
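
/* For example, UCVTF S0, W1 with W1 == 0xffffffff yields
   4294967296.0f: the 32 bit value 4294967295 is not exactly
   representable as a float, so the cast rounds to the nearest
   representable value. */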
8509
8510 static void
8511 float_vector_move (sim_cpu *cpu)
8512 {
8513 /* instr[31,17] == 100 1111 0101 0111
8514 instr[16] ==> direction 0=> to GR, 1=> from GR
8515 instr[15,10] ==> 00 0000, ow ==> UNALLOC
8516 instr[9,5] ==> source
8517 instr[4,0] ==> dest. */
8518
8519 unsigned rn = INSTR (9, 5);
8520 unsigned rd = INSTR (4, 0);
8521
8522 NYI_assert (31, 17, 0x4F57);
8523
8524 if (INSTR (15, 10) != 0)
8525 HALT_UNALLOC;
8526
8527 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8528 if (INSTR (16, 16))
8529 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8530 else
8531 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8532 }
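
/* N.B. float_vector_move above implements the FMOV variants that
   access the upper 64 bits of a 128 bit vector register, i.e.
   FMOV Vd.D[1], Xn when bit 16 is set and FMOV Xd, Vn.D[1] when
   it is clear. */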
8533
8534 static void
8535 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8536 {
8537 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8538 instr[30] = 0
8539 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8540 instr[28,25] = 1111
8541 instr[24] = 0
8542 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8543 instr[21] = 1
8544 instr[20,19] = rmode
8545 instr[18,16] = opcode
8546 instr[15,10] = 00 0000 */
8547
8548 uint32_t rmode_opcode;
8549 uint32_t size_type;
8550 uint32_t type;
8551 uint32_t size;
8552 uint32_t S;
8553
8554 if (INSTR (31, 17) == 0x4F57)
8555 {
8556 float_vector_move (cpu);
8557 return;
8558 }
8559
8560 size = INSTR (31, 31);
8561 S = INSTR (29, 29);
8562 if (S != 0)
8563 HALT_UNALLOC;
8564
8565 type = INSTR (23, 22);
8566 if (type > 1)
8567 HALT_UNALLOC;
8568
8569 rmode_opcode = INSTR (20, 16);
8570 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8571
8572 switch (rmode_opcode)
8573 {
8574 case 2: /* SCVTF. */
8575 switch (size_type)
8576 {
8577 case 0: scvtf32 (cpu); return;
8578 case 1: scvtd32 (cpu); return;
8579 case 2: scvtf (cpu); return;
8580 case 3: scvtd (cpu); return;
8581 default: HALT_UNALLOC;
8582 }
8583
8584 case 6: /* FMOV GR, Vec. */
8585 switch (size_type)
8586 {
8587 case 0: gfmovs (cpu); return;
8588 case 3: gfmovd (cpu); return;
8589 default: HALT_UNALLOC;
8590 }
8591
8592 case 7: /* FMOV vec, GR. */
8593 switch (size_type)
8594 {
8595 case 0: fgmovs (cpu); return;
8596 case 3: fgmovd (cpu); return;
8597 default: HALT_UNALLOC;
8598 }
8599
8600 case 24: /* FCVTZS. */
8601 switch (size_type)
8602 {
8603 case 0: fcvtszs32 (cpu); return;
8604 case 1: fcvtszd32 (cpu); return;
8605 case 2: fcvtszs (cpu); return;
8606 case 3: fcvtszd (cpu); return;
8607 default: HALT_UNALLOC;
8608 }
8609
8610 case 25: do_fcvtzu (cpu); return;
8611 case 3: do_UCVTF (cpu); return;
8612
8613 case 0: /* FCVTNS. */
8614 case 1: /* FCVTNU. */
8615 case 4: /* FCVTAS. */
8616 case 5: /* FCVTAU. */
8617 case 8: /* FCVTPS. */
8618 case 9: /* FCVTPU. */
8619 case 16: /* FCVTMS. */
8620 case 17: /* FCVTMU. */
8621 default:
8622 HALT_NYI;
8623 }
8624 }
8625
8626 static void
8627 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8628 {
8629 uint32_t flags;
8630
8631 /* FIXME: Add exception raising. */
8632 if (isnan (fvalue1) || isnan (fvalue2))
8633 flags = C|V;
8634 else if (isinf (fvalue1) && isinf (fvalue2))
8635 {
8636 /* Subtracting two infinities may give a NaN. We only need to compare
8637 the signs, which we can get from isinf. */
8638 int result = isinf (fvalue1) - isinf (fvalue2);
8639
8640 if (result == 0)
8641 flags = Z|C;
8642 else if (result < 0)
8643 flags = N;
8644 else /* (result > 0). */
8645 flags = C;
8646 }
8647 else
8648 {
8649 float result = fvalue1 - fvalue2;
8650
8651 if (result == 0.0)
8652 flags = Z|C;
8653 else if (result < 0)
8654 flags = N;
8655 else /* (result > 0). */
8656 flags = C;
8657 }
8658
8659 aarch64_set_CPSR (cpu, flags);
8660 }
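
/* The resulting NZCV values match the architected FCMP results:
   equal ==> Z|C (0110), less than ==> N (1000), greater than
   ==> C (0010), unordered ==> C|V (0011). */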
8661
8662 static void
8663 fcmps (sim_cpu *cpu)
8664 {
8665 unsigned sm = INSTR (20, 16);
8666 unsigned sn = INSTR ( 9, 5);
8667
8668 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8669 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8670
8671 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8672 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8673 }
8674
8675 /* Float compare to zero -- Invalid Operation exception
8676 only on signaling NaNs. */
8677 static void
8678 fcmpzs (sim_cpu *cpu)
8679 {
8680 unsigned sn = INSTR ( 9, 5);
8681 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8682
8683 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8684 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8685 }
8686
8687 /* Float compare -- Invalid Operation exception on all NaNs. */
8688 static void
8689 fcmpes (sim_cpu *cpu)
8690 {
8691 unsigned sm = INSTR (20, 16);
8692 unsigned sn = INSTR ( 9, 5);
8693
8694 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8695 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8696
8697 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8698 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8699 }
8700
8701 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8702 static void
8703 fcmpzes (sim_cpu *cpu)
8704 {
8705 unsigned sn = INSTR ( 9, 5);
8706 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8707
8708 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8709 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8710 }
8711
8712 static void
8713 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8714 {
8715 uint32_t flags;
8716
8717 /* FIXME: Add exception raising. */
8718 if (isnan (dval1) || isnan (dval2))
8719 flags = C|V;
8720 else if (isinf (dval1) && isinf (dval2))
8721 {
8722 /* Subtracting two infinities may give a NaN. We only need to compare
8723 the signs, which we can get from isinf. */
8724 int result = isinf (dval1) - isinf (dval2);
8725
8726 if (result == 0)
8727 flags = Z|C;
8728 else if (result < 0)
8729 flags = N;
8730 else /* (result > 0). */
8731 flags = C;
8732 }
8733 else
8734 {
8735 double result = dval1 - dval2;
8736
8737 if (result == 0.0)
8738 flags = Z|C;
8739 else if (result < 0)
8740 flags = N;
8741 else /* (result > 0). */
8742 flags = C;
8743 }
8744
8745 aarch64_set_CPSR (cpu, flags);
8746 }
8747
8748 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8749 static void
8750 fcmpd (sim_cpu *cpu)
8751 {
8752 unsigned sm = INSTR (20, 16);
8753 unsigned sn = INSTR ( 9, 5);
8754
8755 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8756 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8757
8758 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8759 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8760 }
8761
8762 /* Double compare to zero -- Invalid Operation exception
8763 only on signaling NaNs. */
8764 static void
8765 fcmpzd (sim_cpu *cpu)
8766 {
8767 unsigned sn = INSTR ( 9, 5);
8768 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8769
8770 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8771 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8772 }
8773
8774 /* Double compare -- Invalid Operation exception on all NaNs. */
8775 static void
8776 fcmped (sim_cpu *cpu)
8777 {
8778 unsigned sm = INSTR (20, 16);
8779 unsigned sn = INSTR ( 9, 5);
8780
8781 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8782 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8783
8784 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8785 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8786 }
8787
8788 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8789 static void
8790 fcmpzed (sim_cpu *cpu)
8791 {
8792 unsigned sn = INSTR ( 9, 5);
8793 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8794
8795 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8796 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8797 }
8798
8799 static void
8800 dexSimpleFPCompare (sim_cpu *cpu)
8801 {
8802 /* assert instr[28,25] == 1111
8803 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8804 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8805 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8806 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8807 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8808 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8809 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8810 ow ==> UNALLOC */
8811 uint32_t dispatch;
8812 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8813 uint32_t type = INSTR (23, 22);
8814 uint32_t op = INSTR (15, 14);
8815 uint32_t op2_2_0 = INSTR (2, 0);
8816
8817 if (op2_2_0 != 0)
8818 HALT_UNALLOC;
8819
8820 if (M_S != 0)
8821 HALT_UNALLOC;
8822
8823 if (type > 1)
8824 HALT_UNALLOC;
8825
8826 if (op != 0)
8827 HALT_UNALLOC;
8828
8829 /* dispatch on type and top 2 bits of opcode. */
8830 dispatch = (type << 2) | INSTR (4, 3);
8831
8832 switch (dispatch)
8833 {
8834 case 0: fcmps (cpu); return;
8835 case 1: fcmpzs (cpu); return;
8836 case 2: fcmpes (cpu); return;
8837 case 3: fcmpzes (cpu); return;
8838 case 4: fcmpd (cpu); return;
8839 case 5: fcmpzd (cpu); return;
8840 case 6: fcmped (cpu); return;
8841 case 7: fcmpzed (cpu); return;
8842 }
8843 }
8844
8845 static void
8846 do_scalar_FADDP (sim_cpu *cpu)
8847 {
8848 /* instr [31,23] = 0111 1110 0
8849 instr [22] = single(0)/double(1)
8850 instr [21,10] = 11 0000 1101 10
8851 instr [9,5] = Fn
8852 instr [4,0] = Fd. */
8853
8854 unsigned Fn = INSTR (9, 5);
8855 unsigned Fd = INSTR (4, 0);
8856
8857 NYI_assert (31, 23, 0x0FC);
8858 NYI_assert (21, 10, 0xC36);
8859
8860 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8861 if (INSTR (22, 22))
8862 {
8863 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8864 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8865
8866 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8867 }
8868 else
8869 {
8870 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8871 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8872
8873 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8874 }
8875 }
8876
8877 /* Floating point absolute difference. */
8878
8879 static void
8880 do_scalar_FABD (sim_cpu *cpu)
8881 {
8882 /* instr [31,23] = 0111 1110 1
8883 instr [22] = float(0)/double(1)
8884 instr [21] = 1
8885 instr [20,16] = Rm
8886 instr [15,10] = 1101 01
8887 instr [9, 5] = Rn
8888 instr [4, 0] = Rd. */
8889
8890 unsigned rm = INSTR (20, 16);
8891 unsigned rn = INSTR (9, 5);
8892 unsigned rd = INSTR (4, 0);
8893
8894 NYI_assert (31, 23, 0x0FD);
8895 NYI_assert (21, 21, 1);
8896 NYI_assert (15, 10, 0x35);
8897
8898 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8899 if (INSTR (22, 22))
8900 aarch64_set_FP_double (cpu, rd,
8901 fabs (aarch64_get_FP_double (cpu, rn)
8902 - aarch64_get_FP_double (cpu, rm)));
8903 else
8904 aarch64_set_FP_float (cpu, rd,
8905 fabsf (aarch64_get_FP_float (cpu, rn)
8906 - aarch64_get_FP_float (cpu, rm)));
8907 }
8908
8909 static void
8910 do_scalar_CMGT (sim_cpu *cpu)
8911 {
8912 /* instr [31,21] = 0101 1110 111
8913 instr [20,16] = Rm
8914 instr [15,10] = 00 1101
8915 instr [9, 5] = Rn
8916 instr [4, 0] = Rd. */
8917
8918 unsigned rm = INSTR (20, 16);
8919 unsigned rn = INSTR (9, 5);
8920 unsigned rd = INSTR (4, 0);
8921
8922 NYI_assert (31, 21, 0x2F7);
8923 NYI_assert (15, 10, 0x0D);
8924
8925 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8926 aarch64_set_vec_u64 (cpu, rd, 0,
8927 aarch64_get_vec_u64 (cpu, rn, 0) >
8928 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8929 }
8930
8931 static void
8932 do_scalar_USHR (sim_cpu *cpu)
8933 {
8934 /* instr [31,23] = 0111 1111 0
8935 instr [22,16] = shift amount
8936 instr [15,10] = 0000 01
8937 instr [9, 5] = Rn
8938 instr [4, 0] = Rd. */
8939
8940 unsigned amount = 128 - INSTR (22, 16);
8941 unsigned rn = INSTR (9, 5);
8942 unsigned rd = INSTR (4, 0);
8943
8944 NYI_assert (31, 23, 0x0FE);
8945 NYI_assert (15, 10, 0x01);
8946
8947 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8948 aarch64_set_vec_u64 (cpu, rd, 0,
8949 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8950 }
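
/* The shift amount is encoded as 128 - amount in instr[22,16]:
   e.g. USHR D0, D1, #8 carries 0x78 (120) in that field, giving
   amount = 128 - 120 = 8. */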
8951
8952 static void
8953 do_scalar_SSHL (sim_cpu *cpu)
8954 {
8955 /* instr [31,21] = 0101 1110 111
8956 instr [20,16] = Rm
8957 instr [15,10] = 0100 01
8958 instr [9, 5] = Rn
8959 instr [4, 0] = Rd. */
8960
8961 unsigned rm = INSTR (20, 16);
8962 unsigned rn = INSTR (9, 5);
8963 unsigned rd = INSTR (4, 0);
8964 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8965
8966 NYI_assert (31, 21, 0x2F7);
8967 NYI_assert (15, 10, 0x11);
8968
8969 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8970 if (shift >= 0)
8971 aarch64_set_vec_s64 (cpu, rd, 0,
8972 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8973 else
8974 aarch64_set_vec_s64 (cpu, rd, 0,
8975 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8976 }
8977
8978 /* Floating point scalar compare greater than or equal to 0. */
8979 static void
8980 do_scalar_FCMGE_zero (sim_cpu *cpu)
8981 {
8982 /* instr [31,23] = 0111 1110 1
8983 instr [22,22] = size
8984 instr [21,16] = 1000 00
8985 instr [15,10] = 1100 10
8986 instr [9, 5] = Rn
8987 instr [4, 0] = Rd. */
8988
8989 unsigned size = INSTR (22, 22);
8990 unsigned rn = INSTR (9, 5);
8991 unsigned rd = INSTR (4, 0);
8992
8993 NYI_assert (31, 23, 0x0FD);
8994 NYI_assert (21, 16, 0x20);
8995 NYI_assert (15, 10, 0x32);
8996
8997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8998 if (size)
8999 aarch64_set_vec_u64 (cpu, rd, 0,
9000 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9001 else
9002 aarch64_set_vec_u32 (cpu, rd, 0,
9003 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9004 }
9005
9006 /* Floating point scalar compare less than or equal to 0. */
9007 static void
9008 do_scalar_FCMLE_zero (sim_cpu *cpu)
9009 {
9010 /* instr [31,23] = 0111 1110 1
9011 instr [22,22] = size
9012 instr [21,16] = 1000 00
9013 instr [15,10] = 1101 10
9014 instr [9, 5] = Rn
9015 instr [4, 0] = Rd. */
9016
9017 unsigned size = INSTR (22, 22);
9018 unsigned rn = INSTR (9, 5);
9019 unsigned rd = INSTR (4, 0);
9020
9021 NYI_assert (31, 23, 0x0FD);
9022 NYI_assert (21, 16, 0x20);
9023 NYI_assert (15, 10, 0x36);
9024
9025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9026 if (size)
9027 aarch64_set_vec_u64 (cpu, rd, 0,
9028 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9029 else
9030 aarch64_set_vec_u32 (cpu, rd, 0,
9031 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9032 }
9033
9034 /* Floating point scalar compare greater than 0. */
9035 static void
9036 do_scalar_FCMGT_zero (sim_cpu *cpu)
9037 {
9038 /* instr [31,23] = 0101 1110 1
9039 instr [22,22] = size
9040 instr [21,16] = 1000 00
9041 instr [15,10] = 1100 10
9042 instr [9, 5] = Rn
9043 instr [4, 0] = Rd. */
9044
9045 unsigned size = INSTR (22, 22);
9046 unsigned rn = INSTR (9, 5);
9047 unsigned rd = INSTR (4, 0);
9048
9049 NYI_assert (31, 23, 0x0BD);
9050 NYI_assert (21, 16, 0x20);
9051 NYI_assert (15, 10, 0x32);
9052
9053 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9054 if (size)
9055 aarch64_set_vec_u64 (cpu, rd, 0,
9056 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9057 else
9058 aarch64_set_vec_u32 (cpu, rd, 0,
9059 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9060 }
9061
9062 /* Floating point scalar compare equal to 0. */
9063 static void
9064 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9065 {
9066 /* instr [31,23] = 0101 1110 1
9067 instr [22,22] = size
9068 instr [21,16] = 1000 00
9069 instr [15,10] = 1101 10
9070 instr [9, 5] = Rn
9071 instr [4, 0] = Rd. */
9072
9073 unsigned size = INSTR (22, 22);
9074 unsigned rn = INSTR (9, 5);
9075 unsigned rd = INSTR (4, 0);
9076
9077 NYI_assert (31, 23, 0x0BD);
9078 NYI_assert (21, 16, 0x20);
9079 NYI_assert (15, 10, 0x36);
9080
9081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9082 if (size)
9083 aarch64_set_vec_u64 (cpu, rd, 0,
9084 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9085 else
9086 aarch64_set_vec_u32 (cpu, rd, 0,
9087 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9088 }
9089
9090 /* Floating point scalar compare less than 0. */
9091 static void
9092 do_scalar_FCMLT_zero (sim_cpu *cpu)
9093 {
9094 /* instr [31,23] = 0101 1110 1
9095 instr [22,22] = size
9096 instr [21,16] = 1000 00
9097 instr [15,10] = 1110 10
9098 instr [9, 5] = Rn
9099 instr [4, 0] = Rd. */
9100
9101 unsigned size = INSTR (22, 22);
9102 unsigned rn = INSTR (9, 5);
9103 unsigned rd = INSTR (4, 0);
9104
9105 NYI_assert (31, 23, 0x0BD);
9106 NYI_assert (21, 16, 0x20);
9107 NYI_assert (15, 10, 0x3A);
9108
9109 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9110 if (size)
9111 aarch64_set_vec_u64 (cpu, rd, 0,
9112 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9113 else
9114 aarch64_set_vec_u32 (cpu, rd, 0,
9115 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9116 }
9117
9118 static void
9119 do_scalar_shift (sim_cpu *cpu)
9120 {
9121 /* instr [31,23] = 0101 1111 0
9122 instr [22,16] = shift amount
9123 instr [15,10] = 0101 01 [SHL]
9124 instr [15,10] = 0000 01 [SSHR]
9125 instr [9, 5] = Rn
9126 instr [4, 0] = Rd. */
9127
9128 unsigned rn = INSTR (9, 5);
9129 unsigned rd = INSTR (4, 0);
9130 unsigned amount;
9131
9132 NYI_assert (31, 23, 0x0BE);
9133
9134 if (INSTR (22, 22) == 0)
9135 HALT_UNALLOC;
9136
9137 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9138 switch (INSTR (15, 10))
9139 {
9140 case 0x01: /* SSHR */
9141 amount = 128 - INSTR (22, 16);
9142 aarch64_set_vec_s64 (cpu, rd, 0,
9143 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9144 return;
9145 case 0x15: /* SHL */
9146 amount = INSTR (22, 16) - 64;
9147 aarch64_set_vec_u64 (cpu, rd, 0,
9148 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9149 return;
9150 default:
9151 HALT_NYI;
9152 }
9153 }
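
/* The immediates mirror do_scalar_USHR: SSHR D0, D1, #8 encodes
   128 - 8 = 120 in instr[22,16], while SHL D0, D1, #8 encodes
   64 + 8 = 72, hence the "- 64" above. */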
9154
9155 /* FCMEQ FCMGT FCMGE. */
9156 static void
9157 do_scalar_FCM (sim_cpu *cpu)
9158 {
9159 /* instr [31,30] = 01
9160 instr [29] = U
9161 instr [28,24] = 1 1110
9162 instr [23] = E
9163 instr [22] = size
9164 instr [21] = 1
9165 instr [20,16] = Rm
9166 instr [15,12] = 1110
9167 instr [11] = AC
9168 instr [10] = 1
9169 instr [9, 5] = Rn
9170 instr [4, 0] = Rd. */
9171
9172 unsigned rm = INSTR (20, 16);
9173 unsigned rn = INSTR (9, 5);
9174 unsigned rd = INSTR (4, 0);
9175 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9176 unsigned result;
9177 float val1;
9178 float val2;
9179
9180 NYI_assert (31, 30, 1);
9181 NYI_assert (28, 24, 0x1E);
9182 NYI_assert (21, 21, 1);
9183 NYI_assert (15, 12, 0xE);
9184 NYI_assert (10, 10, 1);
9185
9186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9187 if (INSTR (22, 22))
9188 {
9189 double dval1 = aarch64_get_FP_double (cpu, rn);
9190 double dval2 = aarch64_get_FP_double (cpu, rm);
9191
9192 switch (EUac)
9193 {
9194 case 0: /* 000 */
9195 result = dval1 == dval2;
9196 break;
9197
9198 case 3: /* 011 */
9199 dval1 = fabs (dval1);
9200 dval2 = fabs (dval2);
9201 ATTRIBUTE_FALLTHROUGH;
9202 case 2: /* 010 */
9203 result = dval1 >= dval2;
9204 break;
9205
9206 case 7: /* 111 */
9207 dval1 = fabs (dval1);
9208 dval2 = fabs (dval2);
9209 ATTRIBUTE_FALLTHROUGH;
9210 case 6: /* 110 */
9211 result = dval1 > dval2;
9212 break;
9213
9214 default:
9215 HALT_UNALLOC;
9216 }
9217
9218 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9219 return;
9220 }
9221
9222 val1 = aarch64_get_FP_float (cpu, rn);
9223 val2 = aarch64_get_FP_float (cpu, rm);
9224
9225 switch (EUac)
9226 {
9227 case 0: /* 000 */
9228 result = val1 == val2;
9229 break;
9230
9231 case 3: /* 011 */
9232 val1 = fabsf (val1);
9233 val2 = fabsf (val2);
9234 ATTRIBUTE_FALLTHROUGH;
9235 case 2: /* 010 */
9236 result = val1 >= val2;
9237 break;
9238
9239 case 7: /* 111 */
9240 val1 = fabsf (val1);
9241 val2 = fabsf (val2);
9242 ATTRIBUTE_FALLTHROUGH;
9243 case 6: /* 110 */
9244 result = val1 > val2;
9245 break;
9246
9247 default:
9248 HALT_UNALLOC;
9249 }
9250
9251 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9252 }
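
/* The EUac triple selects the comparison: 000 ==> FCMEQ,
   010 ==> FCMGE, 011 ==> FACGE (absolute), 110 ==> FCMGT,
   111 ==> FACGT (absolute); other encodings are unallocated. */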
9253
9254 /* An alias of DUP. */
9255 static void
9256 do_scalar_MOV (sim_cpu *cpu)
9257 {
9258 /* instr [31,21] = 0101 1110 000
9259 instr [20,16] = imm5
9260 instr [15,10] = 0000 01
9261 instr [9, 5] = Rn
9262 instr [4, 0] = Rd. */
9263
9264 unsigned rn = INSTR (9, 5);
9265 unsigned rd = INSTR (4, 0);
9266 unsigned index;
9267
9268 NYI_assert (31, 21, 0x2F0);
9269 NYI_assert (15, 10, 0x01);
9270
9271 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9272 if (INSTR (16, 16))
9273 {
9274 /* 8-bit. */
9275 index = INSTR (20, 17);
9276 aarch64_set_vec_u8
9277 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9278 }
9279 else if (INSTR (17, 17))
9280 {
9281 /* 16-bit. */
9282 index = INSTR (20, 18);
9283 aarch64_set_vec_u16
9284 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9285 }
9286 else if (INSTR (18, 18))
9287 {
9288 /* 32-bit. */
9289 index = INSTR (20, 19);
9290 aarch64_set_vec_u32
9291 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9292 }
9293 else if (INSTR (19, 19))
9294 {
9295 /* 64-bit. */
9296 index = INSTR (20, 20);
9297 aarch64_set_vec_u64
9298 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9299 }
9300 else
9301 HALT_UNALLOC;
9302 }
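
/* The element size is given by the position of the lowest set bit
   of imm5 (instr[20,16]) and the index is held in the bits above
   it: e.g. imm5 == 0b00110 selects the 16 bit element with
   index 1. */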
9303
9304 static void
9305 do_scalar_NEG (sim_cpu *cpu)
9306 {
9307 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9308 instr [9, 5] = Rn
9309 instr [4, 0] = Rd. */
9310
9311 unsigned rn = INSTR (9, 5);
9312 unsigned rd = INSTR (4, 0);
9313
9314 NYI_assert (31, 10, 0x1FB82E);
9315
9316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9317 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9318 }
9319
9320 static void
9321 do_scalar_USHL (sim_cpu *cpu)
9322 {
9323 /* instr [31,21] = 0111 1110 111
9324 instr [20,16] = Rm
9325 instr [15,10] = 0100 01
9326 instr [9, 5] = Rn
9327 instr [4, 0] = Rd. */
9328
9329 unsigned rm = INSTR (20, 16);
9330 unsigned rn = INSTR (9, 5);
9331 unsigned rd = INSTR (4, 0);
9332 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9333
9334 NYI_assert (31, 21, 0x3F7);
9335 NYI_assert (15, 10, 0x11);
9336
9337 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9338 if (shift >= 0)
9339 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9340 else
9341 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9342 }
9343
9344 static void
9345 do_double_add (sim_cpu *cpu)
9346 {
9347 /* instr [31,21] = 0101 1110 111
9348 instr [20,16] = Fn
9349 instr [15,10] = 1000 01
9350 instr [9,5] = Fm
9351 instr [4,0] = Fd. */
9352 unsigned Fd;
9353 unsigned Fm;
9354 unsigned Fn;
9355 double val1;
9356 double val2;
9357
9358 NYI_assert (31, 21, 0x2F7);
9359 NYI_assert (15, 10, 0x21);
9360
9361 Fd = INSTR (4, 0);
9362 Fm = INSTR (9, 5);
9363 Fn = INSTR (20, 16);
9364
9365 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9366 val1 = aarch64_get_FP_double (cpu, Fm);
9367 val2 = aarch64_get_FP_double (cpu, Fn);
9368
9369 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9370 }
9371
9372 static void
9373 do_scalar_UCVTF (sim_cpu *cpu)
9374 {
9375 /* instr [31,23] = 0111 1110 0
9376 instr [22] = single(0)/double(1)
9377 instr [21,10] = 10 0001 1101 10
9378 instr [9,5] = rn
9379 instr [4,0] = rd. */
9380
9381 unsigned rn = INSTR (9, 5);
9382 unsigned rd = INSTR (4, 0);
9383
9384 NYI_assert (31, 23, 0x0FC);
9385 NYI_assert (21, 10, 0x876);
9386
9387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9388 if (INSTR (22, 22))
9389 {
9390 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9391
9392 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9393 }
9394 else
9395 {
9396 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9397
9398 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9399 }
9400 }
9401
9402 static void
9403 do_scalar_vec (sim_cpu *cpu)
9404 {
9405 /* instr [30] = 1. */
9406 /* instr [28,25] = 1111. */
9407 switch (INSTR (31, 23))
9408 {
9409 case 0xBC:
9410 switch (INSTR (15, 10))
9411 {
9412 case 0x01: do_scalar_MOV (cpu); return;
9413 case 0x39: do_scalar_FCM (cpu); return;
9414 case 0x3B: do_scalar_FCM (cpu); return;
9415 }
9416 break;
9417
9418 case 0xBE: do_scalar_shift (cpu); return;
9419
9420 case 0xFC:
9421 switch (INSTR (15, 10))
9422 {
9423 case 0x36:
9424 switch (INSTR (21, 16))
9425 {
9426 case 0x30: do_scalar_FADDP (cpu); return;
9427 case 0x21: do_scalar_UCVTF (cpu); return;
9428 }
9429 HALT_NYI;
9430 case 0x39: do_scalar_FCM (cpu); return;
9431 case 0x3B: do_scalar_FCM (cpu); return;
9432 }
9433 break;
9434
9435 case 0xFD:
9436 switch (INSTR (15, 10))
9437 {
9438 case 0x0D: do_scalar_CMGT (cpu); return;
9439 case 0x11: do_scalar_USHL (cpu); return;
9440 case 0x2E: do_scalar_NEG (cpu); return;
9441 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9442 case 0x35: do_scalar_FABD (cpu); return;
9443 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9444 case 0x39: do_scalar_FCM (cpu); return;
9445 case 0x3B: do_scalar_FCM (cpu); return;
9446 default:
9447 HALT_NYI;
9448 }
9449
9450 case 0xFE: do_scalar_USHR (cpu); return;
9451
9452 case 0xBD:
9453 switch (INSTR (15, 10))
9454 {
9455 case 0x21: do_double_add (cpu); return;
9456 case 0x11: do_scalar_SSHL (cpu); return;
9457 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9458 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9459 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9460 default:
9461 HALT_NYI;
9462 }
9463
9464 default:
9465 HALT_NYI;
9466 }
9467 }
9468
9469 static void
9470 dexAdvSIMD1 (sim_cpu *cpu)
9471 {
9472 /* instr [28,25] = 1 111. */
9473
9474 /* We are currently only interested in the basic
9475 scalar fp routines which all have bit 30 = 0. */
9476 if (INSTR (30, 30))
9477 do_scalar_vec (cpu);
9478
9479 /* instr[24] is set for FP data processing 3-source and clear for
9480 all other basic scalar fp instruction groups. */
9481 else if (INSTR (24, 24))
9482 dexSimpleFPDataProc3Source (cpu);
9483
9484 /* instr[21] is clear for floating <-> fixed conversions and set for
9485 all other basic scalar fp instruction groups. */
9486 else if (!INSTR (21, 21))
9487 dexSimpleFPFixedConvert (cpu);
9488
9489 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9490 11 ==> cond select, 00 ==> other. */
9491 else
9492 switch (INSTR (11, 10))
9493 {
9494 case 1: dexSimpleFPCondCompare (cpu); return;
9495 case 2: dexSimpleFPDataProc2Source (cpu); return;
9496 case 3: dexSimpleFPCondSelect (cpu); return;
9497
9498 default:
9499 /* Now an ordered cascade of tests.
9500 FP immediate has instr [12] == 1.
9501 FP compare has instr [13] == 1.
9502 FP Data Proc 1 Source has instr [14] == 1.
9503 FP floating <--> integer conversions has instr [15] == 0. */
9504 if (INSTR (12, 12))
9505 dexSimpleFPImmediate (cpu);
9506
9507 else if (INSTR (13, 13))
9508 dexSimpleFPCompare (cpu);
9509
9510 else if (INSTR (14, 14))
9511 dexSimpleFPDataProc1Source (cpu);
9512
9513 else if (!INSTR (15, 15))
9514 dexSimpleFPIntegerConvert (cpu);
9515
9516 else
9517 /* If we get here then instr[15] == 1 which means UNALLOC. */
9518 HALT_UNALLOC;
9519 }
9520 }
9521
9522 /* PC relative addressing. */
9523
9524 static void
9525 pcadr (sim_cpu *cpu)
9526 {
9527 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9528 instr[30,29] = immlo
9529 instr[23,5] = immhi. */
9530 uint64_t address;
9531 unsigned rd = INSTR (4, 0);
9532 uint32_t isPage = INSTR (31, 31);
9533 union { int64_t s64; uint64_t u64; } imm;
9534 uint64_t offset;
9535
9536 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9537 offset = imm.u64;
9538 offset = (offset << 2) | INSTR (30, 29);
9539
9540 address = aarch64_get_PC (cpu);
9541
9542 if (isPage)
9543 {
9544 offset <<= 12;
9545 address &= ~0xfff;
9546 }
9547
9548 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9549 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9550 }
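
/* For example, with PC == 0x400123 an ADRP whose immediate is 1
   masks the PC down to 0x400000 and adds 1 << 12, leaving
   0x401000 in Rd; plain ADR adds the byte offset to the unmasked
   PC. */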
9551
9552 /* Specific decode and execute for group Data Processing Immediate. */
9553
9554 static void
9555 dexPCRelAddressing (sim_cpu *cpu)
9556 {
9557 /* assert instr[28,24] = 10000. */
9558 pcadr (cpu);
9559 }
9560
9561 /* Immediate logical.
9562 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9563 16, 32 or 64 bit sequence pulled out at decode and possibly
9564 inverting it.
9565
9566 N.B. the output register (dest) can normally be Xn or SP;
9567 the exception occurs for flag setting instructions, which may
9568 only use Xn for the output (dest). The input register can
9569 never be SP. */
9570
9571 /* 32 bit and immediate. */
9572 static void
9573 and32 (sim_cpu *cpu, uint32_t bimm)
9574 {
9575 unsigned rn = INSTR (9, 5);
9576 unsigned rd = INSTR (4, 0);
9577
9578 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9579 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9580 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9581 }
9582
9583 /* 64 bit and immediate. */
9584 static void
9585 and64 (sim_cpu *cpu, uint64_t bimm)
9586 {
9587 unsigned rn = INSTR (9, 5);
9588 unsigned rd = INSTR (4, 0);
9589
9590 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9591 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9592 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9593 }
9594
9595 /* 32 bit and immediate set flags. */
9596 static void
9597 ands32 (sim_cpu *cpu, uint32_t bimm)
9598 {
9599 unsigned rn = INSTR (9, 5);
9600 unsigned rd = INSTR (4, 0);
9601
9602 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9603 uint32_t value2 = bimm;
9604
9605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9606 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9607 set_flags_for_binop32 (cpu, value1 & value2);
9608 }
9609
9610 /* 64 bit and immediate set flags. */
9611 static void
9612 ands64 (sim_cpu *cpu, uint64_t bimm)
9613 {
9614 unsigned rn = INSTR (9, 5);
9615 unsigned rd = INSTR (4, 0);
9616
9617 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9618 uint64_t value2 = bimm;
9619
9620 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9621 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9622 set_flags_for_binop64 (cpu, value1 & value2);
9623 }
9624
9625 /* 32 bit exclusive or immediate. */
9626 static void
9627 eor32 (sim_cpu *cpu, uint32_t bimm)
9628 {
9629 unsigned rn = INSTR (9, 5);
9630 unsigned rd = INSTR (4, 0);
9631
9632 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9633 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9634 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9635 }
9636
9637 /* 64 bit exclusive or immediate. */
9638 static void
9639 eor64 (sim_cpu *cpu, uint64_t bimm)
9640 {
9641 unsigned rn = INSTR (9, 5);
9642 unsigned rd = INSTR (4, 0);
9643
9644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9645 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9646 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9647 }
9648
9649 /* 32 bit or immediate. */
9650 static void
9651 orr32 (sim_cpu *cpu, uint32_t bimm)
9652 {
9653 unsigned rn = INSTR (9, 5);
9654 unsigned rd = INSTR (4, 0);
9655
9656 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9657 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9658 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9659 }
9660
9661 /* 64 bit or immediate. */
9662 static void
9663 orr64 (sim_cpu *cpu, uint64_t bimm)
9664 {
9665 unsigned rn = INSTR (9, 5);
9666 unsigned rd = INSTR (4, 0);
9667
9668 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9669 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9670 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9671 }
9672
9673 /* Logical shifted register.
9674 These allow an optional LSL, ASR, LSR or ROR to the second source
9675 register with a count up to the register bit count.
9676 N.B register args may not be SP. */
9677
9678 /* 32 bit AND shifted register. */
9679 static void
9680 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9681 {
9682 unsigned rm = INSTR (20, 16);
9683 unsigned rn = INSTR (9, 5);
9684 unsigned rd = INSTR (4, 0);
9685
9686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9687 aarch64_set_reg_u64
9688 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9689 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9690 }
9691
9692 /* 64 bit AND shifted register. */
9693 static void
9694 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9695 {
9696 unsigned rm = INSTR (20, 16);
9697 unsigned rn = INSTR (9, 5);
9698 unsigned rd = INSTR (4, 0);
9699
9700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9701 aarch64_set_reg_u64
9702 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9703 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9704 }
9705
9706 /* 32 bit AND shifted register setting flags. */
9707 static void
9708 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9709 {
9710 unsigned rm = INSTR (20, 16);
9711 unsigned rn = INSTR (9, 5);
9712 unsigned rd = INSTR (4, 0);
9713
9714 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9715 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9716 shift, count);
9717
9718 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9719 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9720 set_flags_for_binop32 (cpu, value1 & value2);
9721 }
9722
9723 /* 64 bit AND shifted register setting flags. */
9724 static void
9725 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9726 {
9727 unsigned rm = INSTR (20, 16);
9728 unsigned rn = INSTR (9, 5);
9729 unsigned rd = INSTR (4, 0);
9730
9731 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9732 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9733 shift, count);
9734
9735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9736 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9737 set_flags_for_binop64 (cpu, value1 & value2);
9738 }
9739
9740 /* 32 bit BIC shifted register. */
9741 static void
9742 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9743 {
9744 unsigned rm = INSTR (20, 16);
9745 unsigned rn = INSTR (9, 5);
9746 unsigned rd = INSTR (4, 0);
9747
9748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9749 aarch64_set_reg_u64
9750 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9751 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9752 }
9753
9754 /* 64 bit BIC shifted register. */
9755 static void
9756 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9757 {
9758 unsigned rm = INSTR (20, 16);
9759 unsigned rn = INSTR (9, 5);
9760 unsigned rd = INSTR (4, 0);
9761
9762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9763 aarch64_set_reg_u64
9764 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9765 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9766 }
9767
9768 /* 32 bit BIC shifted register setting flags. */
9769 static void
9770 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9771 {
9772 unsigned rm = INSTR (20, 16);
9773 unsigned rn = INSTR (9, 5);
9774 unsigned rd = INSTR (4, 0);
9775
9776 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9777 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9778 shift, count);
9779
9780 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9781 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9782 set_flags_for_binop32 (cpu, value1 & value2);
9783 }
9784
9785 /* 64 bit BIC shifted register setting flags. */
9786 static void
9787 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9788 {
9789 unsigned rm = INSTR (20, 16);
9790 unsigned rn = INSTR (9, 5);
9791 unsigned rd = INSTR (4, 0);
9792
9793 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9794 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9795 shift, count);
9796
9797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9798 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9799 set_flags_for_binop64 (cpu, value1 & value2);
9800 }
9801
9802 /* 32 bit EON shifted register. */
9803 static void
9804 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9805 {
9806 unsigned rm = INSTR (20, 16);
9807 unsigned rn = INSTR (9, 5);
9808 unsigned rd = INSTR (4, 0);
9809
9810 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9811 aarch64_set_reg_u64
9812 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9813 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9814 }
9815
9816 /* 64 bit EON shifted register. */
9817 static void
9818 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9819 {
9820 unsigned rm = INSTR (20, 16);
9821 unsigned rn = INSTR (9, 5);
9822 unsigned rd = INSTR (4, 0);
9823
9824 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9825 aarch64_set_reg_u64
9826 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9827 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9828 }
9829
9830 /* 32 bit EOR shifted register. */
9831 static void
9832 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9833 {
9834 unsigned rm = INSTR (20, 16);
9835 unsigned rn = INSTR (9, 5);
9836 unsigned rd = INSTR (4, 0);
9837
9838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9839 aarch64_set_reg_u64
9840 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9841 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9842 }
9843
9844 /* 64 bit EOR shifted register. */
9845 static void
9846 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9847 {
9848 unsigned rm = INSTR (20, 16);
9849 unsigned rn = INSTR (9, 5);
9850 unsigned rd = INSTR (4, 0);
9851
9852 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9853 aarch64_set_reg_u64
9854 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9855 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9856 }
9857
9858 /* 32 bit ORR shifted register. */
9859 static void
9860 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9861 {
9862 unsigned rm = INSTR (20, 16);
9863 unsigned rn = INSTR (9, 5);
9864 unsigned rd = INSTR (4, 0);
9865
9866 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9867 aarch64_set_reg_u64
9868 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9869 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9870 }
9871
9872 /* 64 bit ORR shifted register. */
9873 static void
9874 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9875 {
9876 unsigned rm = INSTR (20, 16);
9877 unsigned rn = INSTR (9, 5);
9878 unsigned rd = INSTR (4, 0);
9879
9880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9881 aarch64_set_reg_u64
9882 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9883 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9884 }
9885
9886 /* 32 bit ORN shifted register. */
9887 static void
9888 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9889 {
9890 unsigned rm = INSTR (20, 16);
9891 unsigned rn = INSTR (9, 5);
9892 unsigned rd = INSTR (4, 0);
9893
9894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9895 aarch64_set_reg_u64
9896 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9897 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9898 }
9899
9900 /* 64 bit ORN shifted register. */
9901 static void
9902 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9903 {
9904 unsigned rm = INSTR (20, 16);
9905 unsigned rn = INSTR (9, 5);
9906 unsigned rd = INSTR (4, 0);
9907
9908 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9909 aarch64_set_reg_u64
9910 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9911 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9912 }
9913
9914 static void
9915 dexLogicalImmediate (sim_cpu *cpu)
9916 {
9917 /* assert instr[28,23] = 100100
9918 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9919 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9920 instr[22] = N : used to construct immediate mask
9921 instr[21,16] = immr
9922 instr[15,10] = imms
9923 instr[9,5] = Rn
9924 instr[4,0] = Rd */
9925
9926 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9927 uint32_t size = INSTR (31, 31);
9928 uint32_t N = INSTR (22, 22);
9929 /* uint32_t immr = INSTR (21, 16); */
9930 /* uint32_t imms = INSTR (15, 10); */
9931 uint32_t index = INSTR (22, 10);
9932 uint64_t bimm64 = LITable [index];
9933 uint32_t dispatch = INSTR (30, 29);
9934
9935 if (~size & N)
9936 HALT_UNALLOC;
9937
9938 if (!bimm64)
9939 HALT_UNALLOC;
9940
9941 if (size == 0)
9942 {
9943 uint32_t bimm = (uint32_t) bimm64;
9944
9945 switch (dispatch)
9946 {
9947 case 0: and32 (cpu, bimm); return;
9948 case 1: orr32 (cpu, bimm); return;
9949 case 2: eor32 (cpu, bimm); return;
9950 case 3: ands32 (cpu, bimm); return;
9951 }
9952 }
9953 else
9954 {
9955 switch (dispatch)
9956 {
9957 case 0: and64 (cpu, bimm64); return;
9958 case 1: orr64 (cpu, bimm64); return;
9959 case 2: eor64 (cpu, bimm64); return;
9960 case 3: ands64 (cpu, bimm64); return;
9961 }
9962 }
9963 HALT_UNALLOC;
9964 }
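
/* For example, AND W0, W1, #0xff has N == 0, immr == 0 and
   imms == 0b000111; the LITable entry for that index holds the
   replicated mask 0x000000ff000000ff, of which the bottom 32 bits
   are used by and32. */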
9965
9966 /* Immediate move.
9967 The uimm argument is a 16 bit value to be inserted into the
9968 target register; the pos argument locates the 16 bit word in
9969 the dest register, i.e. it is in {0, 1} for 32 bit and
9970 {0, 1, 2, 3} for 64 bit.
9971 N.B. register arg may not be SP so it should be
9972 accessed using the setGZRegisterXXX accessors. */
9973
9974 /* 32 bit move 16 bit immediate zero remaining shorts. */
9975 static void
9976 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9977 {
9978 unsigned rd = INSTR (4, 0);
9979
9980 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9981 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9982 }
9983
9984 /* 64 bit move 16 bit immediate zero remaining shorts. */
9985 static void
9986 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9987 {
9988 unsigned rd = INSTR (4, 0);
9989
9990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9991 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9992 }
9993
9994 /* 32 bit move 16 bit immediate negated. */
9995 static void
9996 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9997 {
9998 unsigned rd = INSTR (4, 0);
9999
10000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10001 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
10002 }
10003
10004 /* 64 bit move 16 bit immediate negated. */
10005 static void
10006 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10007 {
10008 unsigned rd = INSTR (4, 0);
10009
10010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10011 aarch64_set_reg_u64
10012 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10013 ^ 0xffffffffffffffffULL));
10014 }
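
/* For example, MOVN X0, #0x1234 leaves X0 == 0xffffffffffffedcb,
   i.e. the bitwise NOT of 0x1234. */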
10015
10016 /* 32 bit move 16 bit immediate keep remaining shorts. */
10017 static void
10018 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10019 {
10020 unsigned rd = INSTR (4, 0);
10021 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10022 uint32_t value = val << (pos * 16);
10023 uint32_t mask = ~(0xffffU << (pos * 16));
10024
10025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10026 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10027 }
10028
10029 /* 64 bit move 16 bit immediate keep remaining shorts. */
10030 static void
10031 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10032 {
10033 unsigned rd = INSTR (4, 0);
10034 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10035 uint64_t value = (uint64_t) val << (pos * 16);
10036 uint64_t mask = ~(0xffffULL << (pos * 16));
10037
10038 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10039 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10040 }
10041
10042 static void
10043 dexMoveWideImmediate (sim_cpu *cpu)
10044 {
10045 /* assert instr[28:23] = 100101
10046 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10047 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10048 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10049 instr[20,5] = uimm16
10050 instr[4,0] = Rd */
10051
10052 /* N.B. the (multiple of 16) shift is applied by the called routine;
10053 we just pass the multiplier. */
10054
10055 uint32_t imm;
10056 uint32_t size = INSTR (31, 31);
10057 uint32_t op = INSTR (30, 29);
10058 uint32_t shift = INSTR (22, 21);
10059
10060 /* 32 bit can only shift by 0 or 1 lot of 16;
10061 anything else is an unallocated instruction. */
10062 if (size == 0 && (shift > 1))
10063 HALT_UNALLOC;
10064
10065 if (op == 1)
10066 HALT_UNALLOC;
10067
10068 imm = INSTR (20, 5);
10069
10070 if (size == 0)
10071 {
10072 if (op == 0)
10073 movn32 (cpu, imm, shift);
10074 else if (op == 2)
10075 movz32 (cpu, imm, shift);
10076 else
10077 movk32 (cpu, imm, shift);
10078 }
10079 else
10080 {
10081 if (op == 0)
10082 movn64 (cpu, imm, shift);
10083 else if (op == 2)
10084 movz64 (cpu, imm, shift);
10085 else
10086 movk64 (cpu, imm, shift);
10087 }
10088 }
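
/* Constants wider than 16 bits are typically built with a
   MOVZ/MOVK sequence: MOVZ X0, #0x1234, LSL #16 (op == 2,
   shift == 1) writes 0x12340000, then MOVK X0, #0x5678 (op == 3,
   shift == 0) merges the low half, leaving X0 == 0x12345678. */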
10089
10090 /* Bitfield operations.
10091 These take a pair of bit positions r and s which are in {0..31}
10092 or {0..63} depending on the instruction word size.
10093 N.B register args may not be SP. */
10094
10095 /* OK, we start with ubfm, which just needs to pick
10096 some bits out of the source, zero the rest and write
10097 the result to dest. Just need two logical shifts. */
10098
10099 /* 32 bit bitfield move, left and right of affected zeroed
10100 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10101 static void
10102 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10103 {
10104 unsigned rd;
10105 unsigned rn = INSTR (9, 5);
10106 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10107
10108 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10109 if (r <= s)
10110 {
10111 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10112 We want only bits s:xxx:r at the bottom of the word
10113 so we LSL bit s up to bit 31 i.e. by 31 - s
10114 and then we LSR to bring bit 31 down to bit s - r
10115 i.e. by 31 + r - s. */
10116 value <<= 31 - s;
10117 value >>= 31 + r - s;
10118 }
10119 else
10120 {
10121 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10122 We want only bits s:xxx:0 starting at bit 31-(r-1)
10123 so we LSL bit s up to bit 31 i.e. by 31 - s
10124 and then we LSL to bring bit 31 down to 31-(r-1)+s
10125 i.e. by r - (s + 1). */
10126 value <<= 31 - s;
10127 value >>= r - (s + 1);
10128 }
10129
10130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10131 rd = INSTR (4, 0);
10132 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10133 }
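
/* The shift aliases fall out directly: LSR W0, W1, #8 is UBFM
   with r == 8, s == 31 (first branch, (value << 0) >> 8), and
   LSL W0, W1, #4 is UBFM with r == 28, s == 27 (second branch,
   (value << 4) >> 0). */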
10134
10135 /* 64 bit bitfield move, left and right of affected zeroed
10136 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10137 static void
10138 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10139 {
10140 unsigned rd;
10141 unsigned rn = INSTR (9, 5);
10142 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10143
10144 if (r <= s)
10145 {
10146 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10147 We want only bits s:xxx:r at the bottom of the word.
10148 So we LSL bit s up to bit 63 i.e. by 63 - s
10149 and then we LSR to bring bit 63 down to bit s - r
10150 i.e. by 63 + r - s. */
10151 value <<= 63 - s;
10152 value >>= 63 + r - s;
10153 }
10154 else
10155 {
10156 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10157 We want only bits s:xxx:0 starting at bit 63-(r-1).
10158 So we LSL bit s up to bit 63 i.e. by 63 - s
10159 and then we LSL to bring bit 63 down to 63-(r-1)+s
10160 i.e. by r - (s + 1). */
10161 value <<= 63 - s;
10162 value >>= r - (s + 1);
10163 }
10164
10165 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10166 rd = INSTR (4, 0);
10167 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10168 }
10169
10170 /* The signed versions need to insert sign bits
10171 on the left of the inserted bit field, so we do
10172 much the same as the unsigned version except we
10173 use an arithmetic shift right -- this just means
10174 we need to operate on signed values. */
10175
10176 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10177 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10178 static void
10179 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10180 {
10181 unsigned rd;
10182 unsigned rn = INSTR (9, 5);
10183 /* as per ubfm32 but use an ASR instead of an LSR. */
10184 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10185
10186 if (r <= s)
10187 {
10188 value <<= 31 - s;
10189 value >>= 31 + r - s;
10190 }
10191 else
10192 {
10193 value <<= 31 - s;
10194 value >>= r - (s + 1);
10195 }
10196
10197 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10198 rd = INSTR (4, 0);
10199 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10200 }
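
/* For example, SXTB W0, W1 is SBFM with r == 0, s == 7: the value
   is shifted left by 24 and arithmetically shifted right by 24,
   sign extending bit 7 across the upper bits. */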
10201
10202 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10203 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10204 static void
10205 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10206 {
10207 unsigned rd;
10208 unsigned rn = INSTR (9, 5);
10209 /* As per ubfm but use an ASR instead of an LSR. */
10210 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10211
10212 if (r <= s)
10213 {
10214 value <<= 63 - s;
10215 value >>= 63 + r - s;
10216 }
10217 else
10218 {
10219 value <<= 63 - s;
10220 value >>= r - (s + 1);
10221 }
10222
10223 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10224 rd = INSTR (4, 0);
10225 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10226 }
10227
10228 /* Finally, these versions leave non-affected bits
10229 as is, so we need to generate the bits as per
10230 ubfm and also generate a mask to pick the
10231 bits from the original and computed values. */
10232
10233 /* 32 bit bitfield move, non-affected bits left as is.
10234 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10235 static void
10236 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10237 {
10238 unsigned rn = INSTR (9, 5);
10239 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10240 uint32_t mask = -1;
10241 unsigned rd;
10242 uint32_t value2;
10243
10244 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10245 if (r <= s)
10246 {
10247 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10248 We want only bits s:xxx:r at the bottom of the word
10249 so we LSL bit s up to bit 31 i.e. by 31 - s
10250 and then we LSR to bring bit 31 down to bit s - r
10251 i.e. by 31 + r - s. */
10252 value <<= 31 - s;
10253 value >>= 31 + r - s;
10254 /* the mask must include the same bits. */
10255 mask <<= 31 - s;
10256 mask >>= 31 + r - s;
10257 }
10258 else
10259 {
10260 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10261 We want only bits s:xxx:0 starting at bit 31-(r-1)
10262 so we LSL bit s up to bit 31 i.e. by 31 - s
10263 and then we LSL to bring bit 31 down to 31-(r-1)+s
10264 i.e. by r - (s + 1). */
10265 value <<= 31 - s;
10266 value >>= r - (s + 1);
10267 /* The mask must include the same bits. */
10268 mask <<= 31 - s;
10269 mask >>= r - (s + 1);
10270 }
10271
10272 rd = INSTR (4, 0);
10273 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10274
10275 value2 &= ~mask;
10276 value2 |= value;
10277
10278 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10279 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10280 }
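
/* For example, BFI W0, W1, #8, #4 is BFM with r == 24, s == 3:
   the second branch computes (value << 28) >> 20, placing
   W1<3:0> at bits [11,8], and the mask confines the update to
   those bits. */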
10281
10282 /* 64 bit bitfield move, non-affected bits left as is.
10283 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10284 static void
10285 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10286 {
10287 unsigned rd;
10288 unsigned rn = INSTR (9, 5);
10289 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10290 uint64_t mask = 0xffffffffffffffffULL;
10291
10292 if (r <= s)
10293 {
10294 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10295 We want only bits s:xxx:r at the bottom of the word
10296 so we LSL bit s up to bit 63 i.e. by 63 - s
10297 and then we LSR to bring bit 63 down to bit s - r
10298 i.e. by 63 + r - s. */
10299 value <<= 63 - s;
10300 value >>= 63 + r - s;
10301 /* The mask must include the same bits. */
10302 mask <<= 63 - s;
10303 mask >>= 63 + r - s;
10304 }
10305 else
10306 {
10307 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10308 We want only bits s:xxx:0 starting at bit 63-(r-1)
10309 so we LSL bit s up to bit 63 i.e. by 63 - s
10310 and then we LSL to bring bit 63 down to 63-(r-1)+s
10311 i.e. by r - (s + 1). */
10312 value <<= 63 - s;
10313 value >>= r - (s + 1);
10314 /* The mask must include the same bits. */
10315 mask <<= 63 - s;
10316 mask >>= r - (s + 1);
10317 }
10318
10319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10320 rd = INSTR (4, 0);
10321 aarch64_set_reg_u64
10322 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10323 }
10324
10325 static void
10326 dexBitfieldImmediate (sim_cpu *cpu)
10327 {
10328 /* assert instr[28:23] = 100110
10329 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10330 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10331 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10332 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10333 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10334 instr[9,5] = Rn
10335 instr[4,0] = Rd */
10336
10337 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10338 uint32_t dispatch;
10339 uint32_t imms;
10340 uint32_t size = INSTR (31, 31);
10341 uint32_t N = INSTR (22, 22);
10342 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0
10343 or else we have an UNALLOC. */
10344 uint32_t immr = INSTR (21, 16);
10345
10346 if (~size & N)
10347 HALT_UNALLOC;
10348
10349 if (!size && uimm (immr, 5, 5))
10350 HALT_UNALLOC;
10351
10352 imms = INSTR (15, 10);
10353 if (!size && uimm (imms, 5, 5))
10354 HALT_UNALLOC;
10355
10356 /* Switch on combined size and op. */
10357 dispatch = INSTR (31, 29);
10358 switch (dispatch)
10359 {
10360 case 0: sbfm32 (cpu, immr, imms); return;
10361 case 1: bfm32 (cpu, immr, imms); return;
10362 case 2: ubfm32 (cpu, immr, imms); return;
10363 case 4: sbfm (cpu, immr, imms); return;
10364 case 5: bfm (cpu, immr, imms); return;
10365 case 6: ubfm (cpu, immr, imms); return;
10366 default: HALT_UNALLOC;
10367 }
10368 }
10369
10370 static void
10371 do_EXTR_32 (sim_cpu *cpu)
10372 {
10373 /* instr[31:21] = 00010011100
10374 instr[20,16] = Rm
10375 instr[15,10] = imms : 0xxxxx for 32 bit
10376 instr[9,5] = Rn
10377 instr[4,0] = Rd */
10378 unsigned rm = INSTR (20, 16);
10379 unsigned imms = INSTR (15, 10) & 31;
10380 unsigned rn = INSTR ( 9, 5);
10381 unsigned rd = INSTR ( 4, 0);
10382 uint64_t val1;
10383 uint64_t val2;
10384
10385 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10386 val1 >>= imms;
10387 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10388 val2 <<= (32 - imms);
10389
10390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10391 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10392 }
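
/* For example, EXTR W0, W1, W2, #8 with W1 == 0x11223344 and
   W2 == 0xaabbccdd forms the pair W1:W2, shifts it right by 8
   and keeps the low 32 bits, leaving W0 == 0x44aabbcc. */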
10393
10394 static void
10395 do_EXTR_64 (sim_cpu *cpu)
10396 {
10397 /* instr[31:21] = 10010011100
10398 instr[20,16] = Rm
10399 instr[15,10] = imms
10400 instr[9,5] = Rn
10401 instr[4,0] = Rd */
10402 unsigned rm = INSTR (20, 16);
10403 unsigned imms = INSTR (15, 10) & 63;
10404 unsigned rn = INSTR ( 9, 5);
10405 unsigned rd = INSTR ( 4, 0);
10406 uint64_t val;
10407
10408 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10409 val >>= imms;
10410 if (imms != 0) /* A shift by 64 would be undefined; imms == 0 copies Rm. */
10411 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10412 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10413 }
10414
10415 static void
10416 dexExtractImmediate (sim_cpu *cpu)
10417 {
10418 /* assert instr[28:23] = 100111
10419 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10420 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10421 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10422 instr[21] = op0 : must be 0 or UNALLOC
10423 instr[20,16] = Rm
10424 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10425 instr[9,5] = Rn
10426 instr[4,0] = Rd */
10427
10428 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10429 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10430 uint32_t dispatch;
10431 uint32_t size = INSTR (31, 31);
10432 uint32_t N = INSTR (22, 22);
10433 /* 32 bit operations must have imms[5] = 0
10434 or else we have an UNALLOC. */
10435 uint32_t imms = INSTR (15, 10);
10436
10437 if (size ^ N)
10438 HALT_UNALLOC;
10439
10440 if (!size && uimm (imms, 5, 5))
10441 HALT_UNALLOC;
10442
10443 /* Switch on combined size and op. */
10444 dispatch = INSTR (31, 29);
10445
10446 if (dispatch == 0)
10447 do_EXTR_32 (cpu);
10448
10449 else if (dispatch == 4)
10450 do_EXTR_64 (cpu);
10451
10452 else if (dispatch == 1)
10453 HALT_NYI;
10454 else
10455 HALT_UNALLOC;
10456 }
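/* Alias note (per the architecture, with a hypothetical encoding shown
   for illustration): ROR w0, w1, #4 is EXTR w0, w1, w1, #4, i.e.
   Rn == Rm, and it arrives here with dispatch == 0. */
#if 0
uint32_t insn = 0x13811020; /* sf=0, N=0, Rm=1, imms=4, Rn=1, Rd=0 */
#endif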
10457
10458 static void
10459 dexDPImm (sim_cpu *cpu)
10460 {
10461 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10462 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10463 bits [25,23] of a DPImm are the secondary dispatch vector. */
10464 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10465
10466 switch (group2)
10467 {
10468 case DPIMM_PCADR_000:
10469 case DPIMM_PCADR_001:
10470 dexPCRelAddressing (cpu);
10471 return;
10472
10473 case DPIMM_ADDSUB_010:
10474 case DPIMM_ADDSUB_011:
10475 dexAddSubtractImmediate (cpu);
10476 return;
10477
10478 case DPIMM_LOG_100:
10479 dexLogicalImmediate (cpu);
10480 return;
10481
10482 case DPIMM_MOV_101:
10483 dexMoveWideImmediate (cpu);
10484 return;
10485
10486 case DPIMM_BITF_110:
10487 dexBitfieldImmediate (cpu);
10488 return;
10489
10490 case DPIMM_EXTR_111:
10491 dexExtractImmediate (cpu);
10492 return;
10493
10494 default:
10495 /* Should never reach here. */
10496 HALT_NYI;
10497 }
10498 }
10499
10500 static void
10501 dexLoadUnscaledImmediate (sim_cpu *cpu)
10502 {
10503 /* instr[29,24] == 111_00
10504 instr[21] == 0
10505 instr[11,10] == 00
10506 instr[31,30] = size
10507 instr[26] = V
10508 instr[23,22] = opc
10509 instr[20,12] = simm9
10510 instr[9,5] = rn may be SP. */
10511 /* unsigned rt = INSTR (4, 0); */
10512 uint32_t V = INSTR (26, 26);
10513 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10514 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10515
10516 if (!V)
10517 {
10518 /* GReg operations. */
10519 switch (dispatch)
10520 {
10521 case 0: sturb (cpu, imm); return;
10522 case 1: ldurb32 (cpu, imm); return;
10523 case 2: ldursb64 (cpu, imm); return;
10524 case 3: ldursb32 (cpu, imm); return;
10525 case 4: sturh (cpu, imm); return;
10526 case 5: ldurh32 (cpu, imm); return;
10527 case 6: ldursh64 (cpu, imm); return;
10528 case 7: ldursh32 (cpu, imm); return;
10529 case 8: stur32 (cpu, imm); return;
10530 case 9: ldur32 (cpu, imm); return;
10531 case 10: ldursw (cpu, imm); return;
10532 case 12: stur64 (cpu, imm); return;
10533 case 13: ldur64 (cpu, imm); return;
10534
10535 case 14:
10536 /* PRFUM NYI. */
10537 HALT_NYI;
10538
10539 default:
10540 case 11:
10541 case 15:
10542 HALT_UNALLOC;
10543 }
10544 }
10545
10546 /* FReg operations. */
10547 switch (dispatch)
10548 {
10549 case 2: fsturq (cpu, imm); return;
10550 case 3: fldurq (cpu, imm); return;
10551 case 8: fsturs (cpu, imm); return;
10552 case 9: fldurs (cpu, imm); return;
10553 case 12: fsturd (cpu, imm); return;
10554 case 13: fldurd (cpu, imm); return;
10555
10556 case 0: /* STUR 8 bit FP. */
10557 case 1: /* LDUR 8 bit FP. */
10558 case 4: /* STUR 16 bit FP. */
10559 case 5: /* LDUR 16 bit FP. */
10560 HALT_NYI;
10561
10562 default:
10563 case 6:
10564 case 7:
10565 case 10:
10566 case 11:
10567 case 14:
10568 case 15:
10569 HALT_UNALLOC;
10570 }
10571 }
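/* Dispatch worked example (illustrative): LDUR x0, [x1, #-8] has
   size = 11, V = 0 and opc = 01, so dispatch = (3 << 2) | 1 = 13,
   selecting ldur64 with simm9 = -8. */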
10572
10573 /* N.B. A preliminary note regarding all the ldrs<x>32
10574 instructions
10575
10576 The signed value loaded by these instructions is cast to unsigned
10577 before being passed to aarch64_set_reg_u64 (cpu, N), i.e. to the
10578 64 bit element of the GReg union. This performs a 32 bit sign extension
10579 (as required) but avoids 64 bit sign extension, thus ensuring that the
10580 top half of the register word is zero. This is what the spec demands
10581 when a 32 bit load occurs. */
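/* A minimal sketch of that cast chain (illustrative only, not compiled
   in; the value is hypothetical): */
#if 0
int8_t loaded = -1;               /* byte read from memory */
uint64_t reg = (uint32_t) loaded; /* 0x00000000ffffffff: the sign
                                     extension stops at 32 bits */
#endif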
10582
10583 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10584 static void
10585 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10586 {
10587 unsigned int rn = INSTR (9, 5);
10588 unsigned int rt = INSTR (4, 0);
10589
10590 /* The target register may not be SP but the source may be.
10591 There is no scaling required for a byte load. */
10592 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10593 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10594 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10595 }
10596
10597 /* 32 bit load sign-extended byte scaled or unscaled zero-
10598 or sign-extended 32-bit register offset. */
10599 static void
10600 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10601 {
10602 unsigned int rm = INSTR (20, 16);
10603 unsigned int rn = INSTR (9, 5);
10604 unsigned int rt = INSTR (4, 0);
10605
10606 /* rn may reference SP, rm and rt must reference ZR. */
10607
10608 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10609 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10610 extension);
10611
10612 /* There is no scaling required for a byte load. */
10613 aarch64_set_reg_u64
10614 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10615 + displacement));
10616 }
10617
10618 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10619 pre- or post-writeback. */
10620 static void
10621 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10622 {
10623 uint64_t address;
10624 unsigned int rn = INSTR (9, 5);
10625 unsigned int rt = INSTR (4, 0);
10626
10627 if (rn == rt && wb != NoWriteBack)
10628 HALT_UNALLOC;
10629
10630 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10631
10632 if (wb == Pre)
10633 address += offset;
10634
10635 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10636 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10637
10638 if (wb == Post)
10639 address += offset;
10640
10641 if (wb != NoWriteBack)
10642 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10643 }
10644
10645 /* 8 bit store scaled. */
10646 static void
10647 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10648 {
10649 unsigned st = INSTR (4, 0);
10650 unsigned rn = INSTR (9, 5);
10651
10652 aarch64_set_mem_u8 (cpu,
10653 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10654 aarch64_get_vec_u8 (cpu, st, 0));
10655 }
10656
10657 /* 8 bit store scaled or unscaled zero- or
10658 sign-extended 8-bit register offset. */
10659 static void
10660 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10661 {
10662 unsigned rm = INSTR (20, 16);
10663 unsigned rn = INSTR (9, 5);
10664 unsigned st = INSTR (4, 0);
10665
10666 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10668 extension);
10669 uint64_t displacement = scaling == Scaled ? extended : 0;
10670
10671 aarch64_set_mem_u8
10672 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10673 }
10674
10675 /* 16 bit store scaled. */
10676 static void
10677 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10678 {
10679 unsigned st = INSTR (4, 0);
10680 unsigned rn = INSTR (9, 5);
10681
10682 aarch64_set_mem_u16
10683 (cpu,
10684 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10685 aarch64_get_vec_u16 (cpu, st, 0));
10686 }
10687
10688 /* 16 bit store scaled or unscaled zero-
10689 or sign-extended 16-bit register offset. */
10690 static void
10691 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10692 {
10693 unsigned rm = INSTR (20, 16);
10694 unsigned rn = INSTR (9, 5);
10695 unsigned st = INSTR (4, 0);
10696
10697 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10698 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10699 extension);
10700 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10701
10702 aarch64_set_mem_u16
10703 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10704 }
10705
10706 /* 32 bit store scaled unsigned 12 bit. */
10707 static void
10708 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10709 {
10710 unsigned st = INSTR (4, 0);
10711 unsigned rn = INSTR (9, 5);
10712
10713 aarch64_set_mem_u32
10714 (cpu,
10715 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10716 aarch64_get_vec_u32 (cpu, st, 0));
10717 }
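/* SCALE multiplies the unsigned 12 bit immediate by the access size in
   bytes (an assumption about the macro, which is defined earlier in
   this file), so for the 32 bit store above an offset field of 3
   addresses base + 12. */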
10718
10719 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10720 static void
10721 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10722 {
10723 unsigned rn = INSTR (9, 5);
10724 unsigned st = INSTR (4, 0);
10725
10726 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10727
10728 if (wb != Post)
10729 address += offset;
10730
10731 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10732
10733 if (wb == Post)
10734 address += offset;
10735
10736 if (wb != NoWriteBack)
10737 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10738 }
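/* Addressing trace for the writeback variants (illustrative values):
   with a base register of 0x1000 and offset = -16,
     Pre:         store at 0x0ff0, base becomes 0x0ff0;
     Post:        store at 0x1000, base becomes 0x0ff0;
     NoWriteBack: store at 0x0ff0, base is unchanged. */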
10739
10740 /* 32 bit store scaled or unscaled zero-
10741 or sign-extended 32-bit register offset. */
10742 static void
10743 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10744 {
10745 unsigned rm = INSTR (20, 16);
10746 unsigned rn = INSTR (9, 5);
10747 unsigned st = INSTR (4, 0);
10748
10749 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10750 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10751 extension);
10752 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10753
10754 aarch64_set_mem_u32
10755 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10756 }
10757
10758 /* 64 bit store scaled unsigned 12 bit. */
10759 static void
10760 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10761 {
10762 unsigned st = INSTR (4, 0);
10763 unsigned rn = INSTR (9, 5);
10764
10765 aarch64_set_mem_u64
10766 (cpu,
10767 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10768 aarch64_get_vec_u64 (cpu, st, 0));
10769 }
10770
10771 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10772 static void
10773 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10774 {
10775 unsigned rn = INSTR (9, 5);
10776 unsigned st = INSTR (4, 0);
10777
10778 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10779
10780 if (wb != Post)
10781 address += offset;
10782
10783 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10784
10785 if (wb == Post)
10786 address += offset;
10787
10788 if (wb != NoWriteBack)
10789 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10790 }
10791
10792 /* 64 bit store scaled or unscaled zero-
10793 or sign-extended 32-bit register offset. */
10794 static void
10795 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10796 {
10797 unsigned rm = INSTR (20, 16);
10798 unsigned rn = INSTR (9, 5);
10799 unsigned st = INSTR (4, 0);
10800
10801 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10802 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10803 extension);
10804 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10805
10806 aarch64_set_mem_u64
10807 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10808 }
10809
10810 /* 128 bit store scaled unsigned 12 bit. */
10811 static void
10812 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10813 {
10814 FRegister a;
10815 unsigned st = INSTR (4, 0);
10816 unsigned rn = INSTR (9, 5);
10817 uint64_t addr;
10818
10819 aarch64_get_FP_long_double (cpu, st, & a);
10820
10821 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10822 aarch64_set_mem_long_double (cpu, addr, a);
10823 }
10824
10825 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10826 static void
10827 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10828 {
10829 FRegister a;
10830 unsigned rn = INSTR (9, 5);
10831 unsigned st = INSTR (4, 0);
10832 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10833
10834 if (wb != Post)
10835 address += offset;
10836
10837 aarch64_get_FP_long_double (cpu, st, & a);
10838 aarch64_set_mem_long_double (cpu, address, a);
10839
10840 if (wb == Post)
10841 address += offset;
10842
10843 if (wb != NoWriteBack)
10844 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10845 }
10846
10847 /* 128 bit store scaled or unscaled zero-
10848 or sign-extended 32-bit register offset. */
10849 static void
10850 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10851 {
10852 unsigned rm = INSTR (20, 16);
10853 unsigned rn = INSTR (9, 5);
10854 unsigned st = INSTR (4, 0);
10855
10856 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10857 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10858 extension);
10859 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10860
10861 FRegister a;
10862
10863 aarch64_get_FP_long_double (cpu, st, & a);
10864 aarch64_set_mem_long_double (cpu, address + displacement, a);
10865 }
10866
10867 static void
10868 dexLoadImmediatePrePost (sim_cpu *cpu)
10869 {
10870 /* instr[31,30] = size
10871 instr[29,27] = 111
10872 instr[26] = V
10873 instr[25,24] = 00
10874 instr[23,22] = opc
10875 instr[21] = 0
10876 instr[20,12] = simm9
10877 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10878 instr[10] = 0
10879 instr[9,5] = Rn may be SP.
10880 instr[4,0] = Rt */
10881
10882 uint32_t V = INSTR (26, 26);
10883 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10884 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10885 WriteBack wb = INSTR (11, 11);
10886
10887 if (!V)
10888 {
10889 /* GReg operations. */
10890 switch (dispatch)
10891 {
10892 case 0: strb_wb (cpu, imm, wb); return;
10893 case 1: ldrb32_wb (cpu, imm, wb); return;
10894 case 2: ldrsb_wb (cpu, imm, wb); return;
10895 case 3: ldrsb32_wb (cpu, imm, wb); return;
10896 case 4: strh_wb (cpu, imm, wb); return;
10897 case 5: ldrh32_wb (cpu, imm, wb); return;
10898 case 6: ldrsh64_wb (cpu, imm, wb); return;
10899 case 7: ldrsh32_wb (cpu, imm, wb); return;
10900 case 8: str32_wb (cpu, imm, wb); return;
10901 case 9: ldr32_wb (cpu, imm, wb); return;
10902 case 10: ldrsw_wb (cpu, imm, wb); return;
10903 case 12: str_wb (cpu, imm, wb); return;
10904 case 13: ldr_wb (cpu, imm, wb); return;
10905
10906 default:
10907 case 11:
10908 case 14:
10909 case 15:
10910 HALT_UNALLOC;
10911 }
10912 }
10913
10914 /* FReg operations. */
10915 switch (dispatch)
10916 {
10917 case 2: fstrq_wb (cpu, imm, wb); return;
10918 case 3: fldrq_wb (cpu, imm, wb); return;
10919 case 8: fstrs_wb (cpu, imm, wb); return;
10920 case 9: fldrs_wb (cpu, imm, wb); return;
10921 case 12: fstrd_wb (cpu, imm, wb); return;
10922 case 13: fldrd_wb (cpu, imm, wb); return;
10923
10924 case 0: /* STUR 8 bit FP. */
10925 case 1: /* LDUR 8 bit FP. */
10926 case 4: /* STUR 16 bit FP. */
10927 case 5: /* LDUR 16 bit FP. */
10928 HALT_NYI;
10929
10930 default:
10931 case 6:
10932 case 7:
10933 case 10:
10934 case 11:
10935 case 14:
10936 case 15:
10937 HALT_UNALLOC;
10938 }
10939 }
10940
10941 static void
10942 dexLoadRegisterOffset (sim_cpu *cpu)
10943 {
10944 /* instr[31,30] = size
10945 instr[29,27] = 111
10946 instr[26] = V
10947 instr[25,24] = 00
10948 instr[23,22] = opc
10949 instr[21] = 1
10950 instr[20,16] = rm
10951 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10952 110 ==> SXTW, 111 ==> SXTX,
10953 ow ==> RESERVED
10954 instr[12] = scaled
10955 instr[11,10] = 10
10956 instr[9,5] = rn
10957 instr[4,0] = rt. */
10958
10959 uint32_t V = INSTR (26, 26);
10960 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10961 Scaling scale = INSTR (12, 12);
10962 Extension extensionType = INSTR (15, 13);
10963
10964 /* Check for illegal extension types. */
10965 if (uimm (extensionType, 1, 1) == 0)
10966 HALT_UNALLOC;
10967
10968 if (extensionType == UXTX || extensionType == SXTX)
10969 extensionType = NoExtension;
10970
10971 if (!V)
10972 {
10973 /* GReg operations. */
10974 switch (dispatch)
10975 {
10976 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10977 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10978 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10979 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10980 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10981 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10982 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10983 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10984 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10985 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10986 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10987 case 12: str_scale_ext (cpu, scale, extensionType); return;
10988 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10989 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10990
10991 default:
10992 case 11:
10993 case 15:
10994 HALT_UNALLOC;
10995 }
10996 }
10997
10998 /* FReg operations. */
10999 switch (dispatch)
11000 {
11001 case 1: /* LDR 8 bit FP. */
11002 HALT_NYI;
11003 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11004 case 5: /* LDR 16 bit FP. */
11005 HALT_NYI;
11006 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11007 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11008
11009 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11010 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11011 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11012 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11013 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11014
11015 default:
11016 case 6:
11017 case 7:
11018 case 10:
11019 case 11:
11020 case 14:
11021 case 15:
11022 HALT_UNALLOC;
11023 }
11024 }
11025
11026 static void
11027 dexLoadUnsignedImmediate (sim_cpu *cpu)
11028 {
11029 /* instr[29,24] == 111_01
11030 instr[31,30] = size
11031 instr[26] = V
11032 instr[23,22] = opc
11033 instr[21,10] = uimm12 : unsigned immediate offset
11034 instr[9,5] = rn may be SP.
11035 instr[4,0] = rt. */
11036
11037 uint32_t V = INSTR (26,26);
11038 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11039 uint32_t imm = INSTR (21, 10);
11040
11041 if (!V)
11042 {
11043 /* GReg operations. */
11044 switch (dispatch)
11045 {
11046 case 0: strb_abs (cpu, imm); return;
11047 case 1: ldrb32_abs (cpu, imm); return;
11048 case 2: ldrsb_abs (cpu, imm); return;
11049 case 3: ldrsb32_abs (cpu, imm); return;
11050 case 4: strh_abs (cpu, imm); return;
11051 case 5: ldrh32_abs (cpu, imm); return;
11052 case 6: ldrsh_abs (cpu, imm); return;
11053 case 7: ldrsh32_abs (cpu, imm); return;
11054 case 8: str32_abs (cpu, imm); return;
11055 case 9: ldr32_abs (cpu, imm); return;
11056 case 10: ldrsw_abs (cpu, imm); return;
11057 case 12: str_abs (cpu, imm); return;
11058 case 13: ldr_abs (cpu, imm); return;
11059 case 14: prfm_abs (cpu, imm); return;
11060
11061 default:
11062 case 11:
11063 case 15:
11064 HALT_UNALLOC;
11065 }
11066 }
11067
11068 /* FReg operations. */
11069 switch (dispatch)
11070 {
11071 case 0: fstrb_abs (cpu, imm); return;
11072 case 4: fstrh_abs (cpu, imm); return;
11073 case 8: fstrs_abs (cpu, imm); return;
11074 case 12: fstrd_abs (cpu, imm); return;
11075 case 2: fstrq_abs (cpu, imm); return;
11076
11077 case 1: fldrb_abs (cpu, imm); return;
11078 case 5: fldrh_abs (cpu, imm); return;
11079 case 9: fldrs_abs (cpu, imm); return;
11080 case 13: fldrd_abs (cpu, imm); return;
11081 case 3: fldrq_abs (cpu, imm); return;
11082
11083 default:
11084 case 6:
11085 case 7:
11086 case 10:
11087 case 11:
11088 case 14:
11089 case 15:
11090 HALT_UNALLOC;
11091 }
11092 }
11093
11094 static void
11095 dexLoadExclusive (sim_cpu *cpu)
11096 {
11097 /* assert instr[29:24] = 001000;
11098 instr[31,30] = size
11099 instr[23] = 0 if exclusive
11100 instr[22] = L : 1 if load, 0 if store
11101 instr[21] = 1 if pair
11102 instr[20,16] = Rs
11103 instr[15] = o0 : 1 if ordered
11104 instr[14,10] = Rt2
11105 instr[9,5] = Rn
11106 instr[4,0] = Rt. */
11107
11108 switch (INSTR (22, 21))
11109 {
11110 case 2: ldxr (cpu); return;
11111 case 0: stxr (cpu); return;
11112 default: HALT_NYI;
11113 }
11114 }
11115
11116 static void
11117 dexLoadOther (sim_cpu *cpu)
11118 {
11119 uint32_t dispatch;
11120
11121 /* instr[29,25] = 111_0
11122 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11123 instr[21] and instr[11,10] form the secondary dispatch vector. */
11124 if (INSTR (24, 24))
11125 {
11126 dexLoadUnsignedImmediate (cpu);
11127 return;
11128 }
11129
11130 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11131 switch (dispatch)
11132 {
11133 case 0: dexLoadUnscaledImmediate (cpu); return;
11134 case 1: dexLoadImmediatePrePost (cpu); return;
11135 case 3: dexLoadImmediatePrePost (cpu); return;
11136 case 6: dexLoadRegisterOffset (cpu); return;
11137
11138 default:
11139 case 2:
11140 case 4:
11141 case 5:
11142 case 7:
11143 HALT_NYI;
11144 }
11145 }
11146
11147 static void
11148 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11149 {
11150 unsigned rn = INSTR (14, 10);
11151 unsigned rd = INSTR (9, 5);
11152 unsigned rm = INSTR (4, 0);
11153 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11154
11155 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11156 HALT_UNALLOC; /* Unpredictable: writeback overlaps a data register. */
11157
11158 offset <<= 2;
11159
11160 if (wb != Post)
11161 address += offset;
11162
11163 aarch64_set_mem_u32 (cpu, address,
11164 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11165 aarch64_set_mem_u32 (cpu, address + 4,
11166 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11167
11168 if (wb == Post)
11169 address += offset;
11170
11171 if (wb != NoWriteBack)
11172 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11173 }
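/* The 7 bit signed offset field is scaled by the access size: e.g.
   STP w2, w3, [sp, #-8]! (a hypothetical instance) carries simm7 = -2,
   which the shift above turns into the byte offset -8. */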
11174
11175 static void
11176 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11177 {
11178 unsigned rn = INSTR (14, 10);
11179 unsigned rd = INSTR (9, 5);
11180 unsigned rm = INSTR (4, 0);
11181 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11182
11183 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11184 HALT_UNALLOC; /* Unpredictable: writeback overlaps a data register. */
11185
11186 offset <<= 3;
11187
11188 if (wb != Post)
11189 address += offset;
11190
11191 aarch64_set_mem_u64 (cpu, address,
11192 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11193 aarch64_set_mem_u64 (cpu, address + 8,
11194 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11195
11196 if (wb == Post)
11197 address += offset;
11198
11199 if (wb != NoWriteBack)
11200 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11201 }
11202
11203 static void
11204 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11205 {
11206 unsigned rn = INSTR (14, 10);
11207 unsigned rd = INSTR (9, 5);
11208 unsigned rm = INSTR (4, 0);
11209 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11210
11211 /* Treat this as unalloc to make sure we don't do it. */
11212 if (rn == rm)
11213 HALT_UNALLOC;
11214
11215 offset <<= 2;
11216
11217 if (wb != Post)
11218 address += offset;
11219
11220 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11221 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11222
11223 if (wb == Post)
11224 address += offset;
11225
11226 if (wb != NoWriteBack)
11227 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11228 }
11229
11230 static void
11231 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11232 {
11233 unsigned rn = INSTR (14, 10);
11234 unsigned rd = INSTR (9, 5);
11235 unsigned rm = INSTR (4, 0);
11236 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11237
11238 /* Treat this as unalloc to make sure we don't do it. */
11239 if (rn == rm)
11240 HALT_UNALLOC;
11241
11242 offset <<= 2;
11243
11244 if (wb != Post)
11245 address += offset;
11246
11247 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11248 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11249
11250 if (wb == Post)
11251 address += offset;
11252
11253 if (wb != NoWriteBack)
11254 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11255 }
11256
11257 static void
11258 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11259 {
11260 unsigned rn = INSTR (14, 10);
11261 unsigned rd = INSTR (9, 5);
11262 unsigned rm = INSTR (4, 0);
11263 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11264
11265 /* Treat this as unalloc to make sure we don't do it. */
11266 if (rn == rm)
11267 HALT_UNALLOC;
11268
11269 offset <<= 3;
11270
11271 if (wb != Post)
11272 address += offset;
11273
11274 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11275 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11276
11277 if (wb == Post)
11278 address += offset;
11279
11280 if (wb != NoWriteBack)
11281 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11282 }
11283
11284 static void
11285 dex_load_store_pair_gr (sim_cpu *cpu)
11286 {
11287 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11288 instr[29,25] = instruction encoding: 101_0
11289 instr[26] = V : 1 if fp 0 if gp
11290 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11291 instr[22] = load/store (1=> load)
11292 instr[21,15] = signed, scaled offset (simm7)
11293 instr[14,10] = Rn
11294 instr[ 9, 5] = Rd
11295 instr[ 4, 0] = Rm. */
11296
11297 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11298 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11299
11300 switch (dispatch)
11301 {
11302 case 2: store_pair_u32 (cpu, offset, Post); return;
11303 case 3: load_pair_u32 (cpu, offset, Post); return;
11304 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11305 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11306 case 6: store_pair_u32 (cpu, offset, Pre); return;
11307 case 7: load_pair_u32 (cpu, offset, Pre); return;
11308
11309 case 11: load_pair_s32 (cpu, offset, Post); return;
11310 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11311 case 15: load_pair_s32 (cpu, offset, Pre); return;
11312
11313 case 18: store_pair_u64 (cpu, offset, Post); return;
11314 case 19: load_pair_u64 (cpu, offset, Post); return;
11315 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11316 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11317 case 22: store_pair_u64 (cpu, offset, Pre); return;
11318 case 23: load_pair_u64 (cpu, offset, Pre); return;
11319
11320 default:
11321 HALT_UNALLOC;
11322 }
11323 }
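/* Dispatch worked example (illustrative): STP x29, x30, [sp, #-16]!
   has size = 10, addressing mode = 11 (pre) and L = 0, so
   dispatch = (2 << 3) | 6 = 22, selecting store_pair_u64 with Pre
   writeback; simm7 = -2 scales to the byte offset -16. */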
11324
11325 static void
11326 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11327 {
11328 unsigned rn = INSTR (14, 10);
11329 unsigned rd = INSTR (9, 5);
11330 unsigned rm = INSTR (4, 0);
11331 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11332
11333 offset <<= 2;
11334
11335 if (wb != Post)
11336 address += offset;
11337
11338 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11339 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11340
11341 if (wb == Post)
11342 address += offset;
11343
11344 if (wb != NoWriteBack)
11345 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11346 }
11347
11348 static void
11349 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11350 {
11351 unsigned rn = INSTR (14, 10);
11352 unsigned rd = INSTR (9, 5);
11353 unsigned rm = INSTR (4, 0);
11354 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11355
11356 offset <<= 3;
11357
11358 if (wb != Post)
11359 address += offset;
11360
11361 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11362 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11363
11364 if (wb == Post)
11365 address += offset;
11366
11367 if (wb != NoWriteBack)
11368 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11369 }
11370
11371 static void
11372 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11373 {
11374 FRegister a;
11375 unsigned rn = INSTR (14, 10);
11376 unsigned rd = INSTR (9, 5);
11377 unsigned rm = INSTR (4, 0);
11378 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11379
11380 offset <<= 4;
11381
11382 if (wb != Post)
11383 address += offset;
11384
11385 aarch64_get_FP_long_double (cpu, rm, & a);
11386 aarch64_set_mem_long_double (cpu, address, a);
11387 aarch64_get_FP_long_double (cpu, rn, & a);
11388 aarch64_set_mem_long_double (cpu, address + 16, a);
11389
11390 if (wb == Post)
11391 address += offset;
11392
11393 if (wb != NoWriteBack)
11394 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11395 }
11396
11397 static void
11398 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11399 {
11400 unsigned rn = INSTR (14, 10);
11401 unsigned rd = INSTR (9, 5);
11402 unsigned rm = INSTR (4, 0);
11403 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11404
11405 if (rm == rn)
11406 HALT_UNALLOC;
11407
11408 offset <<= 2;
11409
11410 if (wb != Post)
11411 address += offset;
11412
11413 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11414 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11415
11416 if (wb == Post)
11417 address += offset;
11418
11419 if (wb != NoWriteBack)
11420 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11421 }
11422
11423 static void
11424 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11425 {
11426 unsigned rn = INSTR (14, 10);
11427 unsigned rd = INSTR (9, 5);
11428 unsigned rm = INSTR (4, 0);
11429 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11430
11431 if (rm == rn)
11432 HALT_UNALLOC;
11433
11434 offset <<= 3;
11435
11436 if (wb != Post)
11437 address += offset;
11438
11439 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11440 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11441
11442 if (wb == Post)
11443 address += offset;
11444
11445 if (wb != NoWriteBack)
11446 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11447 }
11448
11449 static void
11450 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11451 {
11452 FRegister a;
11453 unsigned rn = INSTR (14, 10);
11454 unsigned rd = INSTR (9, 5);
11455 unsigned rm = INSTR (4, 0);
11456 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11457
11458 if (rm == rn)
11459 HALT_UNALLOC;
11460
11461 offset <<= 4;
11462
11463 if (wb != Post)
11464 address += offset;
11465
11466 aarch64_get_mem_long_double (cpu, address, & a);
11467 aarch64_set_FP_long_double (cpu, rm, a);
11468 aarch64_get_mem_long_double (cpu, address + 16, & a);
11469 aarch64_set_FP_long_double (cpu, rn, a);
11470
11471 if (wb == Post)
11472 address += offset;
11473
11474 if (wb != NoWriteBack)
11475 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11476 }
11477
11478 static void
11479 dex_load_store_pair_fp (sim_cpu *cpu)
11480 {
11481 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11482 instr[29,25] = instruction encoding
11483 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11484 instr[22] = load/store (1=> load)
11485 instr[21,15] = signed, scaled offset (simm7)
11486 instr[14,10] = Rn
11487 instr[ 9, 5] = Rd
11488 instr[ 4, 0] = Rm */
11489
11490 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11491 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11492
11493 switch (dispatch)
11494 {
11495 case 2: store_pair_float (cpu, offset, Post); return;
11496 case 3: load_pair_float (cpu, offset, Post); return;
11497 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11498 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11499 case 6: store_pair_float (cpu, offset, Pre); return;
11500 case 7: load_pair_float (cpu, offset, Pre); return;
11501
11502 case 10: store_pair_double (cpu, offset, Post); return;
11503 case 11: load_pair_double (cpu, offset, Post); return;
11504 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11505 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11506 case 14: store_pair_double (cpu, offset, Pre); return;
11507 case 15: load_pair_double (cpu, offset, Pre); return;
11508
11509 case 18: store_pair_long_double (cpu, offset, Post); return;
11510 case 19: load_pair_long_double (cpu, offset, Post); return;
11511 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11512 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11513 case 22: store_pair_long_double (cpu, offset, Pre); return;
11514 case 23: load_pair_long_double (cpu, offset, Pre); return;
11515
11516 default:
11517 HALT_UNALLOC;
11518 }
11519 }
11520
11521 static inline unsigned
11522 vec_reg (unsigned v, unsigned o)
11523 {
11524 return (v + o) & 0x1F; /* Wrap modulo the 32 vector registers. */
11525 }
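/* Because register numbers wrap modulo 32, an LD4 whose first register
   is V30 targets V30, V31, V0, V1.  Illustrative only: */
#if 0
unsigned third = vec_reg (30, 2); /* == 0, i.e. V0 */
#endif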
11526
11527 /* Load multiple N-element structures to M consecutive registers. */
11528 static void
11529 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11530 {
11531 int all = INSTR (30, 30);
11532 unsigned size = INSTR (11, 10);
11533 unsigned vd = INSTR (4, 0);
11534 unsigned rpt = (N == M) ? 1 : M;
11535 unsigned selem = N;
11536 unsigned i, j, k;
11537
11538 switch (size)
11539 {
11540 case 0: /* 8-bit operations. */
11541 for (i = 0; i < rpt; i++)
11542 for (j = 0; j < (8 + (8 * all)); j++)
11543 for (k = 0; k < selem; k++)
11544 {
11545 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11546 aarch64_get_mem_u8 (cpu, address));
11547 address += 1;
11548 }
11549 return;
11550
11551 case 1: /* 16-bit operations. */
11552 for (i = 0; i < rpt; i++)
11553 for (j = 0; j < (4 + (4 * all)); j++)
11554 for (k = 0; k < selem; k++)
11555 {
11556 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11557 aarch64_get_mem_u16 (cpu, address));
11558 address += 2;
11559 }
11560 return;
11561
11562 case 2: /* 32-bit operations. */
11563 for (i = 0; i < rpt; i++)
11564 for (j = 0; j < (2 + (2 * all)); j++)
11565 for (k = 0; k < selem; k++)
11566 {
11567 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11568 aarch64_get_mem_u32 (cpu, address));
11569 address += 4;
11570 }
11571 return;
11572
11573 case 3: /* 64-bit operations. */
11574 for (i = 0; i < rpt; i++)
11575 for (j = 0; j < (1 + all); j++)
11576 for (k = 0; k < selem; k++)
11577 {
11578 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11579 aarch64_get_mem_u64 (cpu, address));
11580 address += 8;
11581 }
11582 return;
11583 }
11584 }
11585
11586 /* Load multiple 4-element structures into four consecutive registers. */
11587 static void
11588 LD4 (sim_cpu *cpu, uint64_t address)
11589 {
11590 vec_load (cpu, address, 4, 4);
11591 }
11592
11593 /* Load multiple 3-element structures into three consecutive registers. */
11594 static void
11595 LD3 (sim_cpu *cpu, uint64_t address)
11596 {
11597 vec_load (cpu, address, 3, 3);
11598 }
11599
11600 /* Load multiple 2-element structures into two consecutive registers. */
11601 static void
11602 LD2 (sim_cpu *cpu, uint64_t address)
11603 {
11604 vec_load (cpu, address, 2, 2);
11605 }
11606
11607 /* Load multiple 1-element structures into one register. */
11608 static void
11609 LD1_1 (sim_cpu *cpu, uint64_t address)
11610 {
11611 vec_load (cpu, address, 1, 1);
11612 }
11613
11614 /* Load multiple 1-element structures into two registers. */
11615 static void
11616 LD1_2 (sim_cpu *cpu, uint64_t address)
11617 {
11618 vec_load (cpu, address, 1, 2);
11619 }
11620
11621 /* Load multiple 1-element structures into three registers. */
11622 static void
11623 LD1_3 (sim_cpu *cpu, uint64_t address)
11624 {
11625 vec_load (cpu, address, 1, 3);
11626 }
11627
11628 /* Load multiple 1-element structures into four registers. */
11629 static void
11630 LD1_4 (sim_cpu *cpu, uint64_t address)
11631 {
11632 vec_load (cpu, address, 1, 4);
11633 }
11634
11635 /* Store multiple N-element structures from M consecutive registers. */
11636 static void
11637 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11638 {
11639 int all = INSTR (30, 30);
11640 unsigned size = INSTR (11, 10);
11641 unsigned vd = INSTR (4, 0);
11642 unsigned rpt = (N == M) ? 1 : M;
11643 unsigned selem = N;
11644 unsigned i, j, k;
11645
11646 switch (size)
11647 {
11648 case 0: /* 8-bit operations. */
11649 for (i = 0; i < rpt; i++)
11650 for (j = 0; j < (8 + (8 * all)); j++)
11651 for (k = 0; k < selem; k++)
11652 {
11653 aarch64_set_mem_u8
11654 (cpu, address,
11655 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11656 address += 1;
11657 }
11658 return;
11659
11660 case 1: /* 16-bit operations. */
11661 for (i = 0; i < rpt; i++)
11662 for (j = 0; j < (4 + (4 * all)); j++)
11663 for (k = 0; k < selem; k++)
11664 {
11665 aarch64_set_mem_u16
11666 (cpu, address,
11667 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11668 address += 2;
11669 }
11670 return;
11671
11672 case 2: /* 32-bit operations. */
11673 for (i = 0; i < rpt; i++)
11674 for (j = 0; j < (2 + (2 * all)); j++)
11675 for (k = 0; k < selem; k++)
11676 {
11677 aarch64_set_mem_u32
11678 (cpu, address,
11679 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11680 address += 4;
11681 }
11682 return;
11683
11684 case 3: /* 64-bit operations. */
11685 for (i = 0; i < rpt; i++)
11686 for (j = 0; j < (1 + all); j++)
11687 for (k = 0; k < selem; k++)
11688 {
11689 aarch64_set_mem_u64
11690 (cpu, address,
11691 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11692 address += 8;
11693 }
11694 return;
11695 }
11696 }
11697
11698 /* Store multiple 4-element structure from four consecutive registers. */
11699 static void
11700 ST4 (sim_cpu *cpu, uint64_t address)
11701 {
11702 vec_store (cpu, address, 4, 4);
11703 }
11704
11705 /* Store multiple 3-element structures from three consecutive registers. */
11706 static void
11707 ST3 (sim_cpu *cpu, uint64_t address)
11708 {
11709 vec_store (cpu, address, 3, 3);
11710 }
11711
11712 /* Store multiple 2-element structures from two consecutive registers. */
11713 static void
11714 ST2 (sim_cpu *cpu, uint64_t address)
11715 {
11716 vec_store (cpu, address, 2, 2);
11717 }
11718
11719 /* Store multiple 1-element structures from one register. */
11720 static void
11721 ST1_1 (sim_cpu *cpu, uint64_t address)
11722 {
11723 vec_store (cpu, address, 1, 1);
11724 }
11725
11726 /* Store multiple 1-element structures from two registers. */
11727 static void
11728 ST1_2 (sim_cpu *cpu, uint64_t address)
11729 {
11730 vec_store (cpu, address, 1, 2);
11731 }
11732
11733 /* Store multiple 1-element structures from three registers. */
11734 static void
11735 ST1_3 (sim_cpu *cpu, uint64_t address)
11736 {
11737 vec_store (cpu, address, 1, 3);
11738 }
11739
11740 /* Store multiple 1-element structures from four registers. */
11741 static void
11742 ST1_4 (sim_cpu *cpu, uint64_t address)
11743 {
11744 vec_store (cpu, address, 1, 4);
11745 }
11746
11747 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11748 do \
11749 { \
11750 switch (INSTR (15, 14)) \
11751 { \
11752 case 0: \
11753 lane = (full << 3) | (s << 2) | size; \
11754 size = 0; \
11755 break; \
11756 \
11757 case 1: \
11758 if ((size & 1) == 1) \
11759 HALT_UNALLOC; \
11760 lane = (full << 2) | (s << 1) | (size >> 1); \
11761 size = 1; \
11762 break; \
11763 \
11764 case 2: \
11765 if ((size & 2) == 2) \
11766 HALT_UNALLOC; \
11767 \
11768 if ((size & 1) == 0) \
11769 { \
11770 lane = (full << 1) | s; \
11771 size = 2; \
11772 } \
11773 else \
11774 { \
11775 if (s) \
11776 HALT_UNALLOC; \
11777 lane = full; \
11778 size = 3; \
11779 } \
11780 break; \
11781 \
11782 default: \
11783 HALT_UNALLOC; \
11784 } \
11785 } \
11786 while (0)
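/* Worked example (illustrative): for a byte-sized single-structure
   access, INSTR (15, 14) == 0 with full == 1, s == 1 and size == 2
   yields lane = (1 << 3) | (1 << 2) | 2 = 14 and size = 0. */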
11787
11788 /* Load single structure into one lane of N registers. */
11789 static void
11790 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11791 {
11792 /* instr[31] = 0
11793 instr[30] = element selector 0=>half, 1=>all elements
11794 instr[29,24] = 00 1101
11795 instr[23] = 0=>simple, 1=>post
11796 instr[22] = 1
11797 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11798 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11799 11111 (immediate post inc)
11800 instr[15,13] = opcode
11801 instr[12] = S, used for lane number
11802 instr[11,10] = size, also used for lane number
11803 instr[9,5] = address
11804 instr[4,0] = Vd */
11805
11806 unsigned full = INSTR (30, 30);
11807 unsigned vd = INSTR (4, 0);
11808 unsigned size = INSTR (11, 10);
11809 unsigned s = INSTR (12, 12);
11810 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11811 int lane = 0;
11812 int i;
11813
11814 NYI_assert (29, 24, 0x0D);
11815 NYI_assert (22, 22, 1);
11816
11817 /* Compute the lane number first (using size), and then compute size. */
11818 LDn_STn_SINGLE_LANE_AND_SIZE ();
11819
11820 for (i = 0; i < nregs; i++)
11821 switch (size)
11822 {
11823 case 0:
11824 {
11825 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11826 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11827 break;
11828 }
11829
11830 case 1:
11831 {
11832 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11833 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11834 break;
11835 }
11836
11837 case 2:
11838 {
11839 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11840 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11841 break;
11842 }
11843
11844 case 3:
11845 {
11846 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11847 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11848 break;
11849 }
11850 }
11851 }
11852
11853 /* Store single structure from one lane from N registers. */
11854 static void
11855 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11856 {
11857 /* instr[31] = 0
11858 instr[30] = element selector 0=>half, 1=>all elements
11859 instr[29,24] = 00 1101
11860 instr[23] = 0=>simple, 1=>post
11861 instr[22] = 0
11862 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11863 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11864 11111 (immediate post inc)
11865 instr[15,13] = opcode
11866 instr[12] = S, used for lane number
11867 instr[11,10] = size, also used for lane number
11868 instr[9,5] = address
11869 instr[4,0] = Vd */
11870
11871 unsigned full = INSTR (30, 30);
11872 unsigned vd = INSTR (4, 0);
11873 unsigned size = INSTR (11, 10);
11874 unsigned s = INSTR (12, 12);
11875 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11876 int lane = 0;
11877 int i;
11878
11879 NYI_assert (29, 24, 0x0D);
11880 NYI_assert (22, 22, 0);
11881
11882 /* Compute the lane number first (using size), and then compute size. */
11883 LDn_STn_SINGLE_LANE_AND_SIZE ();
11884
11885 for (i = 0; i < nregs; i++)
11886 switch (size)
11887 {
11888 case 0:
11889 {
11890 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11891 aarch64_set_mem_u8 (cpu, address + i, val);
11892 break;
11893 }
11894
11895 case 1:
11896 {
11897 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11898 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11899 break;
11900 }
11901
11902 case 2:
11903 {
11904 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11905 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11906 break;
11907 }
11908
11909 case 3:
11910 {
11911 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11912 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11913 break;
11914 }
11915 }
11916 }
11917
11918 /* Load single structure into all lanes of N registers. */
11919 static void
11920 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11921 {
11922 /* instr[31] = 0
11923 instr[30] = element selector 0=>half, 1=>all elements
11924 instr[29,24] = 00 1101
11925 instr[23] = 0=>simple, 1=>post
11926 instr[22] = 1
11927 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11928 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11929 11111 (immediate post inc)
11930 instr[15,14] = 11
11931 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11932 instr[12] = 0
11933 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11934 10=> word(s), 11=> double(d)
11935 instr[9,5] = address
11936 instr[4,0] = Vd */
11937
11938 unsigned full = INSTR (30, 30);
11939 unsigned vd = INSTR (4, 0);
11940 unsigned size = INSTR (11, 10);
11941 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11942 int i, n;
11943
11944 NYI_assert (29, 24, 0x0D);
11945 NYI_assert (22, 22, 1);
11946 NYI_assert (15, 14, 3);
11947 NYI_assert (12, 12, 0);
11948
11949 for (n = 0; n < nregs; n++)
11950 switch (size)
11951 {
11952 case 0:
11953 {
11954 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11955 for (i = 0; i < (full ? 16 : 8); i++)
11956 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11957 break;
11958 }
11959
11960 case 1:
11961 {
11962 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11963 for (i = 0; i < (full ? 8 : 4); i++)
11964 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11965 break;
11966 }
11967
11968 case 2:
11969 {
11970 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11971 for (i = 0; i < (full ? 4 : 2); i++)
11972 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11973 break;
11974 }
11975
11976 case 3:
11977 {
11978 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11979 for (i = 0; i < (full ? 2 : 1); i++)
11980 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11981 break;
11982 }
11983
11984 default:
11985 HALT_UNALLOC;
11986 }
11987 }
11988
11989 static void
11990 do_vec_load_store (sim_cpu *cpu)
11991 {
11992 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11993
11994 instr[31] = 0
11995 instr[30] = element selector 0=>half, 1=>all elements
11996 instr[29,25] = 00110
11997 instr[24] = 0=>multiple struct, 1=>single struct
11998 instr[23] = 0=>simple, 1=>post
11999 instr[22] = 0=>store, 1=>load
12000 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12001 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12002 11111 (immediate post inc)
12003 instr[15,12] = elements and destinations, e.g. for load:
12004 0000=>LD4 => load multiple 4-element to
12005 four consecutive registers
12006 0100=>LD3 => load multiple 3-element to
12007 three consecutive registers
12008 1000=>LD2 => load multiple 2-element to
12009 two consecutive registers
12010 0010=>LD1 => load multiple 1-element to
12011 four consecutive registers
12012 0110=>LD1 => load multiple 1-element to
12013 three consecutive registers
12014 1010=>LD1 => load multiple 1-element to
12015 two consecutive registers
12016 0111=>LD1 => load multiple 1-element to
12017 one register
12018 1100=>LD1R,LD2R
12019 1110=>LD3R,LD4R
12020 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12021 10=> word(s), 11=> double(d)
12022 instr[9,5] = Vn, can be SP
12023 instr[4,0] = Vd */
12024
12025 int single;
12026 int post;
12027 int load;
12028 unsigned vn;
12029 uint64_t address;
12030 int type;
12031
12032 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12033 HALT_NYI;
12034
12035 single = INSTR (24, 24);
12036 post = INSTR (23, 23);
12037 load = INSTR (22, 22);
12038 type = INSTR (15, 12);
12039 vn = INSTR (9, 5);
12040 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12041
12042 if (! single && INSTR (21, 21) != 0)
12043 HALT_UNALLOC;
12044
12045 if (post)
12046 {
12047 unsigned vm = INSTR (20, 16);
12048
12049 if (vm == R31)
12050 {
12051 unsigned sizeof_operation;
12052
12053 if (single)
12054 {
12055 if ((type >= 0) && (type <= 11))
12056 {
12057 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12058 switch (INSTR (15, 14))
12059 {
12060 case 0:
12061 sizeof_operation = nregs * 1;
12062 break;
12063 case 1:
12064 sizeof_operation = nregs * 2;
12065 break;
12066 case 2:
12067 if (INSTR (10, 10) == 0)
12068 sizeof_operation = nregs * 4;
12069 else
12070 sizeof_operation = nregs * 8;
12071 break;
12072 default:
12073 HALT_UNALLOC;
12074 }
12075 }
12076 else if (type == 0xC)
12077 {
12078 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12079 sizeof_operation <<= INSTR (11, 10);
12080 }
12081 else if (type == 0xE)
12082 {
12083 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12084 sizeof_operation <<= INSTR (11, 10);
12085 }
12086 else
12087 HALT_UNALLOC;
12088 }
12089 else
12090 {
12091 switch (type)
12092 {
12093 case 0: sizeof_operation = 32; break;
12094 case 4: sizeof_operation = 24; break;
12095 case 8: sizeof_operation = 16; break;
12096
12097 case 7:
12098 /* One register, immediate offset variant. */
12099 sizeof_operation = 8;
12100 break;
12101
12102 case 10:
12103 /* Two registers, immediate offset variant. */
12104 sizeof_operation = 16;
12105 break;
12106
12107 case 6:
12108 /* Three registers, immediate offset variant. */
12109 sizeof_operation = 24;
12110 break;
12111
12112 case 2:
12113 /* Four registers, immediate offset variant. */
12114 sizeof_operation = 32;
12115 break;
12116
12117 default:
12118 HALT_UNALLOC;
12119 }
12120
12121 if (INSTR (30, 30))
12122 sizeof_operation *= 2;
12123 }
12124
12125 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12126 }
12127 else
12128 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12129 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12130 }
12131 else
12132 {
12133 NYI_assert (20, 16, 0);
12134 }
12135
12136 if (single)
12137 {
12138 if (load)
12139 {
12140 if ((type >= 0) && (type <= 11))
12141 do_vec_LDn_single (cpu, address);
12142 else if ((type == 0xC) || (type == 0xE))
12143 do_vec_LDnR (cpu, address);
12144 else
12145 HALT_UNALLOC;
12146 return;
12147 }
12148
12149 /* Stores. */
12150 if ((type >= 0) && (type <= 11))
12151 {
12152 do_vec_STn_single (cpu, address);
12153 return;
12154 }
12155
12156 HALT_UNALLOC;
12157 }
12158
12159 if (load)
12160 {
12161 switch (type)
12162 {
12163 case 0: LD4 (cpu, address); return;
12164 case 4: LD3 (cpu, address); return;
12165 case 8: LD2 (cpu, address); return;
12166 case 2: LD1_4 (cpu, address); return;
12167 case 6: LD1_3 (cpu, address); return;
12168 case 10: LD1_2 (cpu, address); return;
12169 case 7: LD1_1 (cpu, address); return;
12170
12171 default:
12172 HALT_UNALLOC;
12173 }
12174 }
12175
12176 /* Stores. */
12177 switch (type)
12178 {
12179 case 0: ST4 (cpu, address); return;
12180 case 4: ST3 (cpu, address); return;
12181 case 8: ST2 (cpu, address); return;
12182 case 2: ST1_4 (cpu, address); return;
12183 case 6: ST1_3 (cpu, address); return;
12184 case 10: ST1_2 (cpu, address); return;
12185 case 7: ST1_1 (cpu, address); return;
12186 default:
12187 HALT_UNALLOC;
12188 }
12189 }
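/* Post-increment worked example (illustrative): a multiple-structure
   LD4 (type == 0) with Q == 1 advances the base register by
   sizeof_operation = 32 * 2 = 64 bytes, i.e. four full 128 bit
   registers. */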
12190
12191 static void
12192 dexLdSt (sim_cpu *cpu)
12193 {
12194 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12195 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12196 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12197 bits [29,28:26] of a LS are the secondary dispatch vector. */
12198 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12199
12200 switch (group2)
12201 {
12202 case LS_EXCL_000:
12203 dexLoadExclusive (cpu); return;
12204
12205 case LS_LIT_010:
12206 case LS_LIT_011:
12207 dexLoadLiteral (cpu); return;
12208
12209 case LS_OTHER_110:
12210 case LS_OTHER_111:
12211 dexLoadOther (cpu); return;
12212
12213 case LS_ADVSIMD_001:
12214 do_vec_load_store (cpu); return;
12215
12216 case LS_PAIR_100:
12217 dex_load_store_pair_gr (cpu); return;
12218
12219 case LS_PAIR_101:
12220 dex_load_store_pair_fp (cpu); return;
12221
12222 default:
12223 /* Should never reach here. */
12224 HALT_NYI;
12225 }
12226 }
12227
12228 /* Specific decode and execute for group Data Processing Register. */
12229
12230 static void
12231 dexLogicalShiftedRegister (sim_cpu *cpu)
12232 {
12233 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12234 instr[30,29] = op
12235 instr[28:24] = 01010
12236 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12237 instr[21] = N
12238 instr[20,16] = Rm
12239 instr[15,10] = count : must be 0xxxxx for 32 bit
12240 instr[9,5] = Rn
12241 instr[4,0] = Rd */
12242
12243 uint32_t size = INSTR (31, 31);
12244 Shift shiftType = INSTR (23, 22);
12245 uint32_t count = INSTR (15, 10);
12246
12247 /* 32 bit operations must have count[5] = 0,
12248 or else we have an UNALLOC. */
12249 if (size == 0 && uimm (count, 5, 5))
12250 HALT_UNALLOC;
12251
12252 /* Dispatch on size:op:N. */
12253 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12254 {
12255 case 0: and32_shift (cpu, shiftType, count); return;
12256 case 1: bic32_shift (cpu, shiftType, count); return;
12257 case 2: orr32_shift (cpu, shiftType, count); return;
12258 case 3: orn32_shift (cpu, shiftType, count); return;
12259 case 4: eor32_shift (cpu, shiftType, count); return;
12260 case 5: eon32_shift (cpu, shiftType, count); return;
12261 case 6: ands32_shift (cpu, shiftType, count); return;
12262 case 7: bics32_shift (cpu, shiftType, count); return;
12263 case 8: and64_shift (cpu, shiftType, count); return;
12264 case 9: bic64_shift (cpu, shiftType, count); return;
12265 case 10: orr64_shift (cpu, shiftType, count); return;
12266 case 11: orn64_shift (cpu, shiftType, count); return;
12267 case 12: eor64_shift (cpu, shiftType, count); return;
12268 case 13: eon64_shift (cpu, shiftType, count); return;
12269 case 14: ands64_shift (cpu, shiftType, count); return;
12270 case 15: bics64_shift (cpu, shiftType, count); return;
12271 }
12272 }
12273
12274 /* 32 bit conditional select. */
12275 static void
12276 csel32 (sim_cpu *cpu, CondCode cc)
12277 {
12278 unsigned rm = INSTR (20, 16);
12279 unsigned rn = INSTR (9, 5);
12280 unsigned rd = INSTR (4, 0);
12281
12282 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12283 testConditionCode (cpu, cc)
12284 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12285 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12286 }
12287
12288 /* 64 bit conditional select. */
12289 static void
12290 csel64 (sim_cpu *cpu, CondCode cc)
12291 {
12292 unsigned rm = INSTR (20, 16);
12293 unsigned rn = INSTR (9, 5);
12294 unsigned rd = INSTR (4, 0);
12295
12296 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12297 testConditionCode (cpu, cc)
12298 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12299 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12300 }
12301
12302 /* 32 bit conditional increment. */
12303 static void
12304 csinc32 (sim_cpu *cpu, CondCode cc)
12305 {
12306 unsigned rm = INSTR (20, 16);
12307 unsigned rn = INSTR (9, 5);
12308 unsigned rd = INSTR (4, 0);
12309
12310 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12311 testConditionCode (cpu, cc)
12312 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12313 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12314 }
12315
12316 /* 64 bit conditional increment. */
12317 static void
12318 csinc64 (sim_cpu *cpu, CondCode cc)
12319 {
12320 unsigned rm = INSTR (20, 16);
12321 unsigned rn = INSTR (9, 5);
12322 unsigned rd = INSTR (4, 0);
12323
12324 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12325 testConditionCode (cpu, cc)
12326 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12327 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12328 }
12329
12330 /* 32 bit conditional invert. */
12331 static void
12332 csinv32 (sim_cpu *cpu, CondCode cc)
12333 {
12334 unsigned rm = INSTR (20, 16);
12335 unsigned rn = INSTR (9, 5);
12336 unsigned rd = INSTR (4, 0);
12337
12338 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12339 testConditionCode (cpu, cc)
12340 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12341 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12342 }
12343
12344 /* 64 bit conditional invert. */
12345 static void
12346 csinv64 (sim_cpu *cpu, CondCode cc)
12347 {
12348 unsigned rm = INSTR (20, 16);
12349 unsigned rn = INSTR (9, 5);
12350 unsigned rd = INSTR (4, 0);
12351
12352 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12353 testConditionCode (cpu, cc)
12354 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12355 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12356 }
12357
12358 /* 32 bit conditional negate. */
12359 static void
12360 csneg32 (sim_cpu *cpu, CondCode cc)
12361 {
12362 unsigned rm = INSTR (20, 16);
12363 unsigned rn = INSTR (9, 5);
12364 unsigned rd = INSTR (4, 0);
12365
12366 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12367 testConditionCode (cpu, cc)
12368 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12369 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12370 }
12371
12372 /* 64 bit conditional negate. */
12373 static void
12374 csneg64 (sim_cpu *cpu, CondCode cc)
12375 {
12376 unsigned rm = INSTR (20, 16);
12377 unsigned rn = INSTR (9, 5);
12378 unsigned rd = INSTR (4, 0);
12379
12380 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12381 testConditionCode (cpu, cc)
12382 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12383 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12384 }
12385
12386 static void
12387 dexCondSelect (sim_cpu *cpu)
12388 {
12389 /* instr[28,21] = 11010100
12390 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12391 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12392 100 ==> CSINV, 101 ==> CSNEG,
12393 _1_ ==> UNALLOC
12394 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12395 instr[15,12] = cond */
12397
12398 CondCode cc = INSTR (15, 12);
12399 uint32_t S = INSTR (29, 29);
12400 uint32_t op2 = INSTR (11, 10);
12401
12402 if (S == 1)
12403 HALT_UNALLOC;
12404
12405 if (op2 & 0x2)
12406 HALT_UNALLOC;
12407
12408 switch ((INSTR (31, 30) << 1) | op2)
12409 {
12410 case 0: csel32 (cpu, cc); return;
12411 case 1: csinc32 (cpu, cc); return;
12412 case 2: csinv32 (cpu, cc); return;
12413 case 3: csneg32 (cpu, cc); return;
12414 case 4: csel64 (cpu, cc); return;
12415 case 5: csinc64 (cpu, cc); return;
12416 case 6: csinv64 (cpu, cc); return;
12417 case 7: csneg64 (cpu, cc); return;
12418 }
12419 }
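/* Illustrative worked example (editorial note, not from the source): the
   encoding 0x9a9f17e0 is "cset x0, eq", an alias of "csinc x0, xzr, xzr,
   ne".  Here size = 1 and op2 = 01, so the dispatch value is
   (0b10 << 1) | 1 = 5 and csinc64 runs with cc = NE; since XZR reads as
   zero, x0 becomes 1 when EQ holds and 0 otherwise.  */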
12420
12421 /* Some helpers for counting leading 1 or 0 bits. */
12422
12423 /* Counts the number of leading bits which are the same
12424 in a 32 bit value in the range 1 to 32. */
12425 static uint32_t
12426 leading32 (uint32_t value)
12427 {
12428 int32_t mask = 0xffff0000;
12429 uint32_t count = 16; /* Counts number of bits set in mask. */
12430 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12431 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12432
12433 while (lo + 1 < hi)
12434 {
12435 int32_t test = (value & mask);
12436
12437 if (test == 0 || test == mask)
12438 {
12439 lo = count;
12440 count = (lo + hi) / 2;
12441 mask >>= (count - lo);
12442 }
12443 else
12444 {
12445 hi = count;
12446 count = (lo + hi) / 2;
12447 mask <<= hi - count;
12448 }
12449 }
12450
12451 if (lo != hi)
12452 {
12453 int32_t test;
12454
12455 mask >>= 1;
12456 test = (value & mask);
12457
12458 if (test == 0 || test == mask)
12459 count = hi;
12460 else
12461 count = lo;
12462 }
12463
12464 return count;
12465 }
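/* Reference sketch (editorial; kept under #if 0 so it is never built):
   the binary search above is equivalent to this linear scan counting how
   many top bits match the sign bit, e.g. leading32 (0) == 32,
   leading32 (1) == 31 and leading32 (0x80000000) == 1.  */
#if 0
static uint32_t
leading32_ref (uint32_t value)
{
  uint32_t top = value >> 31;   /* The sign bit.  */
  uint32_t count = 1;           /* The sign bit always matches itself.  */
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == top; i--)
    count++;
  return count;
}
#endif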
12466
12467 /* Counts the number of leading bits which are the same
12468 in a 64 bit value in the range 1 to 64. */
12469 static uint64_t
12470 leading64 (uint64_t value)
12471 {
12472 int64_t mask = 0xffffffff00000000LL;
12473 uint64_t count = 32; /* Counts number of bits set in mask. */
12474 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12475 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12476
12477 while (lo + 1 < hi)
12478 {
12479 int64_t test = (value & mask);
12480
12481 if (test == 0 || test == mask)
12482 {
12483 lo = count;
12484 count = (lo + hi) / 2;
12485 mask >>= (count - lo);
12486 }
12487 else
12488 {
12489 hi = count;
12490 count = (lo + hi) / 2;
12491 mask <<= hi - count;
12492 }
12493 }
12494
12495 if (lo != hi)
12496 {
12497 int64_t test;
12498
12499 mask >>= 1;
12500 test = (value & mask);
12501
12502 if (test == 0 || test == mask)
12503 count = hi;
12504 else
12505 count = lo;
12506 }
12507
12508 return count;
12509 }
12510
12511 /* Bit operations. */
12512 /* N.B register args may not be SP. */
12513
12514 /* 32 bit count leading sign bits. */
12515 static void
12516 cls32 (sim_cpu *cpu)
12517 {
12518 unsigned rn = INSTR (9, 5);
12519 unsigned rd = INSTR (4, 0);
12520
12521 /* N.B. the result needs to exclude the leading bit. */
12522 aarch64_set_reg_u64
12523 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12524 }
12525
12526 /* 64 bit count leading sign bits. */
12527 static void
12528 cls64 (sim_cpu *cpu)
12529 {
12530 unsigned rn = INSTR (9, 5);
12531 unsigned rd = INSTR (4, 0);
12532
12533 /* N.B. the result needs to exclude the leading bit. */
12534 aarch64_set_reg_u64
12535 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12536 }
12537
12538 /* 32 bit count leading zero bits. */
12539 static void
12540 clz32 (sim_cpu *cpu)
12541 {
12542 unsigned rn = INSTR (9, 5);
12543 unsigned rd = INSTR (4, 0);
12544 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12545
12546 /* if the sign (top) bit is set then the count is 0. */
12547 if (pick32 (value, 31, 31))
12548 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12549 else
12550 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12551 }
12552
12553 /* 64 bit count leading zero bits. */
12554 static void
12555 clz64 (sim_cpu *cpu)
12556 {
12557 unsigned rn = INSTR (9, 5);
12558 unsigned rd = INSTR (4, 0);
12559 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12560
12561 /* if the sign (top) bit is set then the count is 0. */
12562 if (pick64 (value, 63, 63))
12563 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12564 else
12565 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12566 }
12567
12568 /* 32 bit reverse bits. */
12569 static void
12570 rbit32 (sim_cpu *cpu)
12571 {
12572 unsigned rn = INSTR (9, 5);
12573 unsigned rd = INSTR (4, 0);
12574 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12575 uint32_t result = 0;
12576 int i;
12577
12578 for (i = 0; i < 32; i++)
12579 {
12580 result <<= 1;
12581 result |= (value & 1);
12582 value >>= 1;
12583 }
12584 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12585 }
12586
12587 /* 64 bit reverse bits. */
12588 static void
12589 rbit64 (sim_cpu *cpu)
12590 {
12591 unsigned rn = INSTR (9, 5);
12592 unsigned rd = INSTR (4, 0);
12593 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12594 uint64_t result = 0;
12595 int i;
12596
12597 for (i = 0; i < 64; i++)
12598 {
12599 result <<= 1;
12600 result |= (value & 1UL);
12601 value >>= 1;
12602 }
12603 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12604 }
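/* Worked example (editorial note): each pass of the loops above shifts
   RESULT up and copies in the current lowest bit of VALUE, so after all
   the iterations the bit order is fully reversed; e.g. rbit32 maps
   0x00000001 to 0x80000000 and 0x12345678 to 0x1e6a2c48.  */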
12605
12606 /* 32 bit reverse bytes. */
12607 static void
12608 rev32 (sim_cpu *cpu)
12609 {
12610 unsigned rn = INSTR (9, 5);
12611 unsigned rd = INSTR (4, 0);
12612 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12613 uint32_t result = 0;
12614 int i;
12615
12616 for (i = 0; i < 4; i++)
12617 {
12618 result <<= 8;
12619 result |= (value & 0xff);
12620 value >>= 8;
12621 }
12622 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12623 }
12624
12625 /* 64 bit reverse bytes. */
12626 static void
12627 rev64 (sim_cpu *cpu)
12628 {
12629 unsigned rn = INSTR (9, 5);
12630 unsigned rd = INSTR (4, 0);
12631 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12632 uint64_t result = 0;
12633 int i;
12634
12635 for (i = 0; i < 8; i++)
12636 {
12637 result <<= 8;
12638 result |= (value & 0xffULL);
12639 value >>= 8;
12640 }
12641 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12642 }
12643
12644 /* 32 bit reverse shorts. */
12645 /* N.B. this reverses the order of the bytes in each half word. */
12646 static void
12647 revh32 (sim_cpu *cpu)
12648 {
12649 unsigned rn = INSTR (9, 5);
12650 unsigned rd = INSTR (4, 0);
12651 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12652 uint32_t result = 0;
12653 int i;
12654
12655 for (i = 0; i < 2; i++)
12656 {
12657 result <<= 8;
12658 result |= (value & 0x00ff00ff);
12659 value >>= 8;
12660 }
12661 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12662 }
12663
12664 /* 64 bit reverse shorts. */
12665 /* N.B. this reverses the order of the bytes in each half word. */
12666 static void
12667 revh64 (sim_cpu *cpu)
12668 {
12669 unsigned rn = INSTR (9, 5);
12670 unsigned rd = INSTR (4, 0);
12671 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12672 uint64_t result = 0;
12673 int i;
12674
12675 for (i = 0; i < 2; i++)
12676 {
12677 result <<= 8;
12678 result |= (value & 0x00ff00ff00ff00ffULL);
12679 value >>= 8;
12680 }
12681 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12682 }
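/* Worked example (editorial note): the two masked passes above swap the
   bytes within each 16 bit lane, so revh32 maps 0xaabbccdd to
   0xbbaaddcc.  First pass: result = 0x00bb00dd, value = 0x00aabbcc;
   second pass: result = 0xbb00dd00 | 0x00aa00cc = 0xbbaaddcc.  */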
12683
12684 static void
12685 dexDataProc1Source (sim_cpu *cpu)
12686 {
12687 /* instr[30] = 1
12688 instr[28,21] = 111010110
12689 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12690 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12691 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12692 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12693 000010 ==> REV, 000011 ==> REV (64 bit only, ow UNALLOC)
12694 000100 ==> CLZ, 000101 ==> CLS
12695 ow ==> UNALLOC
12696 instr[9,5] = rn : may not be SP
12697 instr[4,0] = rd : may not be SP. */
12698
12699 uint32_t S = INSTR (29, 29);
12700 uint32_t opcode2 = INSTR (20, 16);
12701 uint32_t opcode = INSTR (15, 10);
12702 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12703
12704 if (S == 1)
12705 HALT_UNALLOC;
12706
12707 if (opcode2 != 0)
12708 HALT_UNALLOC;
12709
12710 if (opcode & 0x38)
12711 HALT_UNALLOC;
12712
12713 switch (dispatch)
12714 {
12715 case 0: rbit32 (cpu); return;
12716 case 1: revh32 (cpu); return;
12717 case 2: rev32 (cpu); return;
12718 case 4: clz32 (cpu); return;
12719 case 5: cls32 (cpu); return;
12720 case 8: rbit64 (cpu); return;
12721 case 9: revh64 (cpu); return;
12722 case 10: rev32 (cpu); return;
12723 case 11: rev64 (cpu); return;
12724 case 12: clz64 (cpu); return;
12725 case 13: cls64 (cpu); return;
12726 default: HALT_UNALLOC;
12727 }
12728 }
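/* Dispatch example (editorial note): "rev w0, w1" has size = 0 and
   opcode = 000010, giving dispatch = 2 (rev32); "rev x0, x1" has
   opcode = 000011, giving dispatch = (1 << 3) | 3 = 11 (rev64); and
   dispatch 10 selects the REV32 form on a 64 bit register, handled here
   by rev32 acting on the low word.  */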
12729
12730 /* Variable shift.
12731 Shifts by count supplied in register.
12732 N.B register args may not be SP.
12733 These all use the shifted auxiliary function for
12734 simplicity and clarity. Writing the actual shift
12735 inline would avoid a branch and so be faster but
12736 would also necessitate getting signs right. */
12737
12738 /* 32 bit arithmetic shift right. */
12739 static void
12740 asrv32 (sim_cpu *cpu)
12741 {
12742 unsigned rm = INSTR (20, 16);
12743 unsigned rn = INSTR (9, 5);
12744 unsigned rd = INSTR (4, 0);
12745
12746 aarch64_set_reg_u64
12747 (cpu, rd, NO_SP,
12748 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12749 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12750 }
12751
12752 /* 64 bit arithmetic shift right. */
12753 static void
12754 asrv64 (sim_cpu *cpu)
12755 {
12756 unsigned rm = INSTR (20, 16);
12757 unsigned rn = INSTR (9, 5);
12758 unsigned rd = INSTR (4, 0);
12759
12760 aarch64_set_reg_u64
12761 (cpu, rd, NO_SP,
12762 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12763 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12764 }
12765
12766 /* 32 bit logical shift left. */
12767 static void
12768 lslv32 (sim_cpu *cpu)
12769 {
12770 unsigned rm = INSTR (20, 16);
12771 unsigned rn = INSTR (9, 5);
12772 unsigned rd = INSTR (4, 0);
12773
12774 aarch64_set_reg_u64
12775 (cpu, rd, NO_SP,
12776 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12777 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12778 }
12779
12780 /* 64 bit logical shift left. */
12781 static void
12782 lslv64 (sim_cpu *cpu)
12783 {
12784 unsigned rm = INSTR (20, 16);
12785 unsigned rn = INSTR (9, 5);
12786 unsigned rd = INSTR (4, 0);
12787
12788 aarch64_set_reg_u64
12789 (cpu, rd, NO_SP,
12790 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12791 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12792 }
12793
12794 /* 32 bit logical shift right. */
12795 static void
12796 lsrv32 (sim_cpu *cpu)
12797 {
12798 unsigned rm = INSTR (20, 16);
12799 unsigned rn = INSTR (9, 5);
12800 unsigned rd = INSTR (4, 0);
12801
12802 aarch64_set_reg_u64
12803 (cpu, rd, NO_SP,
12804 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12805 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12806 }
12807
12808 /* 64 bit logical shift right. */
12809 static void
12810 lsrv64 (sim_cpu *cpu)
12811 {
12812 unsigned rm = INSTR (20, 16);
12813 unsigned rn = INSTR (9, 5);
12814 unsigned rd = INSTR (4, 0);
12815
12816 aarch64_set_reg_u64
12817 (cpu, rd, NO_SP,
12818 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12819 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12820 }
12821
12822 /* 32 bit rotate right. */
12823 static void
12824 rorv32 (sim_cpu *cpu)
12825 {
12826 unsigned rm = INSTR (20, 16);
12827 unsigned rn = INSTR (9, 5);
12828 unsigned rd = INSTR (4, 0);
12829
12830 aarch64_set_reg_u64
12831 (cpu, rd, NO_SP,
12832 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12833 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12834 }
12835
12836 /* 64 bit rotate right. */
12837 static void
12838 rorv64 (sim_cpu *cpu)
12839 {
12840 unsigned rm = INSTR (20, 16);
12841 unsigned rn = INSTR (9, 5);
12842 unsigned rd = INSTR (4, 0);
12843
12844 aarch64_set_reg_u64
12845 (cpu, rd, NO_SP,
12846 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12847 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12848 }
12849
12850
12851 /* Divide. */
12852
12853 /* 32 bit signed divide. */
12854 static void
12855 sdiv32 (sim_cpu *cpu)
12856 {
12857 unsigned rm = INSTR (20, 16);
12858 unsigned rn = INSTR (9, 5);
12859 unsigned rd = INSTR (4, 0);
12860 /* N.B. the pseudo-code does the divide using 64 bit data. */
12861 /* TODO : check that this rounds towards zero as required. */
12862 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12863 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12864
12865 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12866 divisor ? ((int32_t) (dividend / divisor)) : 0);
12867 }
12868
12869 /* 64 bit signed divide. */
12870 static void
12871 sdiv64 (sim_cpu *cpu)
12872 {
12873 unsigned rm = INSTR (20, 16);
12874 unsigned rn = INSTR (9, 5);
12875 unsigned rd = INSTR (4, 0);
12876
12877 /* TODO : check that this rounds towards zero as required. */
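/* N.B. (editorial note) if rn holds INT64_MIN and rm holds -1 the C
   division below overflows, which is undefined behaviour; the AArch64
   SDIV instruction defines that case to yield INT64_MIN. */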
12878 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12879
12880 aarch64_set_reg_s64
12881 (cpu, rd, NO_SP,
12882 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12883 }
12884
12885 /* 32 bit unsigned divide. */
12886 static void
12887 udiv32 (sim_cpu *cpu)
12888 {
12889 unsigned rm = INSTR (20, 16);
12890 unsigned rn = INSTR (9, 5);
12891 unsigned rd = INSTR (4, 0);
12892
12893 /* N.B. the pseudo-code does the divide using 64 bit data. */
12894 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12895 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12896
12897 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12898 divisor ? (uint32_t) (dividend / divisor) : 0);
12899 }
12900
12901 /* 64 bit unsigned divide. */
12902 static void
12903 udiv64 (sim_cpu *cpu)
12904 {
12905 unsigned rm = INSTR (20, 16);
12906 unsigned rn = INSTR (9, 5);
12907 unsigned rd = INSTR (4, 0);
12908
12909 /* TODO : check that this rounds towards zero as required. */
12910 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12911
12912 aarch64_set_reg_u64
12913 (cpu, rd, NO_SP,
12914 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12915 }
12916
12917 static void
12918 dexDataProc2Source (sim_cpu *cpu)
12919 {
12920 /* assert instr[30] == 0
12921 instr[28,21] == 11010110
12922 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12923 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12924 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12925 001000 ==> LSLV, 001001 ==> LSRV
12926 001010 ==> ASRV, 001011 ==> RORV
12927 ow ==> UNALLOC. */
12928
12929 uint32_t dispatch;
12930 uint32_t S = INSTR (29, 29);
12931 uint32_t opcode = INSTR (15, 10);
12932
12933 if (S == 1)
12934 HALT_UNALLOC;
12935
12936 if (opcode & 0x34)
12937 HALT_UNALLOC;
12938
12939 dispatch = ( (INSTR (31, 31) << 3)
12940 | (uimm (opcode, 3, 3) << 2)
12941 | uimm (opcode, 1, 0));
12942 switch (dispatch)
12943 {
12944 case 2: udiv32 (cpu); return;
12945 case 3: sdiv32 (cpu); return;
12946 case 4: lslv32 (cpu); return;
12947 case 5: lsrv32 (cpu); return;
12948 case 6: asrv32 (cpu); return;
12949 case 7: rorv32 (cpu); return;
12950 case 10: udiv64 (cpu); return;
12951 case 11: sdiv64 (cpu); return;
12952 case 12: lslv64 (cpu); return;
12953 case 13: lsrv64 (cpu); return;
12954 case 14: asrv64 (cpu); return;
12955 case 15: rorv64 (cpu); return;
12956 default: HALT_UNALLOC;
12957 }
12958 }
12959
12960
12961 /* Multiply. */
12962
12963 /* 32 bit multiply and add. */
12964 static void
12965 madd32 (sim_cpu *cpu)
12966 {
12967 unsigned rm = INSTR (20, 16);
12968 unsigned ra = INSTR (14, 10);
12969 unsigned rn = INSTR (9, 5);
12970 unsigned rd = INSTR (4, 0);
12971
12972 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12973 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12974 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12975 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12976 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12977 }
12978
12979 /* 64 bit multiply and add. */
12980 static void
12981 madd64 (sim_cpu *cpu)
12982 {
12983 unsigned rm = INSTR (20, 16);
12984 unsigned ra = INSTR (14, 10);
12985 unsigned rn = INSTR (9, 5);
12986 unsigned rd = INSTR (4, 0);
12987
12988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12989 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12990 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12991 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12992 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12993 }
12994
12995 /* 32 bit multiply and sub. */
12996 static void
12997 msub32 (sim_cpu *cpu)
12998 {
12999 unsigned rm = INSTR (20, 16);
13000 unsigned ra = INSTR (14, 10);
13001 unsigned rn = INSTR (9, 5);
13002 unsigned rd = INSTR (4, 0);
13003
13004 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13005 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13006 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13007 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13008 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13009 }
13010
13011 /* 64 bit multiply and sub. */
13012 static void
13013 msub64 (sim_cpu *cpu)
13014 {
13015 unsigned rm = INSTR (20, 16);
13016 unsigned ra = INSTR (14, 10);
13017 unsigned rn = INSTR (9, 5);
13018 unsigned rd = INSTR (4, 0);
13019
13020 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13021 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13022 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13023 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13024 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13025 }
13026
13027 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13028 static void
13029 smaddl (sim_cpu *cpu)
13030 {
13031 unsigned rm = INSTR (20, 16);
13032 unsigned ra = INSTR (14, 10);
13033 unsigned rn = INSTR (9, 5);
13034 unsigned rd = INSTR (4, 0);
13035
13036 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13037 obtain a 64 bit product. */
13038 aarch64_set_reg_s64
13039 (cpu, rd, NO_SP,
13040 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13041 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13042 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13043 }
13044
13045 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13046 static void
13047 smsubl (sim_cpu *cpu)
13048 {
13049 unsigned rm = INSTR (20, 16);
13050 unsigned ra = INSTR (14, 10);
13051 unsigned rn = INSTR (9, 5);
13052 unsigned rd = INSTR (4, 0);
13053
13054 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13055 obtain a 64 bit product. */
13056 aarch64_set_reg_s64
13057 (cpu, rd, NO_SP,
13058 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13059 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13060 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13061 }
13062
13063 /* Integer Multiply/Divide. */
13064
13065 /* First some macros and a helper function. */
13066 /* Macros to test or access elements of 64 bit words. */
13067
13068 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13069 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13070 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13071 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13072 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13073 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13074
13075 /* Offset of sign bit in 64 bit signed integer. */
13076 #define SIGN_SHIFT_U64 63
13077 /* The sign bit itself -- also identifies the minimum negative int value. */
13078 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13079 /* Return true if a 64 bit signed int presented as an unsigned int is the
13080 most negative value. */
13081 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13082 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13083 int has its sign bit set. */
13084 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13085 /* Return -1L or 1L according to whether a 64 bit signed int presented as
13086 an unsigned int has its sign bit set or not. */
13087 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13088 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13089 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13090
13091 /* Multiply two 64 bit ints and return
13092 the hi 64 bits of the 128 bit product. */
13093
13094 static uint64_t
13095 mul64hi (uint64_t value1, uint64_t value2)
13096 {
13097 uint64_t resultmid1;
13098 uint64_t result;
13099 uint64_t value1_lo = lowWordToU64 (value1);
13100 uint64_t value1_hi = highWordToU64 (value1) ;
13101 uint64_t value2_lo = lowWordToU64 (value2);
13102 uint64_t value2_hi = highWordToU64 (value2);
13103
13104 /* Cross-multiply and collect results. */
13105 uint64_t xproductlo = value1_lo * value2_lo;
13106 uint64_t xproductmid1 = value1_lo * value2_hi;
13107 uint64_t xproductmid2 = value1_hi * value2_lo;
13108 uint64_t xproducthi = value1_hi * value2_hi;
13109 uint64_t carry = 0;
13110 /* Start accumulating 64 bit results. */
13111 /* Drop bottom half of lowest cross-product. */
13112 uint64_t resultmid = xproductlo >> 32;
13113 /* Add in middle products. */
13114 resultmid = resultmid + xproductmid1;
13115
13116 /* Check for overflow. */
13117 if (resultmid < xproductmid1)
13118 /* Carry over 1 into top cross-product. */
13119 carry++;
13120
13121 resultmid1 = resultmid + xproductmid2;
13122
13123 /* Check for overflow. */
13124 if (resultmid1 < xproductmid2)
13125 /* Carry over 1 into top cross-product. */
13126 carry++;
13127
13128 /* Drop lowest 32 bits of middle cross-product. */
13129 result = resultmid1 >> 32;
13130 /* Move carry bit to just above middle cross-product highest bit. */
13131 carry = carry << 32;
13132
13133 /* Add top cross-product and any carry. */
13134 result += xproducthi + carry;
13135
13136 return result;
13137 }
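/* Cross-check sketch (editorial; assumes a compiler providing the
   GCC/Clang __int128 extension and is kept under #if 0, never built):  */
#if 0
static uint64_t
mul64hi_ref (uint64_t value1, uint64_t value2)
{
  /* Form the full 128 bit product and keep its top half.  */
  return (uint64_t) (((unsigned __int128) value1 * value2) >> 64);
}
#endif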
13138
13139 /* Signed multiply high, source, source2 :
13140 64 bit, dest <-- high 64-bit of result. */
13141 static void
13142 smulh (sim_cpu *cpu)
13143 {
13144 uint64_t uresult;
13145 int64_t result;
13146 unsigned rm = INSTR (20, 16);
13147 unsigned rn = INSTR (9, 5);
13148 unsigned rd = INSTR (4, 0);
13149 GReg ra = INSTR (14, 10);
13150 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13151 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13152 uint64_t uvalue1;
13153 uint64_t uvalue2;
13154 int negate = 0;
13155
13156 if (ra != R31)
13157 HALT_UNALLOC;
13158
13159 /* Convert to unsigned and use the unsigned mul64hi routine,
13160 then fix the sign up afterwards. */
13161 if (value1 < 0)
13162 {
13163 negate = !negate;
13164 uvalue1 = -value1;
13165 }
13166 else
13167 {
13168 uvalue1 = value1;
13169 }
13170
13171 if (value2 < 0)
13172 {
13173 negate = !negate;
13174 uvalue2 = -value2;
13175 }
13176 else
13177 {
13178 uvalue2 = value2;
13179 }
13180
13181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13182
13183 uresult = mul64hi (uvalue1, uvalue2);
13184 result = uresult;
13185
13186 if (negate)
13187 {
13188 /* Multiply 128-bit result by -1, which means highpart gets inverted,
13189 and has carry in added only if low part is 0. */
13190 result = ~result;
13191 if ((uvalue1 * uvalue2) == 0)
13192 result += 1;
13193 }
13194
13195 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13196 }
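/* The negation step above uses the identity -x == ~x + 1 applied to the
   128 bit product: the high half is inverted, and the +1 carries into it
   only when the low half is zero.  Cross-check sketch (editorial;
   assumes the __int128 extension, kept under #if 0):  */
#if 0
static int64_t
smulh_ref (int64_t a, int64_t b)
{
  return (int64_t) (((__int128) a * b) >> 64);
}
#endif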
13197
13198 /* Unsigned multiply add long -- source, source2 :
13199 32 bit, source3 : 64 bit. */
13200 static void
13201 umaddl (sim_cpu *cpu)
13202 {
13203 unsigned rm = INSTR (20, 16);
13204 unsigned ra = INSTR (14, 10);
13205 unsigned rn = INSTR (9, 5);
13206 unsigned rd = INSTR (4, 0);
13207
13208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13209 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13210 obtain a 64 bit product. */
13211 aarch64_set_reg_u64
13212 (cpu, rd, NO_SP,
13213 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13214 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13215 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13216 }
13217
13218 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13219 static void
13220 umsubl (sim_cpu *cpu)
13221 {
13222 unsigned rm = INSTR (20, 16);
13223 unsigned ra = INSTR (14, 10);
13224 unsigned rn = INSTR (9, 5);
13225 unsigned rd = INSTR (4, 0);
13226
13227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13228 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13229 obtain a 64 bit product. */
13230 aarch64_set_reg_u64
13231 (cpu, rd, NO_SP,
13232 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13233 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13234 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13235 }
13236
13237 /* Unsigned multiply high, source, source2 :
13238 64 bit, dest <-- high 64-bit of result. */
13239 static void
13240 umulh (sim_cpu *cpu)
13241 {
13242 unsigned rm = INSTR (20, 16);
13243 unsigned rn = INSTR (9, 5);
13244 unsigned rd = INSTR (4, 0);
13245 GReg ra = INSTR (14, 10);
13246
13247 if (ra != R31)
13248 HALT_UNALLOC;
13249
13250 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13251 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13252 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13253 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13254 }
13255
13256 static void
13257 dexDataProc3Source (sim_cpu *cpu)
13258 {
13259 /* assert instr[28,24] == 11011. */
13260 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13261 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13262 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13263 instr[15] = o0 : 0/1 ==> ok
13264 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13265 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13266 0100 ==> SMULH, (64 bit only)
13267 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13268 1100 ==> UMULH (64 bit only)
13269 ow ==> UNALLOC. */
13270
13271 uint32_t dispatch;
13272 uint32_t size = INSTR (31, 31);
13273 uint32_t op54 = INSTR (30, 29);
13274 uint32_t op31 = INSTR (23, 21);
13275 uint32_t o0 = INSTR (15, 15);
13276
13277 if (op54 != 0)
13278 HALT_UNALLOC;
13279
13280 if (size == 0)
13281 {
13282 if (op31 != 0)
13283 HALT_UNALLOC;
13284
13285 if (o0 == 0)
13286 madd32 (cpu);
13287 else
13288 msub32 (cpu);
13289 return;
13290 }
13291
13292 dispatch = (op31 << 1) | o0;
13293
13294 switch (dispatch)
13295 {
13296 case 0: madd64 (cpu); return;
13297 case 1: msub64 (cpu); return;
13298 case 2: smaddl (cpu); return;
13299 case 3: smsubl (cpu); return;
13300 case 4: smulh (cpu); return;
13301 case 10: umaddl (cpu); return;
13302 case 11: umsubl (cpu); return;
13303 case 12: umulh (cpu); return;
13304 default: HALT_UNALLOC;
13305 }
13306 }
13307
13308 static void
13309 dexDPReg (sim_cpu *cpu)
13310 {
13311 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13312 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13313 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13314 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13315
13316 switch (group2)
13317 {
13318 case DPREG_LOG_000:
13319 case DPREG_LOG_001:
13320 dexLogicalShiftedRegister (cpu); return;
13321
13322 case DPREG_ADDSHF_010:
13323 dexAddSubtractShiftedRegister (cpu); return;
13324
13325 case DPREG_ADDEXT_011:
13326 dexAddSubtractExtendedRegister (cpu); return;
13327
13328 case DPREG_ADDCOND_100:
13329 {
13330 /* This set bundles a variety of different operations. */
13331 /* Check for: */
13332 /* 1) add/sub w carry. */
13333 uint32_t mask1 = 0x1FE00000U;
13334 uint32_t val1 = 0x1A000000U;
13335 /* 2) cond compare register/immediate. */
13336 uint32_t mask2 = 0x1FE00000U;
13337 uint32_t val2 = 0x1A400000U;
13338 /* 3) cond select. */
13339 uint32_t mask3 = 0x1FE00000U;
13340 uint32_t val3 = 0x1A800000U;
13341 /* 4) data proc 1/2 source. */
13342 uint32_t mask4 = 0x1FE00000U;
13343 uint32_t val4 = 0x1AC00000U;
13344
13345 if ((aarch64_get_instr (cpu) & mask1) == val1)
13346 dexAddSubtractWithCarry (cpu);
13347
13348 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13349 CondCompare (cpu);
13350
13351 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13352 dexCondSelect (cpu);
13353
13354 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13355 {
13356 /* Bit 30 is clear for data proc 2 source
13357 and set for data proc 1 source. */
13358 if (aarch64_get_instr (cpu) & (1U << 30))
13359 dexDataProc1Source (cpu);
13360 else
13361 dexDataProc2Source (cpu);
13362 }
13363
13364 else
13365 /* Should not reach here. */
13366 HALT_NYI;
13367
13368 return;
13369 }
13370
13371 case DPREG_3SRC_110:
13372 dexDataProc3Source (cpu); return;
13373
13374 case DPREG_UNALLOC_101:
13375 HALT_UNALLOC;
13376
13377 case DPREG_3SRC_111:
13378 dexDataProc3Source (cpu); return;
13379
13380 default:
13381 /* Should never reach here. */
13382 HALT_NYI;
13383 }
13384 }
13385
13386 /* Unconditional Branch immediate.
13387 Offset is a PC-relative byte offset in the range +/- 128MiB.
13388 The offset arrives here already scaled: the decode routine converts
13389 the instruction's raw word offset into a byte offset. */
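/* Range check (editorial note): imm26 is a signed word offset, so the
   reachable span is -2^25 .. 2^25 - 1 words, i.e. -2^27 .. 2^27 - 4
   bytes, which is the +/- 128MiB quoted above.  */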
13390
13391 /* Unconditional branch. */
13392 static void
13393 buc (sim_cpu *cpu, int32_t offset)
13394 {
13395 aarch64_set_next_PC_by_offset (cpu, offset);
13396 }
13397
13398 static unsigned stack_depth = 0;
13399
13400 /* Unconditional branch and link -- writes return PC to LR. */
13401 static void
13402 bl (sim_cpu *cpu, int32_t offset)
13403 {
13404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13405 aarch64_save_LR (cpu);
13406 aarch64_set_next_PC_by_offset (cpu, offset);
13407
13408 if (TRACE_BRANCH_P (cpu))
13409 {
13410 ++ stack_depth;
13411 TRACE_BRANCH (cpu,
13412 " %*scall %" PRIx64 " [%s]"
13413 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13414 stack_depth, " ", aarch64_get_next_PC (cpu),
13415 aarch64_get_func (CPU_STATE (cpu),
13416 aarch64_get_next_PC (cpu)),
13417 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13418 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13419 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13420 );
13421 }
13422 }
13423
13424 /* Unconditional Branch register.
13425 Branch/return address is in source register. */
13426
13427 /* Unconditional branch. */
13428 static void
13429 br (sim_cpu *cpu)
13430 {
13431 unsigned rn = INSTR (9, 5);
13432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13433 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13434 }
13435
13436 /* Unconditional branch and link -- writes return PC to LR. */
13437 static void
13438 blr (sim_cpu *cpu)
13439 {
13440 /* Ensure we read the destination before we write LR. */
13441 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13442
13443 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13444 aarch64_save_LR (cpu);
13445 aarch64_set_next_PC (cpu, target);
13446
13447 if (TRACE_BRANCH_P (cpu))
13448 {
13449 ++ stack_depth;
13450 TRACE_BRANCH (cpu,
13451 " %*scall %" PRIx64 " [%s]"
13452 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13453 stack_depth, " ", aarch64_get_next_PC (cpu),
13454 aarch64_get_func (CPU_STATE (cpu),
13455 aarch64_get_next_PC (cpu)),
13456 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13457 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13458 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13459 );
13460 }
13461 }
13462
13463 /* Return -- the assembler will default the source to LR. This is
13464 functionally equivalent to br but, presumably, unlike br it side
13465 effects the branch predictor. */
13466 static void
13467 ret (sim_cpu *cpu)
13468 {
13469 unsigned rn = INSTR (9, 5);
13470 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13471
13472 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13473 if (TRACE_BRANCH_P (cpu))
13474 {
13475 TRACE_BRANCH (cpu,
13476 " %*sreturn [result: %" PRIx64 "]",
13477 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13478 -- stack_depth;
13479 }
13480 }
13481
13482 /* NOP -- we implement this and call it from the decode in case we
13483 want to intercept it later. */
13484
13485 static void
13486 nop (sim_cpu *cpu)
13487 {
13488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13489 }
13490
13491 /* Data synchronization barrier. */
13492
13493 static void
13494 dsb (sim_cpu *cpu)
13495 {
13496 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13497 }
13498
13499 /* Data memory barrier. */
13500
13501 static void
13502 dmb (sim_cpu *cpu)
13503 {
13504 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13505 }
13506
13507 /* Instruction synchronization barrier. */
13508
13509 static void
13510 isb (sim_cpu *cpu)
13511 {
13512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13513 }
13514
13515 static void
13516 dexBranchImmediate (sim_cpu *cpu)
13517 {
13518 /* assert instr[30,26] == 00101
13519 instr[31] ==> 0 == B, 1 == BL
13520 instr[25,0] == imm26 branch offset counted in words. */
13521
13522 uint32_t top = INSTR (31, 31);
13523 /* We have a 26 bit signed word offset which we need to pass to the
13524 execute routine as a signed byte offset. */
13525 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13526
13527 if (top)
13528 bl (cpu, offset);
13529 else
13530 buc (cpu, offset);
13531 }
13532
13533 /* Control Flow. */
13534
13535 /* Conditional branch
13536
13537 Offset is a PC-relative byte offset in the range +/- 1MiB. Pos is
13538 a bit position in the range 0 .. 63.
13539
13540 cc is a CondCode enum value as pulled out of the decode
13541
13542 N.B. any offset register (source) can only be Xn or Wn. */
13543
13544 static void
13545 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13546 {
13547 /* The test returns TRUE if CC is met. */
13548 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13549 if (testConditionCode (cpu, cc))
13550 aarch64_set_next_PC_by_offset (cpu, offset);
13551 }
13552
13553 /* 32 bit branch on register non-zero. */
13554 static void
13555 cbnz32 (sim_cpu *cpu, int32_t offset)
13556 {
13557 unsigned rt = INSTR (4, 0);
13558
13559 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13560 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13561 aarch64_set_next_PC_by_offset (cpu, offset);
13562 }
13563
13564 /* 64 bit branch on register non-zero. */
13565 static void
13566 cbnz (sim_cpu *cpu, int32_t offset)
13567 {
13568 unsigned rt = INSTR (4, 0);
13569
13570 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13571 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13572 aarch64_set_next_PC_by_offset (cpu, offset);
13573 }
13574
13575 /* 32 bit branch on register zero. */
13576 static void
13577 cbz32 (sim_cpu *cpu, int32_t offset)
13578 {
13579 unsigned rt = INSTR (4, 0);
13580
13581 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13582 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13583 aarch64_set_next_PC_by_offset (cpu, offset);
13584 }
13585
13586 /* 64 bit branch on register zero. */
13587 static void
13588 cbz (sim_cpu *cpu, int32_t offset)
13589 {
13590 unsigned rt = INSTR (4, 0);
13591
13592 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13593 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13594 aarch64_set_next_PC_by_offset (cpu, offset);
13595 }
13596
13597 /* Branch on register bit test non-zero -- one size fits all. */
13598 static void
13599 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13600 {
13601 unsigned rt = INSTR (4, 0);
13602
13603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13604 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13605 aarch64_set_next_PC_by_offset (cpu, offset);
13606 }
13607
13608 /* Branch on register bit test zero -- one size fits all. */
13609 static void
13610 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13611 {
13612 unsigned rt = INSTR (4, 0);
13613
13614 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13615 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13616 aarch64_set_next_PC_by_offset (cpu, offset);
13617 }
13618
13619 static void
13620 dexCompareBranchImmediate (sim_cpu *cpu)
13621 {
13622 /* instr[30,25] = 01 1010
13623 instr[31] = size : 0 ==> 32, 1 ==> 64
13624 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13625 instr[23,5] = simm19 branch offset counted in words
13626 instr[4,0] = rt */
13627
13628 uint32_t size = INSTR (31, 31);
13629 uint32_t op = INSTR (24, 24);
13630 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13631
13632 if (size == 0)
13633 {
13634 if (op == 0)
13635 cbz32 (cpu, offset);
13636 else
13637 cbnz32 (cpu, offset);
13638 }
13639 else
13640 {
13641 if (op == 0)
13642 cbz (cpu, offset);
13643 else
13644 cbnz (cpu, offset);
13645 }
13646 }
13647
13648 static void
13649 dexTestBranchImmediate (sim_cpu *cpu)
13650 {
13651 /* instr[31] = b5 : bit 5 of test bit idx
13652 instr[30,25] = 01 1011
13653 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13654 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13655 instr[18,5] = simm14 : signed offset counted in words
13656 instr[4,0] = uimm5 */
13657
13658 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13659 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13660
13661 NYI_assert (30, 25, 0x1b);
13662
13663 if (INSTR (24, 24) == 0)
13664 tbz (cpu, pos, offset);
13665 else
13666 tbnz (cpu, pos, offset);
13667 }
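/* Bit index example (editorial note): "tbz x3, #42, label" encodes
   42 = 0b101010 as b5 = 1 and b40 = 01010, so pos = (1 << 5) | 10 = 42;
   b5 = 0 restricts the test to a W register's 32 bits.  */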
13668
13669 static void
13670 dexCondBranchImmediate (sim_cpu *cpu)
13671 {
13672 /* instr[31,25] = 010 1010
13673 instr[24] = op1 : (op1,op0) == 00 ==> B.cond, ow ==> UNALLOC
13674 instr[23,5] = simm19 : signed offset counted in words
13675 instr[4] = op0
13676 instr[3,0] = cond */
13677
13678 int32_t offset;
13679 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13680
13681 NYI_assert (31, 25, 0x2a);
13682
13683 if (op != 0)
13684 HALT_UNALLOC;
13685
13686 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13687
13688 bcc (cpu, offset, INSTR (3, 0));
13689 }
13690
13691 static void
13692 dexBranchRegister (sim_cpu *cpu)
13693 {
13694 /* instr[31,25] = 110 1011
13695 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13696 instr[20,16] = op2 : must be 11111
13697 instr[15,10] = op3 : must be 000000
13698 instr[4,0] = op4 : must be 00000. */
13699
13700 uint32_t op = INSTR (24, 21);
13701 uint32_t op2 = INSTR (20, 16);
13702 uint32_t op3 = INSTR (15, 10);
13703 uint32_t op4 = INSTR (4, 0);
13704
13705 NYI_assert (31, 25, 0x6b);
13706
13707 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13708 HALT_UNALLOC;
13709
13710 if (op == 0)
13711 br (cpu);
13712
13713 else if (op == 1)
13714 blr (cpu);
13715
13716 else if (op == 2)
13717 ret (cpu);
13718
13719 else
13720 {
13721 /* ERET and DRPS accept 0b11111 for rn = instr [4,0];
13722 anything else is unallocated. */
13723 uint32_t rn = INSTR (4, 0);
13724
13725 if (rn != 0x1f)
13726 HALT_UNALLOC;
13727
13728 if (op == 4 || op == 5)
13729 HALT_NYI;
13730
13731 HALT_UNALLOC;
13732 }
13733 }
13734
13735 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13736 but this may not be available. So instead we define the values we need
13737 here. */
13738 #define AngelSVC_Reason_Open 0x01
13739 #define AngelSVC_Reason_Close 0x02
13740 #define AngelSVC_Reason_Write 0x05
13741 #define AngelSVC_Reason_Read 0x06
13742 #define AngelSVC_Reason_IsTTY 0x09
13743 #define AngelSVC_Reason_Seek 0x0A
13744 #define AngelSVC_Reason_FLen 0x0C
13745 #define AngelSVC_Reason_Remove 0x0E
13746 #define AngelSVC_Reason_Rename 0x0F
13747 #define AngelSVC_Reason_Clock 0x10
13748 #define AngelSVC_Reason_Time 0x11
13749 #define AngelSVC_Reason_System 0x12
13750 #define AngelSVC_Reason_Errno 0x13
13751 #define AngelSVC_Reason_GetCmdLine 0x15
13752 #define AngelSVC_Reason_HeapInfo 0x16
13753 #define AngelSVC_Reason_ReportException 0x18
13754 #define AngelSVC_Reason_Elapsed 0x30
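/* Example call sequence (editorial sketch): a semihosted program asks
   for one of these services by loading the reason code into w0 and a
   pointer to a parameter block into x1, then executing HLT #0xf000:

       mov w0, #0x05     // AngelSVC_Reason_Write
       adr x1, block     // block = {fd, buf, len}
       hlt #0xf000       // serviced by handle_halt; result in x0.  */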
13755
13756
13757 static void
13758 handle_halt (sim_cpu *cpu, uint32_t val)
13759 {
13760 uint64_t result = 0;
13761
13762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13763 if (val != 0xf000)
13764 {
13765 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13766 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13767 sim_stopped, SIM_SIGTRAP);
13768 }
13769
13770 /* We have encountered an Angel SVC call. See if we can process it. */
13771 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13772 {
13773 case AngelSVC_Reason_HeapInfo:
13774 {
13775 /* Get the values. */
13776 uint64_t stack_top = aarch64_get_stack_start (cpu);
13777 uint64_t heap_base = aarch64_get_heap_start (cpu);
13778
13779 /* Get the pointer */
13780 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13781 ptr = aarch64_get_mem_u64 (cpu, ptr);
13782
13783 /* Fill in the memory block. */
13784 /* Start addr of heap. */
13785 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13786 /* End addr of heap. */
13787 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13788 /* Lowest stack addr. */
13789 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13790 /* Initial stack addr. */
13791 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13792
13793 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13794 }
13795 break;
13796
13797 case AngelSVC_Reason_Open:
13798 {
13799 /* Get the pointer */
13800 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13801 /* FIXME: For now we just assume that we will only be asked
13802 to open the standard file descriptors. */
13803 static int fd = 0;
13804 result = fd ++;
13805
13806 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13807 }
13808 break;
13809
13810 case AngelSVC_Reason_Close:
13811 {
13812 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13813 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13814 result = 0;
13815 }
13816 break;
13817
13818 case AngelSVC_Reason_Errno:
13819 result = 0;
13820 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13821 break;
13822
13823 case AngelSVC_Reason_Clock:
13824 result =
13825 #ifdef CLOCKS_PER_SEC
13826 (CLOCKS_PER_SEC >= 100)
13827 ? (clock () / (CLOCKS_PER_SEC / 100))
13828 : ((clock () * 100) / CLOCKS_PER_SEC)
13829 #else
13830 /* Presume unix... clock() returns microseconds. */
13831 (clock () / 10000)
13832 #endif
13833 ;
13834 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13835 break;
13836
13837 case AngelSVC_Reason_GetCmdLine:
13838 {
13839 /* Get the pointer */
13840 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13841 ptr = aarch64_get_mem_u64 (cpu, ptr);
13842
13843 /* FIXME: No command line for now. */
13844 aarch64_set_mem_u64 (cpu, ptr, 0);
13845 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13846 }
13847 break;
13848
13849 case AngelSVC_Reason_IsTTY:
13850 result = 1;
13851 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13852 break;
13853
13854 case AngelSVC_Reason_Write:
13855 {
13856 /* Get the pointer */
13857 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13858 /* Get the write control block. */
13859 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13860 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13861 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13862
13863 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13864 PRIx64 " on descriptor %" PRIx64,
13865 len, buf, fd);
13866
13867 if (len > 1280)
13868 {
13869 TRACE_SYSCALL (cpu,
13870 " AngelSVC: Write: Suspiciously long write: %ld",
13871 (long) len);
13872 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13873 sim_stopped, SIM_SIGBUS);
13874 }
13875 else if (fd == 1)
13876 {
13877 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13878 }
13879 else if (fd == 2)
13880 {
13881 TRACE (cpu, 0, "\n");
13882 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13883 (int) len, aarch64_get_mem_ptr (cpu, buf));
13884 TRACE (cpu, 0, "\n");
13885 }
13886 else
13887 {
13888 TRACE_SYSCALL (cpu,
13889 " AngelSVC: Write: Unexpected file handle: %d",
13890 (int) fd);
13891 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13892 sim_stopped, SIM_SIGABRT);
13893 }
13894 }
13895 break;
13896
13897 case AngelSVC_Reason_ReportException:
13898 {
13899 /* Get the pointer */
13900 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13901 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13902 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13903 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13904
13905 TRACE_SYSCALL (cpu,
13906 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13907 type, state);
13908
13909 if (type == 0x20026)
13910 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13911 sim_exited, state);
13912 else
13913 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13914 sim_stopped, SIM_SIGINT);
13915 }
13916 break;
13917
13918 case AngelSVC_Reason_Read:
13919 case AngelSVC_Reason_FLen:
13920 case AngelSVC_Reason_Seek:
13921 case AngelSVC_Reason_Remove:
13922 case AngelSVC_Reason_Time:
13923 case AngelSVC_Reason_System:
13924 case AngelSVC_Reason_Rename:
13925 case AngelSVC_Reason_Elapsed:
13926 default:
13927 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13928 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13929 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13930 sim_stopped, SIM_SIGTRAP);
13931 }
13932
13933 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13934 }
13935
13936 static void
13937 dexExcpnGen (sim_cpu *cpu)
13938 {
13939 /* instr[31:24] = 11010100
13940 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13941 010 ==> HLT, 101 ==> DBG GEN EXCPN
13942 instr[20,5] = imm16
13943 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13944 instr[1,0] = LL : discriminates opc */
13945
13946 uint32_t opc = INSTR (23, 21);
13947 uint32_t imm16 = INSTR (20, 5);
13948 uint32_t opc2 = INSTR (4, 2);
13949 uint32_t LL;
13950
13951 NYI_assert (31, 24, 0xd4);
13952
13953 if (opc2 != 0)
13954 HALT_UNALLOC;
13955
13956 LL = INSTR (1, 0);
13957
13958 /* We only implement HLT and BRK for now. */
13959 if (opc == 1 && LL == 0)
13960 {
13961 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13962 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13963 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13964 }
13965
13966 if (opc == 2 && LL == 0)
13967 handle_halt (cpu, imm16);
13968
13969 else if (opc == 0 || opc == 5)
13970 HALT_NYI;
13971
13972 else
13973 HALT_UNALLOC;
13974 }
13975
13976 /* Stub for accessing system registers. */
13977
13978 static uint64_t
13979 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13980 unsigned crm, unsigned op2)
13981 {
13982 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13983 /* DCZID_EL0 - the Data Cache Zero ID register.
13984 We do not support DC ZVA at the moment, so
13985 we return a value with the disable bit set.
13986 We implement support for the DCZID register since
13987 it is used by the C library's memset function. */
13988 return ((uint64_t) 1) << 4;
13989
13990 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13991 /* Cache Type Register. */
13992 return 0x80008000UL;
13993
13994 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13995 /* TPIDR_EL0 - thread pointer id. */
13996 return aarch64_get_thread_id (cpu);
13997
13998 if (op1 == 3 && crm == 4 && op2 == 0)
13999 return aarch64_get_FPCR (cpu);
14000
14001 if (op1 == 3 && crm == 4 && op2 == 1)
14002 return aarch64_get_FPSR (cpu);
14003
14004 else if (op1 == 3 && crm == 2 && op2 == 0)
14005 return aarch64_get_CPSR (cpu);
14006
14007 HALT_NYI;
14008 }
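/* Example (editorial note): "mrs x0, dczid_el0" decodes to op1 = 3,
   CRn = 0, CRm = 0, op2 = 7, so system_get returns 1 << 4: a DCZID_EL0
   value with the DZP (DC ZVA prohibited) bit set and a zero block-size
   field.  */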
14009
14010 static void
14011 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14012 unsigned crm, unsigned op2, uint64_t val)
14013 {
14014 if (op1 == 3 && crm == 4 && op2 == 0)
14015 aarch64_set_FPCR (cpu, val);
14016
14017 else if (op1 == 3 && crm == 4 && op2 == 1)
14018 aarch64_set_FPSR (cpu, val);
14019
14020 else if (op1 == 3 && crm == 2 && op2 == 0)
14021 aarch64_set_CPSR (cpu, val);
14022
14023 else
14024 HALT_NYI;
14025 }
14026
14027 static void
14028 do_mrs (sim_cpu *cpu)
14029 {
14030 /* instr[31:20] = 1101 0101 0001 1
14031 instr[19] = op0
14032 instr[18,16] = op1
14033 instr[15,12] = CRn
14034 instr[11,8] = CRm
14035 instr[7,5] = op2
14036 instr[4,0] = Rt */
14037 unsigned sys_op0 = INSTR (19, 19) + 2;
14038 unsigned sys_op1 = INSTR (18, 16);
14039 unsigned sys_crn = INSTR (15, 12);
14040 unsigned sys_crm = INSTR (11, 8);
14041 unsigned sys_op2 = INSTR (7, 5);
14042 unsigned rt = INSTR (4, 0);
14043
14044 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14045 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14046 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14047 }
14048
14049 static void
14050 do_MSR_immediate (sim_cpu *cpu)
14051 {
14052 /* instr[31:19] = 1101 0101 0000 0
14053 instr[18,16] = op1
14054 instr[15,12] = 0100
14055 instr[11,8] = CRm
14056 instr[7,5] = op2
14057 instr[4,0] = 1 1111 */
14058
14059 unsigned op1 = INSTR (18, 16);
14060 /* unsigned crm = INSTR (11, 8); */
14061 unsigned op2 = INSTR (7, 5);
14062
14063 NYI_assert (31, 19, 0x1AA0);
14064 NYI_assert (15, 12, 0x4);
14065 NYI_assert (4, 0, 0x1F);
14066
14067 if (op1 == 0)
14068 {
14069 if (op2 == 5)
14070 HALT_NYI; /* set SPSel. */
14071 else
14072 HALT_UNALLOC;
14073 }
14074 else if (op1 == 3)
14075 {
14076 if (op2 == 6)
14077 HALT_NYI; /* set DAIFset. */
14078 else if (op2 == 7)
14079 HALT_NYI; /* set DAIFclr. */
14080 else
14081 HALT_UNALLOC;
14082 }
14083 else
14084 HALT_UNALLOC;
14085 }
14086
14087 static void
14088 do_MSR_reg (sim_cpu *cpu)
14089 {
14090 /* instr[31:20] = 1101 0101 0001
14091 instr[19] = op0
14092 instr[18,16] = op1
14093 instr[15,12] = CRn
14094 instr[11,8] = CRm
14095 instr[7,5] = op2
14096 instr[4,0] = Rt */
14097
14098 unsigned sys_op0 = INSTR (19, 19) + 2;
14099 unsigned sys_op1 = INSTR (18, 16);
14100 unsigned sys_crn = INSTR (15, 12);
14101 unsigned sys_crm = INSTR (11, 8);
14102 unsigned sys_op2 = INSTR (7, 5);
14103 unsigned rt = INSTR (4, 0);
14104
14105 NYI_assert (31, 20, 0xD51);
14106
14107 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14108 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14109 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14110 }
14111
14112 static void
14113 do_SYS (sim_cpu *cpu)
14114 {
14115 /* instr[31,19] = 1101 0101 0000 1
14116 instr[18,16] = op1
14117 instr[15,12] = CRn
14118 instr[11,8] = CRm
14119 instr[7,5] = op2
14120 instr[4,0] = Rt */
14121 NYI_assert (31, 19, 0x1AA1);
14122
14123 /* FIXME: For now we just silently accept system ops. */
14124 }
14125
14126 static void
14127 dexSystem (sim_cpu *cpu)
14128 {
14129 /* instr[31:22] = 1101 01010 0
14130 instr[21] = L
14131 instr[20,19] = op0
14132 instr[18,16] = op1
14133 instr[15,12] = CRn
14134 instr[11,8] = CRm
14135 instr[7,5] = op2
14136 instr[4,0] = uimm5 */
14137
14138 /* We are interested in HINT, DSB, DMB and ISB
14139
14140 Hint #0 encodes NOOP (this is the only hint we care about)
14141 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14142 CRm != 0000, OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
14143
14144 DSB, DMB, ISB are data synchronization barrier, data memory barrier
14145 and instruction synchronization barrier, respectively, where
14146
14147 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14148 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14149 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14150 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14151 10 ==> InnerShareable, 11 ==> FullSystem
14152 types : 01 ==> Reads, 10 ==> Writes,
14153 11 ==> All, 00 ==> All (domain == FullSystem). */
14154
14155 unsigned rt = INSTR (4, 0);
14156
14157 NYI_assert (31, 22, 0x354);
14158
14159 switch (INSTR (21, 12))
14160 {
14161 case 0x032:
14162 if (rt == 0x1F)
14163 {
14164 /* NOP has CRm != 0000 OR
14165 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14166 uint32_t crm = INSTR (11, 8);
14167 uint32_t op2 = INSTR (7, 5);
14168
14169 if (crm != 0 || (op2 == 0 || op2 > 5))
14170 {
14171 /* Actually call nop method so we can reimplement it later. */
14172 nop (cpu);
14173 return;
14174 }
14175 }
14176 HALT_NYI;
14177
14178 case 0x033:
14179 {
14180 uint32_t op2 = INSTR (7, 5);
14181
14182 switch (op2)
14183 {
14184 case 2: HALT_NYI;
14185 case 4: dsb (cpu); return;
14186 case 5: dmb (cpu); return;
14187 case 6: isb (cpu); return;
14188 default: HALT_UNALLOC;
14189 }
14190 }
14191
14192 case 0x3B0:
14193 case 0x3B4:
14194 case 0x3BD:
14195 do_mrs (cpu);
14196 return;
14197
14198 case 0x0B7:
14199 do_SYS (cpu); /* DC is an alias of SYS. */
14200 return;
14201
14202 default:
14203 if (INSTR (21, 20) == 0x1)
14204 do_MSR_reg (cpu);
14205 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14206 do_MSR_immediate (cpu);
14207 else
14208 HALT_NYI;
14209 return;
14210 }
14211 }
14212
14213 static void
14214 dexBr (sim_cpu *cpu)
14215 {
14216 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14217 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14218 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14219 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14220
14221 switch (group2)
14222 {
14223 case BR_IMM_000:
14224 return dexBranchImmediate (cpu);
14225
14226 case BR_IMMCMP_001:
14227 /* Compare has bit 25 clear while test has it set. */
14228 if (!INSTR (25, 25))
14229 dexCompareBranchImmediate (cpu);
14230 else
14231 dexTestBranchImmediate (cpu);
14232 return;
14233
14234 case BR_IMMCOND_010:
14235 /* This is a conditional branch if bit 25 is clear otherwise
14236 unallocated. */
14237 if (!INSTR (25, 25))
14238 dexCondBranchImmediate (cpu);
14239 else
14240 HALT_UNALLOC;
14241 return;
14242
14243 case BR_UNALLOC_011:
14244 HALT_UNALLOC;
14245
14246 case BR_IMM_100:
14247 dexBranchImmediate (cpu);
14248 return;
14249
14250 case BR_IMMCMP_101:
14251 /* Compare has bit 25 clear while test has it set. */
14252 if (!INSTR (25, 25))
14253 dexCompareBranchImmediate (cpu);
14254 else
14255 dexTestBranchImmediate (cpu);
14256 return;
14257
14258 case BR_REG_110:
14259 /* Unconditional branch reg has bit 25 set. */
14260 if (INSTR (25, 25))
14261 dexBranchRegister (cpu);
14262
14263 /* This includes both Excpn Gen, System and unalloc operations.
14264 We need to decode the Excpn Gen operation BRK so we can plant
14265 debugger entry points.
14266 Excpn Gen operations have instr [24] = 0.
14267 we need to decode at least one of the System operations NOP
14268 which is an alias for HINT #0.
14269 System operations have instr [24,22] = 100. */
14270 else if (INSTR (24, 24) == 0)
14271 dexExcpnGen (cpu);
14272
14273 else if (INSTR (24, 22) == 4)
14274 dexSystem (cpu);
14275
14276 else
14277 HALT_UNALLOC;
14278
14279 return;
14280
14281 case BR_UNALLOC_111:
14282 HALT_UNALLOC;
14283
14284 default:
14285 /* Should never reach here. */
14286 HALT_NYI;
14287 }
14288 }
14289
14290 static void
14291 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14292 {
14293 /* We need to check whether gdb wants to break in here. */
14294 /* checkBreak (cpu); */
14295
14296 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14297
14298 switch (group)
14299 {
14300 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14301 case GROUP_LDST_0100: dexLdSt (cpu); break;
14302 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14303 case GROUP_LDST_0110: dexLdSt (cpu); break;
14304 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14305 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14306 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14307 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14308 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14309 case GROUP_LDST_1100: dexLdSt (cpu); break;
14310 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14311 case GROUP_LDST_1110: dexLdSt (cpu); break;
14312 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14313
14314 case GROUP_UNALLOC_0001:
14315 case GROUP_UNALLOC_0010:
14316 case GROUP_UNALLOC_0011:
14317 HALT_UNALLOC;
14318
14319 default:
14320 /* Should never reach here. */
14321 HALT_NYI;
14322 }
14323 }
14324
14325 static bfd_boolean
14326 aarch64_step (sim_cpu *cpu)
14327 {
14328 uint64_t pc = aarch64_get_PC (cpu);
14329
14330 if (pc == TOP_LEVEL_RETURN_PC)
14331 return FALSE;
14332
14333 aarch64_set_next_PC (cpu, pc + 4);
14334
14335 /* Code is always little-endian. */
14336 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14337 & aarch64_get_instr (cpu), pc, 4);
14338 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14339
14340 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14341 aarch64_get_instr (cpu));
14342 TRACE_DISASM (cpu, pc);
14343
14344 aarch64_decode_and_execute (cpu, pc);
14345
14346 return TRUE;
14347 }
14348
14349 void
14350 aarch64_run (SIM_DESC sd)
14351 {
14352 sim_cpu *cpu = STATE_CPU (sd, 0);
14353
14354 while (aarch64_step (cpu))
14355 {
14356 aarch64_update_PC (cpu);
14357
14358 if (sim_events_tick (sd))
14359 sim_events_process (sd);
14360 }
14361
14362 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14363 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14364 }
14365
14366 void
14367 aarch64_init (sim_cpu *cpu, uint64_t pc)
14368 {
14369 uint64_t sp = aarch64_get_stack_start (cpu);
14370
14371 /* Install SP, FP and PC and set LR to -20
14372 so we can detect a top-level return. */
14373 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14374 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14375 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14376 aarch64_set_next_PC (cpu, pc);
14377 aarch64_update_PC (cpu);
14378 aarch64_init_LIT_table ();
14379 }