/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2016 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

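/* Illustrative note (not part of the original source): INSTR (HIGH, LOW)
   extracts the inclusive bit field [HIGH,LOW] of the current instruction
   word.  For example, the A64 encoding of "add x0, x1, #4" is 0x91001020,
   so INSTR (21, 10) yields the immediate 4, INSTR (9, 5) the base register
   number 1, and INSTR (4, 0) the destination register number 0.  */
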
#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        { \
          sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: "); \
          trace_disasm (CPU_STATE (cpu), cpu, aarch64_get_PC (cpu)); \
        } \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expandLogicalImmediate.  */

/* For i = 1 .. N, result<i-1> = 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t)-1UL : ((1UL << N) - 1));
}

/* result<0> = val<N>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}

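/* Worked example (illustrative, not in the original source): for the
   encoding N = 0, imms (S) = 0x03, immr (R) = 0x02 the element size is
   32 bits, so imm starts as the low four bits 0xf; rotating right by 2
   gives 0xc0000003, and replicating to 64 bits yields
   0xc0000003c0000003.  */
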
/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of entries.  */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}

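/* Illustrative sketch (these lines are not a function in this file): a
   decoder for a logical-immediate instruction would index the table with
   the raw N:immr:imms field and treat a zero entry as unencodable, since
   0 is the marker expand_logical_immediate returns for invalid encodings:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */
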
static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
         aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
                       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
                       aarch64_get_mem_s32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
                       aarch64_get_mem_u32
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
                       aarch64_get_mem_u64
                       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* Long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

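/* Illustrative note (not in the original source): assuming the
   ScaleShift constants follow their names (e.g. ScaleShift32 == 2),
   SCALE (3, 32) == 12, i.e. an immediate word offset of 3 becomes a
   byte offset of 12.  */
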
/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is an element size: 16, 32, 64
   or 128.  The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the offset is shifted by the
   element's scale shift; when it is Unscaled no shift is applied.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))

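/* Illustrative note (not in the original source): under the same
   assumption about the ScaleShift constants, OPT_SCALE (idx, 64, Scaled)
   == idx * 8 while OPT_SCALE (idx, 64, Unscaled) == idx.  */
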
/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}

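/* Illustrative examples (not in the original source):
   extend (0xffffffff, UXTW) == 0xffffffff (4294967295), whereas
   extend (0xffffffff, SXTW) == -1.  */
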
/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

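/* Illustrative note (not in the original source): the wb argument
   mirrors the three A64 addressing forms, e.g.
     ldr s0, [x1, #8]    no writeback          (NoWriteBack)
     ldr s0, [x1, #8]!   pre-index writeback   (offset added before the access)
     ldr s0, [x1], #8    post-index writeback  (Post: offset added after)
   In both writeback forms the updated address is written back to x1.  */
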
/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* An 8 bit load must only read one byte, so use the u8 accessor.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
                       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode;
   the modes are illustrated after this comment.  */

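/* Illustrative A64 forms for the addressing modes (not in the original
   source):
     ldr w0, [x1, #4]           scaled unsigned 12 bit immediate
     ldr w0, [x1, #-4]!         unscaled signed 9 bit, pre-index writeback
     ldr w0, [x1], #-4          unscaled signed 9 bit, post-index writeback
     ldr w0, [x1, x2, lsl #2]   scaled 64-bit register offset
     ldr w0, [x1, w2, sxtw #2]  scaled sign-extended 32-bit register offset  */
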
/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8
                       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with
   pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
                       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
                       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
                       (int32_t) aarch64_get_mem_s16
                       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
                       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
                             + SCALE (offset, 32)),
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Only the low 32 bits of rt are stored, so fetch them as u32.  */
  aarch64_set_mem_u32 (cpu, address + displacement,
                       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 64),
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                             extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
                       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
                      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
                      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
                       + SCALE (offset, 16),
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
                       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

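/* Illustrative note (not in the original source): the prfop field
   decomposes as bits [4,3] = type (00 PLD, 01 PLI, 10 PST), bits
   [2,1] = target cache (00 L1, 01 L2, 10 L3) and bit [0] = policy
   (0 KEEP, 1 STRM), which is where the tables above come from.  */
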
/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* Always exclusive: no monitor is modelled, so the store is
     reported as having succeeded.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}

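/* Illustrative consequence (not in the original source): because stxr
   always writes 0 to the status register, a standard retry loop such as
     retry: ldxr x0, [x1]
            add  x0, x0, #1
            stxr w2, x0, [x1]
            cbnz w2, retry
   executes its body exactly once under this simulator.  */
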
static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}

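/* Illustrative note (not in the original source): simm19 counts 32 bit
   words, which is why the pc-relative handlers above compute
   "offset * 4"; a literal placed 0x100 bytes after the load would be
   encoded with simm19 == 0x40.  */
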
/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
                       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Carry out of bit 31: compare against the zero-extended low 32 bits;
     comparing against the plain int32_t result would sign-extend a
     negative result and falsely set C.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

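/* Worked example (illustrative, not in the original source): for
   value1 = 0x7fffffff and value2 = 1 the result is 0x80000000, so N and
   V are set (signed overflow) while Z and C are clear; uresult is
   0x80000000, which equals the zero-extended low 32 bits.  */
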
static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  int64_t sval1 = value1;
  int64_t sval2 = value2;
  uint64_t result = value1 + value2;
  int64_t sresult = sval1 + sval2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1ULL << 63))
    flags |= N;

  if (sval1 < 0)
    {
      if (sval2 < 0)
        {
          /* Negative plus a negative.  Overflow happens if
             the result is greater than either of the operands.  */
          if (sresult > sval1 || sresult > sval2)
            flags |= V;
        }
      /* else Negative plus a positive.  Overflow cannot happen.  */
    }
  else /* value1 is +ve.  */
    {
      if (sval2 < 0)
        {
          /* Overflow can only occur if we computed "0 - MININT".  */
          if (sval1 == 0 && sval2 == (int64_t) (1ULL << 63))
            flags |= V;
        }
      else
        {
          /* Positive plus positive - overflow has happened if the
             result is smaller than either of the operands.  */
          if (result < value1 || result < value2)
            flags |= V | C;
        }
    }

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

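/* Worked example (illustrative, not in the original source): for
   value1 = 0 and value2 = 1 the result is 0xffffffff, so N is set and
   Z, C and V are clear; the clear C indicates that a borrow occurred,
   matching the A64 convention that C is set only when no borrow
   occurs.  */
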
static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1 << 31))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;
  else
    flags &= ~ Z;

  if (result & (1ULL << 63))
    flags |= N;
  else
    flags &= ~ N;

  aarch64_set_CPSR (cpu, flags);
}

/* 32 bit add immediate set flags.  */
static void
adds32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  /* TODO : do we need to worry about signs here?  */
  int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
  set_flags_for_add32 (cpu, value1, aimm);
}

/* 64 bit add immediate set flags.  */
static void
adds64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  uint64_t value2 = aimm;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
  set_flags_for_add64 (cpu, value1, value2);
}

1827 /* 32 bit sub immediate. */
1828 static void
1829 sub32 (sim_cpu *cpu, uint32_t aimm)
1830 {
1831 unsigned rn = INSTR (9, 5);
1832 unsigned rd = INSTR (4, 0);
1833
1834 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1835 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1836 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1837 }
1838
1839 /* 64 bit sub immediate. */
1840 static void
1841 sub64 (sim_cpu *cpu, uint32_t aimm)
1842 {
1843 unsigned rn = INSTR (9, 5);
1844 unsigned rd = INSTR (4, 0);
1845
1846 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1847 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1848 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1849 }
1850
1851 /* 32 bit sub immediate set flags. */
1852 static void
1853 subs32 (sim_cpu *cpu, uint32_t aimm)
1854 {
1855 unsigned rn = INSTR (9, 5);
1856 unsigned rd = INSTR (4, 0);
1857 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
1858 uint32_t value2 = aimm;
1859
1860 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1861 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1862 set_flags_for_sub32 (cpu, value1, value2);
1863 }
1864
1865 /* 64 bit sub immediate set flags. */
1866 static void
1867 subs64 (sim_cpu *cpu, uint32_t aimm)
1868 {
1869 unsigned rn = INSTR (9, 5);
1870 unsigned rd = INSTR (4, 0);
1871 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1872 uint64_t value2 = aimm;
1873
1874 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1875 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1876 set_flags_for_sub64 (cpu, value1, value2);
1877 }
1878
1879 /* Data Processing Register. */
1880
1881 /* First two helpers to perform the shift operations. */
1882
1883 static inline uint32_t
1884 shifted32 (uint32_t value, Shift shift, uint32_t count)
1885 {
1886 switch (shift)
1887 {
1888 default:
1889 case LSL:
1890 return (value << count);
1891 case LSR:
1892 return (value >> count);
1893 case ASR:
1894 {
1895 int32_t svalue = value;
1896 return (svalue >> count);
1897 }
1898 case ROR:
1899 {
1900 uint32_t top = value >> count;
1901 uint32_t bottom = value << (32 - count);
1902 return (bottom | top);
1903 }
1904 }
1905 }
1906
1907 static inline uint64_t
1908 shifted64 (uint64_t value, Shift shift, uint32_t count)
1909 {
1910 switch (shift)
1911 {
1912 default:
1913 case LSL:
1914 return (value << count);
1915 case LSR:
1916 return (value >> count);
1917 case ASR:
1918 {
1919 int64_t svalue = value;
1920 return (svalue >> count);
1921 }
1922 case ROR:
1923 {
1924 uint64_t top = value >> count;
1925 uint64_t bottom = value << (64 - count);
1926 return (bottom | top);
1927 }
1928 }
1929 }
1930
1931 /* Arithmetic shifted register.
1932 These allow an optional LSL, ASR or LSR to the second source
1933 register with a count up to the register bit count.
1934
1935 N.B register args may not be SP. */
1936
1937 /* 32 bit ADD shifted register. */
1938 static void
1939 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1940 {
1941 unsigned rm = INSTR (20, 16);
1942 unsigned rn = INSTR (9, 5);
1943 unsigned rd = INSTR (4, 0);
1944
1945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1947 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1948 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1949 shift, count));
1950 }
1951
1952 /* 64 bit ADD shifted register. */
1953 static void
1954 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1955 {
1956 unsigned rm = INSTR (20, 16);
1957 unsigned rn = INSTR (9, 5);
1958 unsigned rd = INSTR (4, 0);
1959
1960 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1961 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1962 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1963 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1964 shift, count));
1965 }
1966
1967 /* 32 bit ADD shifted register setting flags. */
1968 static void
1969 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1970 {
1971 unsigned rm = INSTR (20, 16);
1972 unsigned rn = INSTR (9, 5);
1973 unsigned rd = INSTR (4, 0);
1974
1975 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1976 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1977 shift, count);
1978
1979 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1980 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1981 set_flags_for_add32 (cpu, value1, value2);
1982 }
1983
1984 /* 64 bit ADD shifted register setting flags. */
1985 static void
1986 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1987 {
1988 unsigned rm = INSTR (20, 16);
1989 unsigned rn = INSTR (9, 5);
1990 unsigned rd = INSTR (4, 0);
1991
1992 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1993 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1994 shift, count);
1995
1996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1997 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1998 set_flags_for_add64 (cpu, value1, value2);
1999 }
2000
2001 /* 32 bit SUB shifted register. */
2002 static void
2003 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2004 {
2005 unsigned rm = INSTR (20, 16);
2006 unsigned rn = INSTR (9, 5);
2007 unsigned rd = INSTR (4, 0);
2008
2009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2010 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2011 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2012 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2013 shift, count));
2014 }
2015
2016 /* 64 bit SUB shifted register. */
2017 static void
2018 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2019 {
2020 unsigned rm = INSTR (20, 16);
2021 unsigned rn = INSTR (9, 5);
2022 unsigned rd = INSTR (4, 0);
2023
2024 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2025 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2026 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2027 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2028 shift, count));
2029 }
2030
2031 /* 32 bit SUB shifted register setting flags. */
2032 static void
2033 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2034 {
2035 unsigned rm = INSTR (20, 16);
2036 unsigned rn = INSTR (9, 5);
2037 unsigned rd = INSTR (4, 0);
2038
2039 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2040 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2041 shift, count);
2042
2043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2044 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2045 set_flags_for_sub32 (cpu, value1, value2);
2046 }
2047
2048 /* 64 bit SUB shifted register setting flags. */
2049 static void
2050 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2051 {
2052 unsigned rm = INSTR (20, 16);
2053 unsigned rn = INSTR (9, 5);
2054 unsigned rd = INSTR (4, 0);
2055
2056 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2057 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2058 shift, count);
2059
2060 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2061 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2062 set_flags_for_sub64 (cpu, value1, value2);
2063 }
2064
2065 /* First a couple more helpers to fetch the
2066 relevant source register element either
2067 sign or zero extended as required by the
2068 extension value. */
2069
2070 static uint32_t
2071 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2072 {
2073 switch (extension)
2074 {
2075 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2076 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2077 case UXTW: /* Fall through. */
2078 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2079 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2080 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2081 case SXTW: /* Fall through. */
2082 case SXTX: /* Fall through. */
2083 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2084 }
2085 }
2086
2087 static uint64_t
2088 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2089 {
2090 switch (extension)
2091 {
2092 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2093 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2094 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2095 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2096 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2097 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2098 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2099 case SXTX:
2100 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2101 }
2102 }
2103
2104 /* Arithmetic extending register
2105 These allow an optional sign extension of some portion of the
2106 second source register followed by an optional left shift of
2107 between 0 and 4 bits.
2108
2109 N.B output (dest) and first input arg (source) may normally be Xn
2110 or SP. However, for flag setting operations dest can only be
2111 Xn. Second input registers are always Xn. */
2112
2113 /* 32 bit ADD extending register. */
2114 static void
2115 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2116 {
2117 unsigned rm = INSTR (20, 16);
2118 unsigned rn = INSTR (9, 5);
2119 unsigned rd = INSTR (4, 0);
2120
2121 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2122 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2123 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2124 + (extreg32 (cpu, rm, extension) << shift));
2125 }
2126
2127 /* 64 bit ADD extending register.
2128 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2129 static void
2130 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2131 {
2132 unsigned rm = INSTR (20, 16);
2133 unsigned rn = INSTR (9, 5);
2134 unsigned rd = INSTR (4, 0);
2135
2136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2137 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2138 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2139 + (extreg64 (cpu, rm, extension) << shift));
2140 }
2141
2142 /* 32 bit ADD extending register setting flags. */
2143 static void
2144 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2145 {
2146 unsigned rm = INSTR (20, 16);
2147 unsigned rn = INSTR (9, 5);
2148 unsigned rd = INSTR (4, 0);
2149
2150 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2151 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2152
2153 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2154 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2155 set_flags_for_add32 (cpu, value1, value2);
2156 }
2157
2158 /* 64 bit ADD extending register setting flags */
2159 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2160 static void
2161 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2162 {
2163 unsigned rm = INSTR (20, 16);
2164 unsigned rn = INSTR (9, 5);
2165 unsigned rd = INSTR (4, 0);
2166
2167 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2168 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2169
2170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2171 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2172 set_flags_for_add64 (cpu, value1, value2);
2173 }
2174
2175 /* 32 bit SUB extending register. */
2176 static void
2177 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2178 {
2179 unsigned rm = INSTR (20, 16);
2180 unsigned rn = INSTR (9, 5);
2181 unsigned rd = INSTR (4, 0);
2182
2183 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2184 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2185 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2186 - (extreg32 (cpu, rm, extension) << shift));
2187 }
2188
2189 /* 64 bit SUB extending register. */
2190 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2191 static void
2192 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2193 {
2194 unsigned rm = INSTR (20, 16);
2195 unsigned rn = INSTR (9, 5);
2196 unsigned rd = INSTR (4, 0);
2197
2198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2199 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2200 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2201 - (extreg64 (cpu, rm, extension) << shift));
2202 }
2203
2204 /* 32 bit SUB extending register setting flags. */
2205 static void
2206 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2207 {
2208 unsigned rm = INSTR (20, 16);
2209 unsigned rn = INSTR (9, 5);
2210 unsigned rd = INSTR (4, 0);
2211
2212 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2213 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2214
2215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2216 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2217 set_flags_for_sub32 (cpu, value1, value2);
2218 }
2219
2220 /* 64 bit SUB extending register setting flags */
2221 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2222 static void
2223 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2224 {
2225 unsigned rm = INSTR (20, 16);
2226 unsigned rn = INSTR (9, 5);
2227 unsigned rd = INSTR (4, 0);
2228
2229 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2230 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2231
2232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2233 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2234 set_flags_for_sub64 (cpu, value1, value2);
2235 }
2236
2237 static void
2238 dexAddSubtractImmediate (sim_cpu *cpu)
2239 {
2240 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2241 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2242 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2243 instr[28,24] = 10001
2244 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2245 instr[21,10] = uimm12
2246 instr[9,5] = Rn
2247 instr[4,0] = Rd */
2248
2249 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2250 uint32_t shift = INSTR (23, 22);
2251 uint32_t imm = INSTR (21, 10);
2252 uint32_t dispatch = INSTR (31, 29);
2253
2254 NYI_assert (28, 24, 0x11);
2255
2256 if (shift > 1)
2257 HALT_UNALLOC;
2258
2259 if (shift)
2260 imm <<= 12;
2261
2262 switch (dispatch)
2263 {
2264 case 0: add32 (cpu, imm); break;
2265 case 1: adds32 (cpu, imm); break;
2266 case 2: sub32 (cpu, imm); break;
2267 case 3: subs32 (cpu, imm); break;
2268 case 4: add64 (cpu, imm); break;
2269 case 5: adds64 (cpu, imm); break;
2270 case 6: sub64 (cpu, imm); break;
2271 case 7: subs64 (cpu, imm); break;
2272 }
2273 }
2274
2275 static void
2276 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2277 {
2278 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2279 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2280 instr[28,24] = 01011
2281 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2282 instr[21] = 0
2283 instr[20,16] = Rm
2284 instr[15,10] = count : must be 0xxxxx for 32 bit
2285 instr[9,5] = Rn
2286 instr[4,0] = Rd */
2287
2288 uint32_t size = INSTR (31, 31);
2289 uint32_t count = INSTR (15, 10);
2290 Shift shiftType = INSTR (23, 22);
2291
2292 NYI_assert (28, 24, 0x0B);
2293 NYI_assert (21, 21, 0);
2294
2295 /* Shift encoded as ROR is unallocated. */
2296 if (shiftType == ROR)
2297 HALT_UNALLOC;
2298
2299 /* 32 bit operations must have count[5] = 0
2300 or else we have an UNALLOC. */
2301 if (size == 0 && uimm (count, 5, 5))
2302 HALT_UNALLOC;
2303
2304 /* Dispatch on size:op i.e instr [31,29]. */
2305 switch (INSTR (31, 29))
2306 {
2307 case 0: add32_shift (cpu, shiftType, count); break;
2308 case 1: adds32_shift (cpu, shiftType, count); break;
2309 case 2: sub32_shift (cpu, shiftType, count); break;
2310 case 3: subs32_shift (cpu, shiftType, count); break;
2311 case 4: add64_shift (cpu, shiftType, count); break;
2312 case 5: adds64_shift (cpu, shiftType, count); break;
2313 case 6: sub64_shift (cpu, shiftType, count); break;
2314 case 7: subs64_shift (cpu, shiftType, count); break;
2315 }
2316 }
2317
2318 static void
2319 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2320 {
2321 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2322 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2323 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2324 instr[28,24] = 01011
2325 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2326 instr[21] = 1
2327 instr[20,16] = Rm
2328 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2329 010 ==> LSL|UXTW, 011 ==> UXTX,
2330 100 ==> SXTB, 101 ==> SXTH,
2331 110 ==> SXTW, 111 ==> SXTX,
2332 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2333 instr[9,5] = Rn
2334 instr[4,0] = Rd */
2335
2336 Extension extensionType = INSTR (15, 13);
2337 uint32_t shift = INSTR (12, 10);
2338
2339 NYI_assert (28, 24, 0x0B);
2340 NYI_assert (21, 21, 1);
2341
2342 /* Shift may not exceed 4. */
2343 if (shift > 4)
2344 HALT_UNALLOC;
2345
2346 /* Dispatch on size:op:set?. */
2347 switch (INSTR (31, 29))
2348 {
2349 case 0: add32_ext (cpu, extensionType, shift); break;
2350 case 1: adds32_ext (cpu, extensionType, shift); break;
2351 case 2: sub32_ext (cpu, extensionType, shift); break;
2352 case 3: subs32_ext (cpu, extensionType, shift); break;
2353 case 4: add64_ext (cpu, extensionType, shift); break;
2354 case 5: adds64_ext (cpu, extensionType, shift); break;
2355 case 6: sub64_ext (cpu, extensionType, shift); break;
2356 case 7: subs64_ext (cpu, extensionType, shift); break;
2357 }
2358 }
2359
2360 /* Conditional data processing
2361 Condition register is implicit 3rd source. */
2362
2363 /* 32 bit add with carry. */
2364 /* N.B register args may not be SP. */
2365
2366 static void
2367 adc32 (sim_cpu *cpu)
2368 {
2369 unsigned rm = INSTR (20, 16);
2370 unsigned rn = INSTR (9, 5);
2371 unsigned rd = INSTR (4, 0);
2372
2373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2374 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2375 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2376 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2377 + IS_SET (C));
2378 }
2379
2380 /* 64 bit add with carry */
2381 static void
2382 adc64 (sim_cpu *cpu)
2383 {
2384 unsigned rm = INSTR (20, 16);
2385 unsigned rn = INSTR (9, 5);
2386 unsigned rd = INSTR (4, 0);
2387
2388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2389 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2390 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2391 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2392 + IS_SET (C));
2393 }
2394
2395 /* 32 bit add with carry setting flags. */
2396 static void
2397 adcs32 (sim_cpu *cpu)
2398 {
2399 unsigned rm = INSTR (20, 16);
2400 unsigned rn = INSTR (9, 5);
2401 unsigned rd = INSTR (4, 0);
2402
2403 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2404 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2405 uint32_t carry = IS_SET (C);
2406
2407 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2408 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2409 set_flags_for_add32 (cpu, value1, value2 + carry);
2410 }
2411
2412 /* 64 bit add with carry setting flags. */
2413 static void
2414 adcs64 (sim_cpu *cpu)
2415 {
2416 unsigned rm = INSTR (20, 16);
2417 unsigned rn = INSTR (9, 5);
2418 unsigned rd = INSTR (4, 0);
2419
2420 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2421 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2422 uint64_t carry = IS_SET (C);
2423
2424 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2425 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2426 set_flags_for_add64 (cpu, value1, value2 + carry);
2427 }
2428
2429 /* 32 bit sub with carry. */
2430 static void
2431 sbc32 (sim_cpu *cpu)
2432 {
2433 unsigned rm = INSTR (20, 16);
2434 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2435 unsigned rd = INSTR (4, 0);
2436
2437 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2438 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2439 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2440 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2441 - 1 + IS_SET (C));
2442 }
2443
2444 /* 64 bit sub with carry */
2445 static void
2446 sbc64 (sim_cpu *cpu)
2447 {
2448 unsigned rm = INSTR (20, 16);
2449 unsigned rn = INSTR (9, 5);
2450 unsigned rd = INSTR (4, 0);
2451
2452 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2453 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2454 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2455 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2456 - 1 + IS_SET (C));
2457 }
2458
2459 /* 32 bit sub with carry setting flags */
2460 static void
2461 sbcs32 (sim_cpu *cpu)
2462 {
2463 unsigned rm = INSTR (20, 16);
2464 unsigned rn = INSTR (9, 5);
2465 unsigned rd = INSTR (4, 0);
2466
2467 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2468 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2469 uint32_t carry = IS_SET (C);
2470 uint32_t result = value1 - value2 - 1 + carry;
2471
2472 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2473 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2474 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2475 }
2476
2477 /* 64 bit sub with carry setting flags */
2478 static void
2479 sbcs64 (sim_cpu *cpu)
2480 {
2481 unsigned rm = INSTR (20, 16);
2482 unsigned rn = INSTR (9, 5);
2483 unsigned rd = INSTR (4, 0);
2484
2485 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2486 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2487 uint64_t carry = IS_SET (C);
2488 uint64_t result = value1 - value2 - 1 + carry;
2489
2490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2491 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2492 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2493 }
2494
2495 static void
2496 dexAddSubtractWithCarry (sim_cpu *cpu)
2497 {
2498 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2499 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2500 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2501 instr[28,21] = 1 1010 000
2502 instr[20,16] = Rm
2503 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2504 instr[9,5] = Rn
2505 instr[4,0] = Rd */
2506
2507 uint32_t op2 = INSTR (15, 10);
2508
2509 NYI_assert (28, 21, 0xD0);
2510
2511 if (op2 != 0)
2512 HALT_UNALLOC;
2513
2514 /* Dispatch on size:op:set?. */
2515 switch (INSTR (31, 29))
2516 {
2517 case 0: adc32 (cpu); break;
2518 case 1: adcs32 (cpu); break;
2519 case 2: sbc32 (cpu); break;
2520 case 3: sbcs32 (cpu); break;
2521 case 4: adc64 (cpu); break;
2522 case 5: adcs64 (cpu); break;
2523 case 6: sbc64 (cpu); break;
2524 case 7: sbcs64 (cpu); break;
2525 }
2526 }
2527
2528 static uint32_t
2529 testConditionCode (sim_cpu *cpu, CondCode cc)
2530 {
2531 /* This should be reducible to branchless logic
2532 by some careful testing of bits in CC followed
2533 by the requisite masking and combining of bits
2534 from the flag register.
2535
2536 For now we do it with a switch. */
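/* E.g. after a subtract, GE holds when N == V: an overflow (V set)
   flips the sign bit (N) away from that of the mathematically correct
   result, so N == V still identifies a non-negative difference.  */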
2537 int res;
2538
2539 switch (cc)
2540 {
2541 case EQ: res = IS_SET (Z); break;
2542 case NE: res = IS_CLEAR (Z); break;
2543 case CS: res = IS_SET (C); break;
2544 case CC: res = IS_CLEAR (C); break;
2545 case MI: res = IS_SET (N); break;
2546 case PL: res = IS_CLEAR (N); break;
2547 case VS: res = IS_SET (V); break;
2548 case VC: res = IS_CLEAR (V); break;
2549 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2550 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2551 case GE: res = IS_SET (N) == IS_SET (V); break;
2552 case LT: res = IS_SET (N) != IS_SET (V); break;
2553 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2554 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2555 case AL:
2556 case NV:
2557 default:
2558 res = 1;
2559 break;
2560 }
2561 return res;
2562 }
2563
2564 static void
2565 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2566 {
2567 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2568 instr[30] = compare with positive (1) or negative value (0)
2569 instr[29,21] = 1 1101 0010
2570 instr[20,16] = Rm or const
2571 instr[15,12] = cond
2572 instr[11] = compare reg (0) or const (1)
2573 instr[10] = 0
2574 instr[9,5] = Rn
2575 instr[4] = 0
2576 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2577 signed int negate;
2578 unsigned rm;
2579 unsigned rn;
2580
2581 NYI_assert (29, 21, 0x1d2);
2582 NYI_assert (10, 10, 0);
2583 NYI_assert (4, 4, 0);
2584
2585 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2586 if (! testConditionCode (cpu, INSTR (15, 12)))
2587 {
2588 aarch64_set_CPSR (cpu, INSTR (3, 0));
2589 return;
2590 }
2591
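/* instr[30] == 1 is CCMP, which compares by subtracting the second
   operand; instr[30] == 0 is CCMN, which we model by negating that
   operand before computing the subtraction flags.  */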
2592 negate = INSTR (30, 30) ? 1 : -1;
2593 rm = INSTR (20, 16);
2594 rn = INSTR ( 9, 5);
2595
2596 if (INSTR (31, 31))
2597 {
2598 if (INSTR (11, 11))
2599 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2600 negate * (uint64_t) rm);
2601 else
2602 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2603 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2604 }
2605 else
2606 {
2607 if (INSTR (11, 11))
2608 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2609 negate * rm);
2610 else
2611 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2612 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2613 }
2614 }
2615
2616 static void
2617 do_vec_MOV_whole_vector (sim_cpu *cpu)
2618 {
2619 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2620
2621 instr[31] = 0
2622 instr[30] = half(0)/full(1)
2623 instr[29,21] = 001110101
2624 instr[20,16] = Vs
2625 instr[15,10] = 000111
2626 instr[9,5] = Vs
2627 instr[4,0] = Vd */
2628
2629 unsigned vs = INSTR (9, 5);
2630 unsigned vd = INSTR (4, 0);
2631
2632 NYI_assert (29, 21, 0x075);
2633 NYI_assert (15, 10, 0x07);
2634
2635 if (INSTR (20, 16) != vs)
2636 HALT_NYI;
2637
2638 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2639 if (INSTR (30, 30))
2640 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2641
2642 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2643 }
2644
2645 static void
2646 do_vec_MOV_into_scalar (sim_cpu *cpu)
2647 {
2648 /* instr[31] = 0
2649 instr[30] = word(0)/long(1)
2650 instr[29,21] = 00 1110 000
2651 instr[20,18] = element size and index
2652 instr[17,10] = 00 0011 11
2653 instr[9,5] = V source
2654 instr[4,0] = R dest */
2655
2656 unsigned vs = INSTR (9, 5);
2657 unsigned rd = INSTR (4, 0);
2658
2659 NYI_assert (29, 21, 0x070);
2660 NYI_assert (17, 10, 0x0F);
2661
2662 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2663 switch (INSTR (20, 18))
2664 {
2665 case 0x2:
2666 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2667 break;
2668
2669 case 0x6:
2670 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2671 break;
2672
2673 case 0x1:
2674 case 0x3:
2675 case 0x5:
2676 case 0x7:
2677 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2678 (cpu, vs, INSTR (20, 19)));
2679 break;
2680
2681 default:
2682 HALT_NYI;
2683 }
2684 }
2685
2686 static void
2687 do_vec_INS (sim_cpu *cpu)
2688 {
2689 /* instr[31,21] = 01001110000
2690 instr[20,16] = element size and index
2691 instr[15,10] = 000111
2692 instr[9,5] = W source
2693 instr[4,0] = V dest */
2694
2695 int index;
2696 unsigned rs = INSTR (9, 5);
2697 unsigned vd = INSTR (4, 0);
2698
2699 NYI_assert (31, 21, 0x270);
2700 NYI_assert (15, 10, 0x07);
2701
2702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
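/* The lowest set bit of instr[20,16] selects the element size
   (bit 16 => byte, bit 17 => half, bit 18 => word, bit 19 =>
   doubleword) and the bits above it hold the element index.  */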
2703 if (INSTR (16, 16))
2704 {
2705 index = INSTR (20, 17);
2706 aarch64_set_vec_u8 (cpu, vd, index,
2707 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2708 }
2709 else if (INSTR (17, 17))
2710 {
2711 index = INSTR (20, 18);
2712 aarch64_set_vec_u16 (cpu, vd, index,
2713 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2714 }
2715 else if (INSTR (18, 18))
2716 {
2717 index = INSTR (20, 19);
2718 aarch64_set_vec_u32 (cpu, vd, index,
2719 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2720 }
2721 else if (INSTR (19, 19))
2722 {
2723 index = INSTR (20, 20);
2724 aarch64_set_vec_u64 (cpu, vd, index,
2725 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2726 }
2727 else
2728 HALT_NYI;
2729 }
2730
2731 static void
2732 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2733 {
2734 /* instr[31] = 0
2735 instr[30] = half(0)/full(1)
2736 instr[29,21] = 00 1110 000
2737 instr[20,16] = element size and index
2738 instr[15,10] = 0000 01
2739 instr[9,5] = V source
2740 instr[4,0] = V dest. */
2741
2742 unsigned full = INSTR (30, 30);
2743 unsigned vs = INSTR (9, 5);
2744 unsigned vd = INSTR (4, 0);
2745 int i, index;
2746
2747 NYI_assert (29, 21, 0x070);
2748 NYI_assert (15, 10, 0x01);
2749
2750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
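/* instr[20,16] is decoded as in do_vec_INS: the lowest set bit picks
   the element size and the bits above it the source element index.  */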
2751 if (INSTR (16, 16))
2752 {
2753 index = INSTR (20, 17);
2754
2755 for (i = 0; i < (full ? 16 : 8); i++)
2756 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2757 }
2758 else if (INSTR (17, 17))
2759 {
2760 index = INSTR (20, 18);
2761
2762 for (i = 0; i < (full ? 8 : 4); i++)
2763 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2764 }
2765 else if (INSTR (18, 18))
2766 {
2767 index = INSTR (20, 19);
2768
2769 for (i = 0; i < (full ? 4 : 2); i++)
2770 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2771 }
2772 else
2773 {
2774 if (INSTR (19, 19) == 0)
2775 HALT_UNALLOC;
2776
2777 if (! full)
2778 HALT_UNALLOC;
2779
2780 index = INSTR (20, 20);
2781
2782 for (i = 0; i < 2; i++)
2783 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2784 }
2785 }
2786
2787 static void
2788 do_vec_TBL (sim_cpu *cpu)
2789 {
2790 /* instr[31] = 0
2791 instr[30] = half(0)/full(1)
2792 instr[29,21] = 00 1110 000
2793 instr[20,16] = Vm
2794 instr[15] = 0
2795 instr[14,13] = vec length
2796 instr[12,10] = 000
2797 instr[9,5] = V start
2798 instr[4,0] = V dest */
2799
2800 int full = INSTR (30, 30);
2801 int len = INSTR (14, 13) + 1;
2802 unsigned vm = INSTR (20, 16);
2803 unsigned vn = INSTR (9, 5);
2804 unsigned vd = INSTR (4, 0);
2805 unsigned i;
2806
2807 NYI_assert (29, 21, 0x070);
2808 NYI_assert (12, 10, 0);
2809
2810 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
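/* Each result byte indexes into a table of up to `len' consecutive
   vector registers starting at vn; a selector beyond the table (or
   beyond 63) yields zero.  */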
2811 for (i = 0; i < (full ? 16 : 8); i++)
2812 {
2813 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2814 uint8_t val;
2815
2816 if (selector < 16)
2817 val = aarch64_get_vec_u8 (cpu, vn, selector);
2818 else if (selector < 32)
2819 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2820 else if (selector < 48)
2821 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2822 else if (selector < 64)
2823 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2824 else
2825 val = 0;
2826
2827 aarch64_set_vec_u8 (cpu, vd, i, val);
2828 }
2829 }
2830
2831 static void
2832 do_vec_TRN (sim_cpu *cpu)
2833 {
2834 /* instr[31] = 0
2835 instr[30] = half(0)/full(1)
2836 instr[29,24] = 00 1110
2837 instr[23,22] = size
2838 instr[21] = 0
2839 instr[20,16] = Vm
2840 instr[15] = 0
2841 instr[14] = TRN1 (0) / TRN2 (1)
2842 instr[13,10] = 1010
2843 instr[9,5] = V source
2844 instr[4,0] = V dest. */
2845
2846 int full = INSTR (30, 30);
2847 int second = INSTR (14, 14);
2848 unsigned vm = INSTR (20, 16);
2849 unsigned vn = INSTR (9, 5);
2850 unsigned vd = INSTR (4, 0);
2851 unsigned i;
2852
2853 NYI_assert (29, 24, 0x0E);
2854 NYI_assert (13, 10, 0xA);
2855
2856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2857 switch (INSTR (23, 22))
2858 {
2859 case 0:
2860 for (i = 0; i < (full ? 8 : 4); i++)
2861 {
2862 aarch64_set_vec_u8
2863 (cpu, vd, i * 2,
2864 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2865 aarch64_set_vec_u8
2866 (cpu, vd, i * 2 + 1,
2867 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2868 }
2869 break;
2870
2871 case 1:
2872 for (i = 0; i < (full ? 4 : 2); i++)
2873 {
2874 aarch64_set_vec_u16
2875 (cpu, vd, i * 2,
2876 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2877 aarch64_set_vec_u16
2878 (cpu, vd, i * 2 + 1,
2879 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2880 }
2881 break;
2882
2883 case 2:
2884 aarch64_set_vec_u32
2885 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2886 aarch64_set_vec_u32
2887 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2888 aarch64_set_vec_u32
2889 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2890 aarch64_set_vec_u32
2891 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2892 break;
2893
2894 case 3:
2895 if (! full)
2896 HALT_UNALLOC;
2897
2898 aarch64_set_vec_u64 (cpu, vd, 0,
2899 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2900 aarch64_set_vec_u64 (cpu, vd, 1,
2901 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2902 break;
2903 }
2904 }
2905
2906 static void
2907 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2908 {
2909 /* instr[31] = 0
2910 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2911 [must be 1 for 64-bit xfer]
2912 instr[29,20] = 00 1110 0000
2913 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2914 0100=> 32-bits. 1000=>64-bits
2915 instr[15,10] = 0000 11
2916 instr[9,5] = W source
2917 instr[4,0] = V dest. */
2918
2919 unsigned i;
2920 unsigned Vd = INSTR (4, 0);
2921 unsigned Rs = INSTR (9, 5);
2922 int both = INSTR (30, 30);
2923
2924 NYI_assert (29, 20, 0x0E0);
2925 NYI_assert (15, 10, 0x03);
2926
2927 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2928 switch (INSTR (19, 16))
2929 {
2930 case 1:
2931 for (i = 0; i < (both ? 16 : 8); i++)
2932 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2933 break;
2934
2935 case 2:
2936 for (i = 0; i < (both ? 8 : 4); i++)
2937 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2938 break;
2939
2940 case 4:
2941 for (i = 0; i < (both ? 4 : 2); i++)
2942 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2943 break;
2944
2945 case 8:
2946 if (!both)
2947 HALT_NYI;
2948 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2949 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2950 break;
2951
2952 default:
2953 HALT_NYI;
2954 }
2955 }
2956
2957 static void
2958 do_vec_UZP (sim_cpu *cpu)
2959 {
2960 /* instr[31] = 0
2961 instr[30] = half(0)/full(1)
2962 instr[29,24] = 00 1110
2963 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2964 instr[21] = 0
2965 instr[20,16] = Vm
2966 instr[15] = 0
2967 instr[14] = lower (0) / upper (1)
2968 instr[13,10] = 0110
2969 instr[9,5] = Vn
2970 instr[4,0] = Vd. */
2971
2972 int full = INSTR (30, 30);
2973 int upper = INSTR (14, 14);
2974
2975 unsigned vm = INSTR (20, 16);
2976 unsigned vn = INSTR (9, 5);
2977 unsigned vd = INSTR (4, 0);
2978
2979 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2980 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2981 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2982 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2983
2984 uint64_t val1 = 0;
2985 uint64_t val2 = 0;
2986
2987 uint64_t input1 = upper ? val_n1 : val_m1;
2988 uint64_t input2 = upper ? val_n2 : val_m2;
2989 unsigned i;
2990
2991 NYI_assert (29, 24, 0x0E);
2992 NYI_assert (21, 21, 0);
2993 NYI_assert (15, 15, 0);
2994 NYI_assert (13, 10, 6);
2995
2996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2997 switch (INSTR (23, 23))
2998 {
2999 case 0:
3000 for (i = 0; i < 8; i++)
3001 {
3002 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
3003 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
3004 }
3005 break;
3006
3007 case 1:
3008 for (i = 0; i < 4; i++)
3009 {
3010 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
3011 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
3012 }
3013 break;
3014
3015 case 2:
3016 val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
3017 val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
3018 break;
3019 case 3:
3020 val1 = input1;
3021 val2 = input2;
3022 break;
3023 }
3024
3025 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3026 if (full)
3027 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3028 }
3029
3030 static void
3031 do_vec_ZIP (sim_cpu *cpu)
3032 {
3033 /* instr[31] = 0
3034 instr[30] = half(0)/full(1)
3035 instr[29,24] = 00 1110
3036 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3037 instr[21] = 0
3038 instr[20,16] = Vm
3039 instr[15] = 0
3040 instr[14] = lower (0) / upper (1)
3041 instr[13,10] = 1110
3042 instr[9,5] = Vn
3043 instr[4,0] = Vd. */
3044
3045 int full = INSTR (30, 30);
3046 int upper = INSTR (14, 14);
3047
3048 unsigned vm = INSTR (20, 16);
3049 unsigned vn = INSTR (9, 5);
3050 unsigned vd = INSTR (4, 0);
3051
3052 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3053 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3054 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3055 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3056
3057 uint64_t val1 = 0;
3058 uint64_t val2 = 0;
3059
3060 uint64_t input1 = upper ? val_n1 : val_m1;
3061 uint64_t input2 = upper ? val_n2 : val_m2;
3062
3063 NYI_assert (29, 24, 0x0E);
3064 NYI_assert (21, 21, 0);
3065 NYI_assert (15, 15, 0);
3066 NYI_assert (13, 10, 0xE);
3067
3068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
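/* Interleave: result elements are drawn alternately from input1 and
   input2, working up from the low end.  */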
3069 switch (INSTR (23, 23))
3070 {
3071 case 0:
3072 val1 =
3073 ((input1 << 0) & (0xFF << 0))
3074 | ((input2 << 8) & (0xFF << 8))
3075 | ((input1 << 8) & (0xFF << 16))
3076 | ((input2 << 16) & (0xFF << 24))
3077 | ((input1 << 16) & (0xFFULL << 32))
3078 | ((input2 << 24) & (0xFFULL << 40))
3079 | ((input1 << 24) & (0xFFULL << 48))
3080 | ((input2 << 32) & (0xFFULL << 56));
3081
3082 val2 =
3083 ((input1 >> 32) & (0xFF << 0))
3084 | ((input2 >> 24) & (0xFF << 8))
3085 | ((input1 >> 24) & (0xFF << 16))
3086 | ((input2 >> 16) & (0xFF << 24))
3087 | ((input1 >> 16) & (0xFFULL << 32))
3088 | ((input2 >> 8) & (0xFFULL << 40))
3089 | ((input1 >> 8) & (0xFFULL << 48))
3090 | ((input2 >> 0) & (0xFFULL << 56));
3091 break;
3092
3093 case 1:
3094 val1 =
3095 ((input1 << 0) & (0xFFFF << 0))
3096 | ((input2 << 16) & (0xFFFF << 16))
3097 | ((input1 << 16) & (0xFFFFULL << 32))
3098 | ((input2 << 32) & (0xFFFFULL << 48));
3099
3100 val2 =
3101 ((input1 >> 32) & (0xFFFF << 0))
3102 | ((input2 >> 16) & (0xFFFF << 16))
3103 | ((input1 >> 16) & (0xFFFFULL << 32))
3104 | ((input2 >> 0) & (0xFFFFULL << 48));
3105 break;
3106
3107 case 2:
3108 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3109 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3110 break;
3111
3112 case 3:
3113 val1 = input1;
3114 val2 = input2;
3115 break;
3116 }
3117
3118 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3119 if (full)
3120 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3121 }
3122
3123 /* Floating point immediates are encoded in 8 bits.
3124 fpimm[7] = sign bit.
3125 fpimm[6:4] = signed exponent.
3126 fpimm[3:0] = fraction (assuming leading 1).
3127 i.e. F = s * 1.f * 2^(e - b). */
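/* For example, imm8 == 0x70 has s == 0, e == 7 and f == 0; both
   helpers below decode it to 1.0, and imm8 == 0xF0 to -1.0.  */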
3128
3129 static float
3130 fp_immediate_for_encoding_32 (uint32_t imm8)
3131 {
3132 float u;
3133 uint32_t s, e, f, i;
3134
3135 s = (imm8 >> 7) & 0x1;
3136 e = (imm8 >> 4) & 0x7;
3137 f = imm8 & 0xf;
3138
3139 /* The fp value is s * n/16 * 2^r where n is 16+f and r is decoded from e. */
3140 u = (16.0 + f) / 16.0;
3141
3142 /* N.B. exponent is signed. */
3143 if (e < 4)
3144 {
3145 int epos = e;
3146
3147 for (i = 0; i <= epos; i++)
3148 u *= 2.0;
3149 }
3150 else
3151 {
3152 int eneg = 7 - e;
3153
3154 for (i = 0; i < eneg; i++)
3155 u /= 2.0;
3156 }
3157
3158 if (s)
3159 u = - u;
3160
3161 return u;
3162 }
3163
3164 static double
3165 fp_immediate_for_encoding_64 (uint32_t imm8)
3166 {
3167 double u;
3168 uint32_t s, e, f, i;
3169
3170 s = (imm8 >> 7) & 0x1;
3171 e = (imm8 >> 4) & 0x7;
3172 f = imm8 & 0xf;
3173
3174 /* The fp value is s * n/16 * 2^r where n is 16+f and r is decoded from e. */
3175 u = (16.0 + f) / 16.0;
3176
3177 /* N.B. exponent is signed. */
3178 if (e < 4)
3179 {
3180 int epos = e;
3181
3182 for (i = 0; i <= epos; i++)
3183 u *= 2.0;
3184 }
3185 else
3186 {
3187 int eneg = 7 - e;
3188
3189 for (i = 0; i < eneg; i++)
3190 u /= 2.0;
3191 }
3192
3193 if (s)
3194 u = - u;
3195
3196 return u;
3197 }
3198
3199 static void
3200 do_vec_MOV_immediate (sim_cpu *cpu)
3201 {
3202 /* instr[31] = 0
3203 instr[30] = full/half selector
3204 instr[29,19] = 00111100000
3205 instr[18,16] = high 3 bits of uimm8
3206 instr[15,12] = size & shift:
3207 0000 => 32-bit
3208 0010 => 32-bit + LSL#8
3209 0100 => 32-bit + LSL#16
3210 0110 => 32-bit + LSL#24
3211 1010 => 16-bit + LSL#8
3212 1000 => 16-bit
3213 1101 => 32-bit + MSL#16
3214 1100 => 32-bit + MSL#8
3215 1110 => 8-bit
3216 1111 => double
3217 instr[11,10] = 01
3218 instr[9,5] = low 5-bits of uimm8
3219 instr[4,0] = Vd. */
3220
3221 int full = INSTR (30, 30);
3222 unsigned vd = INSTR (4, 0);
3223 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3224 unsigned i;
3225
3226 NYI_assert (29, 19, 0x1E0);
3227 NYI_assert (11, 10, 1);
3228
3229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3230 switch (INSTR (15, 12))
3231 {
3232 case 0x0: /* 32-bit, no shift. */
3233 case 0x2: /* 32-bit, shift by 8. */
3234 case 0x4: /* 32-bit, shift by 16. */
3235 case 0x6: /* 32-bit, shift by 24. */
3236 val <<= (8 * INSTR (14, 13));
3237 for (i = 0; i < (full ? 4 : 2); i++)
3238 aarch64_set_vec_u32 (cpu, vd, i, val);
3239 break;
3240
3241 case 0xa: /* 16-bit, shift by 8. */
3242 val <<= 8;
3243 /* Fall through. */
3244 case 0x8: /* 16-bit, no shift. */
3245 for (i = 0; i < (full ? 8 : 4); i++)
3246 aarch64_set_vec_u16 (cpu, vd, i, val);
3247 break;
3248 case 0xd: /* 32-bit, mask shift by 16. */
3249 val <<= 8;
3250 val |= 0xFF;
3251 /* Fall through. */
3252 case 0xc: /* 32-bit, mask shift by 8. */
3253 val <<= 8;
3254 val |= 0xFF;
3255 for (i = 0; i < (full ? 4 : 2); i++)
3256 aarch64_set_vec_u32 (cpu, vd, i, val);
3257 break;
3258
3259 case 0xe: /* 8-bit, no shift. */
3260 for (i = 0; i < (full ? 16 : 8); i++)
3261 aarch64_set_vec_u8 (cpu, vd, i, val);
3262 break;
3263
3264 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3265 {
3266 float u = fp_immediate_for_encoding_32 (val);
3267 for (i = 0; i < (full ? 4 : 2); i++)
3268 aarch64_set_vec_float (cpu, vd, i, u);
3269 break;
3270 }
3271
3272 default:
3273 HALT_NYI;
3274 }
3275 }
3276
3277 static void
3278 do_vec_MVNI (sim_cpu *cpu)
3279 {
3280 /* instr[31] = 0
3281 instr[30] = full/half selector
3282 instr[29,19] = 10111100000
3283 instr[18,16] = high 3 bits of uimm8
3284 instr[15,12] = selector
3285 instr[11,10] = 01
3286 instr[9,5] = low 5-bits of uimm8
3287 instr[4,0] = Vd. */
3288
3289 int full = INSTR (30, 30);
3290 unsigned vd = INSTR (4, 0);
3291 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3292 unsigned i;
3293
3294 NYI_assert (29, 19, 0x5E0);
3295 NYI_assert (11, 10, 1);
3296
3297 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3298 switch (INSTR (15, 12))
3299 {
3300 case 0x0: /* 32-bit, no shift. */
3301 case 0x2: /* 32-bit, shift by 8. */
3302 case 0x4: /* 32-bit, shift by 16. */
3303 case 0x6: /* 32-bit, shift by 24. */
3304 val <<= (8 * INSTR (14, 13));
3305 val = ~ val;
3306 for (i = 0; i < (full ? 4 : 2); i++)
3307 aarch64_set_vec_u32 (cpu, vd, i, val);
3308 return;
3309
3310 case 0xa: /* 16-bit, 8 bit shift. */
3311 val <<= 8;
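/* Fall through. */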
3312 case 0x8: /* 16-bit, no shift. */
3313 val = ~ val;
3314 for (i = 0; i < (full ? 8 : 4); i++)
3315 aarch64_set_vec_u16 (cpu, vd, i, val);
3316 return;
3317
3318 case 0xd: /* 32-bit, mask shift by 16. */
3319 val <<= 8;
3320 val |= 0xFF;
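/* Fall through. */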
3321 case 0xc: /* 32-bit, mask shift by 8. */
3322 val <<= 8;
3323 val |= 0xFF;
3324 val = ~ val;
3325 for (i = 0; i < (full ? 4 : 2); i++)
3326 aarch64_set_vec_u32 (cpu, vd, i, val);
3327 return;
3328
3329 case 0xE: /* MOVI Dn, #mask64 */
3330 {
3331 uint64_t mask = 0;
3332
3333 for (i = 0; i < 8; i++)
3334 if (val & (1 << i))
3335 mask |= (0xFFUL << (i * 8));
3336 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3337 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3338 return;
3339 }
3340
3341 case 0xf: /* FMOV Vd.2D, #fpimm. */
3342 {
3343 double u = fp_immediate_for_encoding_64 (val);
3344
3345 if (! full)
3346 HALT_UNALLOC;
3347
3348 aarch64_set_vec_double (cpu, vd, 0, u);
3349 aarch64_set_vec_double (cpu, vd, 1, u);
3350 return;
3351 }
3352
3353 default:
3354 HALT_NYI;
3355 }
3356 }
3357
3358 #define ABS(A) ((A) < 0 ? - (A) : (A))
3359
3360 static void
3361 do_vec_ABS (sim_cpu *cpu)
3362 {
3363 /* instr[31] = 0
3364 instr[30] = half(0)/full(1)
3365 instr[29,24] = 00 1110
3366 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3367 instr[21,10] = 10 0000 1011 10
3368 instr[9,5] = Vn
3369 instr[4,0] = Vd. */
3370
3371 unsigned vn = INSTR (9, 5);
3372 unsigned vd = INSTR (4, 0);
3373 unsigned full = INSTR (30, 30);
3374 unsigned i;
3375
3376 NYI_assert (29, 24, 0x0E);
3377 NYI_assert (21, 10, 0x82E);
3378
3379 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3380 switch (INSTR (23, 22))
3381 {
3382 case 0:
3383 for (i = 0; i < (full ? 16 : 8); i++)
3384 aarch64_set_vec_s8 (cpu, vd, i,
3385 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3386 break;
3387
3388 case 1:
3389 for (i = 0; i < (full ? 8 : 4); i++)
3390 aarch64_set_vec_s16 (cpu, vd, i,
3391 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3392 break;
3393
3394 case 2:
3395 for (i = 0; i < (full ? 4 : 2); i++)
3396 aarch64_set_vec_s32 (cpu, vd, i,
3397 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3398 break;
3399
3400 case 3:
3401 if (! full)
3402 HALT_NYI;
3403 for (i = 0; i < 2; i++)
3404 aarch64_set_vec_s64 (cpu, vd, i,
3405 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3406 break;
3407 }
3408 }
3409
3410 static void
3411 do_vec_ADDV (sim_cpu *cpu)
3412 {
3413 /* instr[31] = 0
3414 instr[30] = full/half selector
3415 instr[29,24] = 00 1110
3416 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3417 instr[21,10] = 11 0001 1011 10
3418 instr[9,5] = Vm
3419 instr[4,0] = Rd. */
3420
3421 unsigned vm = INSTR (9, 5);
3422 unsigned rd = INSTR (4, 0);
3423 unsigned i;
3424 uint64_t val = 0;
3425 int full = INSTR (30, 30);
3426
3427 NYI_assert (29, 24, 0x0E);
3428 NYI_assert (21, 10, 0xC6E);
3429
3430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3431 switch (INSTR (23, 22))
3432 {
3433 case 0:
3434 for (i = 0; i < (full ? 16 : 8); i++)
3435 val += aarch64_get_vec_u8 (cpu, vm, i);
3436 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3437 return;
3438
3439 case 1:
3440 for (i = 0; i < (full ? 8 : 4); i++)
3441 val += aarch64_get_vec_u16 (cpu, vm, i);
3442 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3443 return;
3444
3445 case 2:
3446 for (i = 0; i < (full ? 4 : 2); i++)
3447 val += aarch64_get_vec_u32 (cpu, vm, i);
3448 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3449 return;
3450
3451 case 3:
3452 if (! full)
3453 HALT_UNALLOC;
3454 val = aarch64_get_vec_u64 (cpu, vm, 0);
3455 val += aarch64_get_vec_u64 (cpu, vm, 1);
3456 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3457 return;
3458 }
3459 }
3460
3461 static void
3462 do_vec_ins_2 (sim_cpu *cpu)
3463 {
3464 /* instr[31,21] = 01001110000
3465 instr[20,18] = size & element selector
3466 instr[17,14] = 0000
3467 instr[13] = direction: to vec(0), from vec (1)
3468 instr[12,10] = 111
3469 instr[9,5] = Vm
3470 instr[4,0] = Vd. */
3471
3472 unsigned elem;
3473 unsigned vm = INSTR (9, 5);
3474 unsigned vd = INSTR (4, 0);
3475
3476 NYI_assert (31, 21, 0x270);
3477 NYI_assert (17, 14, 0);
3478 NYI_assert (12, 10, 7);
3479
3480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3481 if (INSTR (13, 13) == 1)
3482 {
3483 if (INSTR (18, 18) == 1)
3484 {
3485 /* 32-bit moves. */
3486 elem = INSTR (20, 19);
3487 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3488 aarch64_get_vec_u32 (cpu, vm, elem));
3489 }
3490 else
3491 {
3492 /* 64-bit moves. */
3493 if (INSTR (19, 19) != 1)
3494 HALT_NYI;
3495
3496 elem = INSTR (20, 20);
3497 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3498 aarch64_get_vec_u64 (cpu, vm, elem));
3499 }
3500 }
3501 else
3502 {
3503 if (INSTR (18, 18) == 1)
3504 {
3505 /* 32-bit moves. */
3506 elem = INSTR (20, 19);
3507 aarch64_set_vec_u32 (cpu, vd, elem,
3508 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3509 }
3510 else
3511 {
3512 /* 64-bit moves. */
3513 if (INSTR (19, 19) != 1)
3514 HALT_NYI;
3515
3516 elem = INSTR (20, 20);
3517 aarch64_set_vec_u64 (cpu, vd, elem,
3518 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3519 }
3520 }
3521 }
3522
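/* Element-wise multiply helper: all N source lanes (starting at lane
   `bias') are read into temporaries before any destination lane is
   written, so vd may safely alias vn or vm.  */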
3523 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3524 do \
3525 { \
3526 DST_TYPE a[N], b[N]; \
3527 \
3528 for (i = 0; i < (N); i++) \
3529 { \
3530 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3531 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3532 } \
3533 for (i = 0; i < (N); i++) \
3534 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3535 } \
3536 while (0)
3537
3538 static void
3539 do_vec_mull (sim_cpu *cpu)
3540 {
3541 /* instr[31] = 0
3542 instr[30] = lower(0)/upper(1) selector
3543 instr[29] = signed(0)/unsigned(1)
3544 instr[28,24] = 0 1110
3545 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3546 instr[21] = 1
3547 instr[20,16] = Vm
3548 instr[15,10] = 11 0000
3549 instr[9,5] = Vn
3550 instr[4,0] = Vd. */
3551
3552 int unsign = INSTR (29, 29);
3553 int bias = INSTR (30, 30);
3554 unsigned vm = INSTR (20, 16);
3555 unsigned vn = INSTR ( 9, 5);
3556 unsigned vd = INSTR ( 4, 0);
3557 unsigned i;
3558
3559 NYI_assert (28, 24, 0x0E);
3560 NYI_assert (15, 10, 0x30);
3561
3562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3563 /* NB: Read source values before writing results, in case
3564 the source and destination vectors are the same. */
3565 switch (INSTR (23, 22))
3566 {
3567 case 0:
3568 if (bias)
3569 bias = 8;
3570 if (unsign)
3571 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3572 else
3573 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3574 return;
3575
3576 case 1:
3577 if (bias)
3578 bias = 4;
3579 if (unsign)
3580 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3581 else
3582 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3583 return;
3584
3585 case 2:
3586 if (bias)
3587 bias = 2;
3588 if (unsign)
3589 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3590 else
3591 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3592 return;
3593
3594 case 3:
3595 HALT_NYI;
3596 }
3597 }
3598
3599 static void
3600 do_vec_fadd (sim_cpu *cpu)
3601 {
3602 /* instr[31] = 0
3603 instr[30] = half(0)/full(1)
3604 instr[29,24] = 001110
3605 instr[23] = FADD(0)/FSUB(1)
3606 instr[22] = float (0)/double(1)
3607 instr[21] = 1
3608 instr[20,16] = Vm
3609 instr[15,10] = 110101
3610 instr[9,5] = Vn
3611 instr[4,0] = Vd. */
3612
3613 unsigned vm = INSTR (20, 16);
3614 unsigned vn = INSTR (9, 5);
3615 unsigned vd = INSTR (4, 0);
3616 unsigned i;
3617 int full = INSTR (30, 30);
3618
3619 NYI_assert (29, 24, 0x0E);
3620 NYI_assert (21, 21, 1);
3621 NYI_assert (15, 10, 0x35);
3622
3623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3624 if (INSTR (23, 23))
3625 {
3626 if (INSTR (22, 22))
3627 {
3628 if (! full)
3629 HALT_NYI;
3630
3631 for (i = 0; i < 2; i++)
3632 aarch64_set_vec_double (cpu, vd, i,
3633 aarch64_get_vec_double (cpu, vn, i)
3634 - aarch64_get_vec_double (cpu, vm, i));
3635 }
3636 else
3637 {
3638 for (i = 0; i < (full ? 4 : 2); i++)
3639 aarch64_set_vec_float (cpu, vd, i,
3640 aarch64_get_vec_float (cpu, vn, i)
3641 - aarch64_get_vec_float (cpu, vm, i));
3642 }
3643 }
3644 else
3645 {
3646 if (INSTR (22, 22))
3647 {
3648 if (! full)
3649 HALT_NYI;
3650
3651 for (i = 0; i < 2; i++)
3652 aarch64_set_vec_double (cpu, vd, i,
3653 aarch64_get_vec_double (cpu, vm, i)
3654 + aarch64_get_vec_double (cpu, vn, i));
3655 }
3656 else
3657 {
3658 for (i = 0; i < (full ? 4 : 2); i++)
3659 aarch64_set_vec_float (cpu, vd, i,
3660 aarch64_get_vec_float (cpu, vm, i)
3661 + aarch64_get_vec_float (cpu, vn, i));
3662 }
3663 }
3664 }
3665
3666 static void
3667 do_vec_add (sim_cpu *cpu)
3668 {
3669 /* instr[31] = 0
3670 instr[30] = full/half selector
3671 instr[29,24] = 001110
3672 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3673 instr[21] = 1
3674 instr[20,16] = Vm
3675 instr[15,10] = 100001
3676 instr[9,5] = Vn
3677 instr[4,0] = Vd. */
3678
3679 unsigned vm = INSTR (20, 16);
3680 unsigned vn = INSTR (9, 5);
3681 unsigned vd = INSTR (4, 0);
3682 unsigned i;
3683 int full = INSTR (30, 30);
3684
3685 NYI_assert (29, 24, 0x0E);
3686 NYI_assert (21, 21, 1);
3687 NYI_assert (15, 10, 0x21);
3688
3689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3690 switch (INSTR (23, 22))
3691 {
3692 case 0:
3693 for (i = 0; i < (full ? 16 : 8); i++)
3694 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3695 + aarch64_get_vec_u8 (cpu, vm, i));
3696 return;
3697
3698 case 1:
3699 for (i = 0; i < (full ? 8 : 4); i++)
3700 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3701 + aarch64_get_vec_u16 (cpu, vm, i));
3702 return;
3703
3704 case 2:
3705 for (i = 0; i < (full ? 4 : 2); i++)
3706 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3707 + aarch64_get_vec_u32 (cpu, vm, i));
3708 return;
3709
3710 case 3:
3711 if (! full)
3712 HALT_UNALLOC;
3713 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3714 + aarch64_get_vec_u64 (cpu, vm, 0));
3715 aarch64_set_vec_u64 (cpu, vd, 1,
3716 aarch64_get_vec_u64 (cpu, vn, 1)
3717 + aarch64_get_vec_u64 (cpu, vm, 1));
3718 return;
3719 }
3720 }
3721
3722 static void
3723 do_vec_mul (sim_cpu *cpu)
3724 {
3725 /* instr[31] = 0
3726 instr[30] = full/half selector
3727 instr[29,24] = 00 1110
3728 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3729 instr[21] = 1
3730 instr[20,16] = Vm
3731 instr[15,10] = 10 0111
3732 instr[9,5] = Vn
3733 instr[4,0] = Vd. */
3734
3735 unsigned vm = INSTR (20, 16);
3736 unsigned vn = INSTR (9, 5);
3737 unsigned vd = INSTR (4, 0);
3738 unsigned i;
3739 int full = INSTR (30, 30);
3740 int bias = 0;
3741
3742 NYI_assert (29, 24, 0x0E);
3743 NYI_assert (21, 21, 1);
3744 NYI_assert (15, 10, 0x27);
3745
3746 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
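/* MUL is not a widening operation: each product is written back at
   the source element size, truncating the high half.  */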
3747 switch (INSTR (23, 22))
3748 {
3749 case 0:
3750 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3751 return;
3752
3753 case 1:
3754 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3755 return;
3756
3757 case 2:
3758 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3759 return;
3760
3761 case 3:
3762 HALT_UNALLOC;
3763 }
3764 }
3765
3766 static void
3767 do_vec_MLA (sim_cpu *cpu)
3768 {
3769 /* instr[31] = 0
3770 instr[30] = full/half selector
3771 instr[29,24] = 00 1110
3772 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3773 instr[21] = 1
3774 instr[20,16] = Vm
3775 instr[15,10] = 1001 01
3776 instr[9,5] = Vn
3777 instr[4,0] = Vd. */
3778
3779 unsigned vm = INSTR (20, 16);
3780 unsigned vn = INSTR (9, 5);
3781 unsigned vd = INSTR (4, 0);
3782 unsigned i;
3783 int full = INSTR (30, 30);
3784
3785 NYI_assert (29, 24, 0x0E);
3786 NYI_assert (21, 21, 1);
3787 NYI_assert (15, 10, 0x25);
3788
3789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
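/* Like MUL, MLA is not widening: the accumulation wraps at the
   element size.  */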
3790 switch (INSTR (23, 22))
3791 {
3792 case 0:
3793 {
3794 uint16_t a[16], b[16];
3795
3796 for (i = 0; i < (full ? 16 : 8); i++)
3797 {
3798 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3799 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3800 }
3801
3802 for (i = 0; i < (full ? 16 : 8); i++)
3803 {
3804 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3805
3806 aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3807 }
3808 }
3809 return;
3810
3811 case 1:
3812 {
3813 uint32_t a[8], b[8];
3814
3815 for (i = 0; i < (full ? 8 : 4); i++)
3816 {
3817 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3818 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3819 }
3820
3821 for (i = 0; i < (full ? 8 : 4); i++)
3822 {
3823 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3824
3825 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3826 }
3827 }
3828 return;
3829
3830 case 2:
3831 {
3832 uint64_t a[4], b[4];
3833
3834 for (i = 0; i < (full ? 4 : 2); i++)
3835 {
3836 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3837 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3838 }
3839
3840 for (i = 0; i < (full ? 4 : 2); i++)
3841 {
3842 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3843
3844 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3845 }
3846 }
3847 return;
3848
3849 case 3:
3850 HALT_UNALLOC;
3851 }
3852 }
3853
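/* maxNum/minNum helpers: a NaN operand loses to a numeric one, so the
   result is a NaN only when both operands are NaNs; otherwise an
   ordinary compare decides.  */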
3854 static float
3855 fmaxnm (float a, float b)
3856 {
3857 if (! isnan (a))
3858 {
3859 if (! isnan (b))
3860 return a > b ? a : b;
3861 return a;
3862 }
3863 else if (! isnan (b))
3864 return b;
3865 return a;
3866 }
3867
3868 static float
3869 fminnm (float a, float b)
3870 {
3871 if (! isnan (a))
3872 {
3873 if (! isnan (b))
3874 return a < b ? a : b;
3875 return a;
3876 }
3877 else if (! isnan (b))
3878 return b;
3879 return a;
3880 }
3881
3882 static double
3883 dmaxnm (double a, double b)
3884 {
3885 if (! isnan (a))
3886 {
3887 if (! isnan (b))
3888 return a > b ? a : b;
3889 return a;
3890 }
3891 else if (! isnan (b))
3892 return b;
3893 return a;
3894 }
3895
3896 static double
3897 dminnm (double a, double b)
3898 {
3899 if (! isnan (a))
3900 {
3901 if (! isnan (b))
3902 return a < b ? a : b;
3903 return a;
3904 }
3905 else if (! isnan (b))
3906 return b;
3907 return a;
3908 }
3909
3910 static void
3911 do_vec_FminmaxNMP (sim_cpu *cpu)
3912 {
3913 /* instr [31] = 0
3914 instr [30] = half (0)/full (1)
3915 instr [29,24] = 10 1110
3916 instr [23] = max(0)/min(1)
3917 instr [22] = float (0)/double (1)
3918 instr [21] = 1
3919 instr [20,16] = Vn
3920 instr [15,10] = 1100 01
3921 instr [9,5] = Vm
3922 instr [4,0] = Vd. */
3923
3924 unsigned vm = INSTR (20, 16);
3925 unsigned vn = INSTR (9, 5);
3926 unsigned vd = INSTR (4, 0);
3927 int full = INSTR (30, 30);
3928
3929 NYI_assert (29, 24, 0x2E);
3930 NYI_assert (21, 21, 1);
3931 NYI_assert (15, 10, 0x31);
3932
3933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3934 if (INSTR (22, 22))
3935 {
3936 double (* fn)(double, double) = INSTR (23, 23)
3937 ? dminnm : dmaxnm;
3938
3939 if (! full)
3940 HALT_NYI;
3941 aarch64_set_vec_double (cpu, vd, 0,
3942 fn (aarch64_get_vec_double (cpu, vn, 0),
3943 aarch64_get_vec_double (cpu, vn, 1)));
3944 aarch64_set_vec_double (cpu, vd, 1,
3945 fn (aarch64_get_vec_double (cpu, vm, 0),
3946 aarch64_get_vec_double (cpu, vm, 1)));
3947 }
3948 else
3949 {
3950 float (* fn)(float, float) = INSTR (23, 23)
3951 ? fminnm : fmaxnm;
3952
3953 aarch64_set_vec_float (cpu, vd, 0,
3954 fn (aarch64_get_vec_float (cpu, vn, 0),
3955 aarch64_get_vec_float (cpu, vn, 1)));
3956 if (full)
3957 aarch64_set_vec_float (cpu, vd, 1,
3958 fn (aarch64_get_vec_float (cpu, vn, 2),
3959 aarch64_get_vec_float (cpu, vn, 3)));
3960
3961 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3962 fn (aarch64_get_vec_float (cpu, vm, 0),
3963 aarch64_get_vec_float (cpu, vm, 1)));
3964 if (full)
3965 aarch64_set_vec_float (cpu, vd, 3,
3966 fn (aarch64_get_vec_float (cpu, vm, 2),
3967 aarch64_get_vec_float (cpu, vm, 3)));
3968 }
3969 }
3970
3971 static void
3972 do_vec_AND (sim_cpu *cpu)
3973 {
3974 /* instr[31] = 0
3975 instr[30] = half (0)/full (1)
3976 instr[29,21] = 001110001
3977 instr[20,16] = Vm
3978 instr[15,10] = 000111
3979 instr[9,5] = Vn
3980 instr[4,0] = Vd. */
3981
3982 unsigned vm = INSTR (20, 16);
3983 unsigned vn = INSTR (9, 5);
3984 unsigned vd = INSTR (4, 0);
3985 unsigned i;
3986 int full = INSTR (30, 30);
3987
3988 NYI_assert (29, 21, 0x071);
3989 NYI_assert (15, 10, 0x07);
3990
3991 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3992 for (i = 0; i < (full ? 4 : 2); i++)
3993 aarch64_set_vec_u32 (cpu, vd, i,
3994 aarch64_get_vec_u32 (cpu, vn, i)
3995 & aarch64_get_vec_u32 (cpu, vm, i));
3996 }
3997
3998 static void
3999 do_vec_BSL (sim_cpu *cpu)
4000 {
4001 /* instr[31] = 0
4002 instr[30] = half (0)/full (1)
4003 instr[29,21] = 101110011
4004 instr[20,16] = Vm
4005 instr[15,10] = 000111
4006 instr[9,5] = Vn
4007 instr[4,0] = Vd. */
4008
4009 unsigned vm = INSTR (20, 16);
4010 unsigned vn = INSTR (9, 5);
4011 unsigned vd = INSTR (4, 0);
4012 unsigned i;
4013 int full = INSTR (30, 30);
4014
4015 NYI_assert (29, 21, 0x173);
4016 NYI_assert (15, 10, 0x07);
4017
4018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
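  /* BSL selects each bit from Vn where the corresponding Vd bit is 1
     and from Vm where it is 0, e.g. for bytes vd = 0xF0, vn = 0xAA,
     vm = 0x55: (0xF0 & 0xAA) | (~0xF0 & 0x55) = 0xA0 | 0x05 = 0xA5.  */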
4019 for (i = 0; i < (full ? 16 : 8); i++)
4020 aarch64_set_vec_u8 (cpu, vd, i,
4021 ( aarch64_get_vec_u8 (cpu, vd, i)
4022 & aarch64_get_vec_u8 (cpu, vn, i))
4023 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4024 & aarch64_get_vec_u8 (cpu, vm, i)));
4025 }
4026
4027 static void
4028 do_vec_EOR (sim_cpu *cpu)
4029 {
4030 /* instr[31] = 0
4031 instr[30] = half (0)/full (1)
4032 instr[29,21] = 10 1110 001
4033 instr[20,16] = Vm
4034 instr[15,10] = 000111
4035 instr[9,5] = Vn
4036 instr[4,0] = Vd. */
4037
4038 unsigned vm = INSTR (20, 16);
4039 unsigned vn = INSTR (9, 5);
4040 unsigned vd = INSTR (4, 0);
4041 unsigned i;
4042 int full = INSTR (30, 30);
4043
4044 NYI_assert (29, 21, 0x171);
4045 NYI_assert (15, 10, 0x07);
4046
4047 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4048 for (i = 0; i < (full ? 4 : 2); i++)
4049 aarch64_set_vec_u32 (cpu, vd, i,
4050 aarch64_get_vec_u32 (cpu, vn, i)
4051 ^ aarch64_get_vec_u32 (cpu, vm, i));
4052 }
4053
4054 static void
4055 do_vec_bit (sim_cpu *cpu)
4056 {
4057 /* instr[31] = 0
4058 instr[30] = half (0)/full (1)
4059 instr[29,23] = 10 1110 1
4060 instr[22] = BIT (0) / BIF (1)
4061 instr[21] = 1
4062 instr[20,16] = Vm
4063 instr[15,10] = 0001 11
4064 instr[9,5] = Vn
4065 instr[4,0] = Vd. */
4066
4067 unsigned vm = INSTR (20, 16);
4068 unsigned vn = INSTR (9, 5);
4069 unsigned vd = INSTR (4, 0);
4070 unsigned full = INSTR (30, 30);
4071 unsigned test_false = INSTR (22, 22);
4072 unsigned i;
4073
4074 NYI_assert (29, 23, 0x5D);
4075 NYI_assert (21, 21, 1);
4076 NYI_assert (15, 10, 0x07);
4077
4078 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4079 /* BIT/BIF are bit-granular selects: BIT copies each Vn bit into Vd
4080 where the corresponding Vm bit is 1, BIF where it is 0.  Work on
4081 64-bit chunks; the half form only touches chunk 0.  */
4082 for (i = 0; i < (full ? 2 : 1); i++)
4083 {
4084 uint64_t vd_val = aarch64_get_vec_u64 (cpu, vd, i);
4085 uint64_t vn_val = aarch64_get_vec_u64 (cpu, vn, i);
4086 uint64_t vm_val = aarch64_get_vec_u64 (cpu, vm, i);
4087 aarch64_set_vec_u64 (cpu, vd, i, test_false
4088 ? ((vd_val & vm_val) | (vn_val & ~vm_val))
4089 : ((vd_val & ~vm_val) | (vn_val & vm_val)));
4090 }
4091 }
4092
4093 static void
4094 do_vec_ORN (sim_cpu *cpu)
4095 {
4096 /* instr[31] = 0
4097 instr[30] = half (0)/full (1)
4098 instr[29,21] = 00 1110 111
4099 instr[20,16] = Vm
4100 instr[15,10] = 00 0111
4101 instr[9,5] = Vn
4102 instr[4,0] = Vd. */
4103
4104 unsigned vm = INSTR (20, 16);
4105 unsigned vn = INSTR (9, 5);
4106 unsigned vd = INSTR (4, 0);
4107 unsigned i;
4108 int full = INSTR (30, 30);
4109
4110 NYI_assert (29, 21, 0x077);
4111 NYI_assert (15, 10, 0x07);
4112
4113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4114 for (i = 0; i < (full ? 16 : 8); i++)
4115 aarch64_set_vec_u8 (cpu, vd, i,
4116 aarch64_get_vec_u8 (cpu, vn, i)
4117 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4118 }
4119
4120 static void
4121 do_vec_ORR (sim_cpu *cpu)
4122 {
4123 /* instr[31] = 0
4124 instr[30] = half (0)/full (1)
4125 instr[29,21] = 00 1110 101
4126 instr[20,16] = Vm
4127 instr[15,10] = 0001 11
4128 instr[9,5] = Vn
4129 instr[4,0] = Vd. */
4130
4131 unsigned vm = INSTR (20, 16);
4132 unsigned vn = INSTR (9, 5);
4133 unsigned vd = INSTR (4, 0);
4134 unsigned i;
4135 int full = INSTR (30, 30);
4136
4137 NYI_assert (29, 21, 0x075);
4138 NYI_assert (15, 10, 0x07);
4139
4140 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4141 for (i = 0; i < (full ? 16 : 8); i++)
4142 aarch64_set_vec_u8 (cpu, vd, i,
4143 aarch64_get_vec_u8 (cpu, vn, i)
4144 | aarch64_get_vec_u8 (cpu, vm, i));
4145 }
4146
4147 static void
4148 do_vec_BIC (sim_cpu *cpu)
4149 {
4150 /* instr[31] = 0
4151 instr[30] = half (0)/full (1)
4152 instr[29,21] = 00 1110 011
4153 instr[20,16] = Vm
4154 instr[15,10] = 00 0111
4155 instr[9,5] = Vn
4156 instr[4,0] = Vd. */
4157
4158 unsigned vm = INSTR (20, 16);
4159 unsigned vn = INSTR (9, 5);
4160 unsigned vd = INSTR (4, 0);
4161 unsigned i;
4162 int full = INSTR (30, 30);
4163
4164 NYI_assert (29, 21, 0x073);
4165 NYI_assert (15, 10, 0x07);
4166
4167 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4168 for (i = 0; i < (full ? 16 : 8); i++)
4169 aarch64_set_vec_u8 (cpu, vd, i,
4170 aarch64_get_vec_u8 (cpu, vn, i)
4171 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4172 }
4173
4174 static void
4175 do_vec_XTN (sim_cpu *cpu)
4176 {
4177 /* instr[31] = 0
4178 instr[30] = first part (0)/ second part (1)
4179 instr[29,24] = 00 1110
4180 instr[23,22] = size: byte(00), half(01), word (10)
4181 instr[21,10] = 1000 0100 1010
4182 instr[9,5] = Vs
4183 instr[4,0] = Vd. */
4184
4185 unsigned vs = INSTR (9, 5);
4186 unsigned vd = INSTR (4, 0);
4187 unsigned bias = INSTR (30, 30);
4188 unsigned i;
4189
4190 NYI_assert (29, 24, 0x0E);
4191 NYI_assert (21, 10, 0x84A);
4192
4193 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
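  /* XTN narrows by keeping the low half of each source element (the
     truncation happens implicitly on the narrower store); the XTN2
     form (bias set) writes the results into the upper half of Vd and
     leaves the lower half intact.  */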
4194 switch (INSTR (23, 22))
4195 {
4196 case 0:
4197 if (bias)
4198 for (i = 0; i < 8; i++)
4199 aarch64_set_vec_u8 (cpu, vd, i + 8,
4200 aarch64_get_vec_u16 (cpu, vs, i));
4201 else
4202 for (i = 0; i < 8; i++)
4203 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4204 return;
4205
4206 case 1:
4207 if (bias)
4208 for (i = 0; i < 4; i++)
4209 aarch64_set_vec_u16 (cpu, vd, i + 4,
4210 aarch64_get_vec_u32 (cpu, vs, i));
4211 else
4212 for (i = 0; i < 4; i++)
4213 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4214 return;
4215
4216 case 2:
4217 if (bias)
4218 for (i = 0; i < 2; i++)
4219 aarch64_set_vec_u32 (cpu, vd, i + 2,
4220 aarch64_get_vec_u64 (cpu, vs, i));
4221 else
4222 for (i = 0; i < 2; i++)
4223 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4224 return;
4225 }
4226 }
4227
4228 static void
4229 do_vec_maxv (sim_cpu *cpu)
4230 {
4231 /* instr[31] = 0
4232 instr[30] = half(0)/full(1)
4233 instr[29] = signed (0)/unsigned(1)
4234 instr[28,24] = 0 1110
4235 instr[23,22] = size: byte(00), half(01), word (10)
4236 instr[21] = 1
4237 instr[20,17] = 1 000
4238 instr[16] = max(0)/min(1)
4239 instr[15,10] = 1010 10
4240 instr[9,5] = V source
4241 instr[4,0] = R dest. */
4242
4243 unsigned vs = INSTR (9, 5);
4244 unsigned rd = INSTR (4, 0);
4245 unsigned full = INSTR (30, 30);
4246 unsigned i;
4247
4248 NYI_assert (28, 24, 0x0E);
4249 NYI_assert (21, 21, 1);
4250 NYI_assert (20, 17, 8);
4251 NYI_assert (15, 10, 0x2A);
4252
4253 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
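  /* Across-lanes reductions: bit 29 selects unsigned and bit 16 selects
     min, so the switch key is 0 SMAXV, 1 SMINV, 2 UMAXV, 3 UMINV; each
     folds every lane of Vs into a single scalar result.  */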
4254 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4255 {
4256 case 0: /* SMAXV. */
4257 {
4258 int64_t smax;
4259 switch (INSTR (23, 22))
4260 {
4261 case 0:
4262 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4263 for (i = 1; i < (full ? 16 : 8); i++)
4264 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4265 break;
4266 case 1:
4267 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4268 for (i = 1; i < (full ? 8 : 4); i++)
4269 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4270 break;
4271 case 2:
4272 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4273 for (i = 1; i < (full ? 4 : 2); i++)
4274 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4275 break;
4276 case 3:
4277 HALT_UNALLOC;
4278 }
4279 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4280 return;
4281 }
4282
4283 case 1: /* SMINV. */
4284 {
4285 int64_t smin;
4286 switch (INSTR (23, 22))
4287 {
4288 case 0:
4289 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4290 for (i = 1; i < (full ? 16 : 8); i++)
4291 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4292 break;
4293 case 1:
4294 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4295 for (i = 1; i < (full ? 8 : 4); i++)
4296 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4297 break;
4298 case 2:
4299 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4300 for (i = 1; i < (full ? 4 : 2); i++)
4301 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4302 break;
4303
4304 case 3:
4305 HALT_UNALLOC;
4306 }
4307 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4308 return;
4309 }
4310
4311 case 2: /* UMAXV. */
4312 {
4313 uint64_t umax;
4314 switch (INSTR (23, 22))
4315 {
4316 case 0:
4317 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4318 for (i = 1; i < (full ? 16 : 8); i++)
4319 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4320 break;
4321 case 1:
4322 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4323 for (i = 1; i < (full ? 8 : 4); i++)
4324 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4325 break;
4326 case 2:
4327 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4328 for (i = 1; i < (full ? 4 : 2); i++)
4329 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4330 break;
4331
4332 case 3:
4333 HALT_UNALLOC;
4334 }
4335 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4336 return;
4337 }
4338
4339 case 3: /* UMINV. */
4340 {
4341 uint64_t umin;
4342 switch (INSTR (23, 22))
4343 {
4344 case 0:
4345 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4346 for (i = 1; i < (full ? 16 : 8); i++)
4347 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4348 break;
4349 case 1:
4350 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4351 for (i = 1; i < (full ? 8 : 4); i++)
4352 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4353 break;
4354 case 2:
4355 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4356 for (i = 1; i < (full ? 4 : 2); i++)
4357 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4358 break;
4359
4360 case 3:
4361 HALT_UNALLOC;
4362 }
4363 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4364 return;
4365 }
4366 }
4367 }
4368
4369 static void
4370 do_vec_fminmaxV (sim_cpu *cpu)
4371 {
4372 /* instr[31,24] = 0110 1110
4373 instr[23] = max(0)/min(1)
4374 instr[22,14] = 011 0000 11
4375 instr[13,12] = nm(00)/normal(11)
4376 instr[11,10] = 10
4377 instr[9,5] = V source
4378 instr[4,0] = R dest. */
4379
4380 unsigned vs = INSTR (9, 5);
4381 unsigned rd = INSTR (4, 0);
4382 unsigned i;
4383 float res = aarch64_get_vec_float (cpu, vs, 0);
4384
4385 NYI_assert (31, 24, 0x6E);
4386 NYI_assert (22, 14, 0x0C3);
4387 NYI_assert (11, 10, 2);
4388
4389 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4390 if (INSTR (23, 23))
4391 {
4392 switch (INSTR (13, 12))
4393 {
4394 case 0: /* FMINNMV. */
4395 for (i = 1; i < 4; i++)
4396 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4397 break;
4398
4399 case 3: /* FMINV. */
4400 for (i = 1; i < 4; i++)
4401 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4402 break;
4403
4404 default:
4405 HALT_NYI;
4406 }
4407 }
4408 else
4409 {
4410 switch (INSTR (13, 12))
4411 {
4412 case 0: /* FMAXNMV. */
4413 for (i = 1; i < 4; i++)
4414 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4415 break;
4416
4417 case 3: /* FMAXV. */
4418 for (i = 1; i < 4; i++)
4419 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4420 break;
4421
4422 default:
4423 HALT_NYI;
4424 }
4425 }
4426
4427 aarch64_set_FP_float (cpu, rd, res);
4428 }
4429
4430 static void
4431 do_vec_Fminmax (sim_cpu *cpu)
4432 {
4433 /* instr[31] = 0
4434 instr[30] = half(0)/full(1)
4435 instr[29,24] = 00 1110
4436 instr[23] = max(0)/min(1)
4437 instr[22] = float(0)/double(1)
4438 instr[21] = 1
4439 instr[20,16] = Vm
4440 instr[15,14] = 11
4441 instr[13,12] = nm(00)/normal(11)
4442 instr[11,10] = 01
4443 instr[9,5] = Vn
4444 instr[4,0] = Vd. */
4445
4446 unsigned vm = INSTR (20, 16);
4447 unsigned vn = INSTR (9, 5);
4448 unsigned vd = INSTR (4, 0);
4449 unsigned full = INSTR (30, 30);
4450 unsigned min = INSTR (23, 23);
4451 unsigned i;
4452
4453 NYI_assert (29, 24, 0x0E);
4454 NYI_assert (21, 21, 1);
4455 NYI_assert (15, 14, 3);
4456 NYI_assert (11, 10, 1);
4457
4458 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4459 if (INSTR (22, 22))
4460 {
4461 double (* func)(double, double);
4462
4463 if (! full)
4464 HALT_NYI;
4465
4466 if (INSTR (13, 12) == 0)
4467 func = min ? dminnm : dmaxnm;
4468 else if (INSTR (13, 12) == 3)
4469 func = min ? fmin : fmax;
4470 else
4471 HALT_NYI;
4472
4473 for (i = 0; i < 2; i++)
4474 aarch64_set_vec_double (cpu, vd, i,
4475 func (aarch64_get_vec_double (cpu, vn, i),
4476 aarch64_get_vec_double (cpu, vm, i)));
4477 }
4478 else
4479 {
4480 float (* func)(float, float);
4481
4482 if (INSTR (13, 12) == 0)
4483 func = min ? fminnm : fmaxnm;
4484 else if (INSTR (13, 12) == 3)
4485 func = min ? fminf : fmaxf;
4486 else
4487 HALT_NYI;
4488
4489 for (i = 0; i < (full ? 4 : 2); i++)
4490 aarch64_set_vec_float (cpu, vd, i,
4491 func (aarch64_get_vec_float (cpu, vn, i),
4492 aarch64_get_vec_float (cpu, vm, i)));
4493 }
4494 }
4495
4496 static void
4497 do_vec_SCVTF (sim_cpu *cpu)
4498 {
4499 /* instr[31] = 0
4500 instr[30] = Q
4501 instr[29,23] = 00 1110 0
4502 instr[22] = float(0)/double(1)
4503 instr[21,10] = 10 0001 1101 10
4504 instr[9,5] = Vn
4505 instr[4,0] = Vd. */
4506
4507 unsigned vn = INSTR (9, 5);
4508 unsigned vd = INSTR (4, 0);
4509 unsigned full = INSTR (30, 30);
4510 unsigned size = INSTR (22, 22);
4511 unsigned i;
4512
4513 NYI_assert (29, 23, 0x1C);
4514 NYI_assert (21, 10, 0x876);
4515
4516 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4517 if (size)
4518 {
4519 if (! full)
4520 HALT_UNALLOC;
4521
4522 for (i = 0; i < 2; i++)
4523 {
4524 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4525 aarch64_set_vec_double (cpu, vd, i, val);
4526 }
4527 }
4528 else
4529 {
4530 for (i = 0; i < (full ? 4 : 2); i++)
4531 {
4532 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4533 aarch64_set_vec_float (cpu, vd, i, val);
4534 }
4535 }
4536 }
4537
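/* Element-wise compare helpers: each lane of Vd is set to all-ones when
   the comparison holds and to all-zeros otherwise, e.g. a signed CMGT
   of {5, -1} against {3, 0} yields {~0, 0}.  SOURCE selects the signed
   (s) or unsigned (u) element accessors.  */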
4538 #define VEC_CMP(SOURCE, CMP) \
4539 do \
4540 { \
4541 switch (size) \
4542 { \
4543 case 0: \
4544 for (i = 0; i < (full ? 16 : 8); i++) \
4545 aarch64_set_vec_u8 (cpu, vd, i, \
4546 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4547 CMP \
4548 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4549 ? -1 : 0); \
4550 return; \
4551 case 1: \
4552 for (i = 0; i < (full ? 8 : 4); i++) \
4553 aarch64_set_vec_u16 (cpu, vd, i, \
4554 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4555 CMP \
4556 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4557 ? -1 : 0); \
4558 return; \
4559 case 2: \
4560 for (i = 0; i < (full ? 4 : 2); i++) \
4561 aarch64_set_vec_u32 (cpu, vd, i, \
4562 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4563 CMP \
4564 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4565 ? -1 : 0); \
4566 return; \
4567 case 3: \
4568 if (! full) \
4569 HALT_UNALLOC; \
4570 for (i = 0; i < 2; i++) \
4571 aarch64_set_vec_u64 (cpu, vd, i, \
4572 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4573 CMP \
4574 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4575 ? -1ULL : 0); \
4576 return; \
4577 } \
4578 } \
4579 while (0)
4580
4581 #define VEC_CMP0(SOURCE, CMP) \
4582 do \
4583 { \
4584 switch (size) \
4585 { \
4586 case 0: \
4587 for (i = 0; i < (full ? 16 : 8); i++) \
4588 aarch64_set_vec_u8 (cpu, vd, i, \
4589 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4590 CMP 0 ? -1 : 0); \
4591 return; \
4592 case 1: \
4593 for (i = 0; i < (full ? 8 : 4); i++) \
4594 aarch64_set_vec_u16 (cpu, vd, i, \
4595 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4596 CMP 0 ? -1 : 0); \
4597 return; \
4598 case 2: \
4599 for (i = 0; i < (full ? 4 : 2); i++) \
4600 aarch64_set_vec_u32 (cpu, vd, i, \
4601 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4602 CMP 0 ? -1 : 0); \
4603 return; \
4604 case 3: \
4605 if (! full) \
4606 HALT_UNALLOC; \
4607 for (i = 0; i < 2; i++) \
4608 aarch64_set_vec_u64 (cpu, vd, i, \
4609 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4610 CMP 0 ? -1ULL : 0); \
4611 return; \
4612 } \
4613 } \
4614 while (0)
4615
4616 #define VEC_FCMP0(CMP) \
4617 do \
4618 { \
4619 if (vm != 0) \
4620 HALT_NYI; \
4621 if (INSTR (22, 22)) \
4622 { \
4623 if (! full) \
4624 HALT_NYI; \
4625 for (i = 0; i < 2; i++) \
4626 aarch64_set_vec_u64 (cpu, vd, i, \
4627 aarch64_get_vec_double (cpu, vn, i) \
4628 CMP 0.0 ? -1 : 0); \
4629 } \
4630 else \
4631 { \
4632 for (i = 0; i < (full ? 4 : 2); i++) \
4633 aarch64_set_vec_u32 (cpu, vd, i, \
4634 aarch64_get_vec_float (cpu, vn, i) \
4635 CMP 0.0 ? -1 : 0); \
4636 } \
4637 return; \
4638 } \
4639 while (0)
4640
4641 #define VEC_FCMP(CMP) \
4642 do \
4643 { \
4644 if (INSTR (22, 22)) \
4645 { \
4646 if (! full) \
4647 HALT_NYI; \
4648 for (i = 0; i < 2; i++) \
4649 aarch64_set_vec_u64 (cpu, vd, i, \
4650 aarch64_get_vec_double (cpu, vn, i) \
4651 CMP \
4652 aarch64_get_vec_double (cpu, vm, i) \
4653 ? -1 : 0); \
4654 } \
4655 else \
4656 { \
4657 for (i = 0; i < (full ? 4 : 2); i++) \
4658 aarch64_set_vec_u32 (cpu, vd, i, \
4659 aarch64_get_vec_float (cpu, vn, i) \
4660 CMP \
4661 aarch64_get_vec_float (cpu, vm, i) \
4662 ? -1 : 0); \
4663 } \
4664 return; \
4665 } \
4666 while (0)
4667
4668 static void
4669 do_vec_compare (sim_cpu *cpu)
4670 {
4671 /* instr[31] = 0
4672 instr[30] = half(0)/full(1)
4673 instr[29] = part-of-comparison-type
4674 instr[28,24] = 0 1110
4675 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4676 type of float compares: single (-0) / double (-1)
4677 instr[21] = 1
4678 instr[20,16] = Vm or 00000 (compare vs 0)
4679 instr[15,10] = part-of-comparison-type
4680 instr[9,5] = Vn
4681 instr[4,0] = Vd. */
4682
4683 int full = INSTR (30, 30);
4684 int size = INSTR (23, 22);
4685 unsigned vm = INSTR (20, 16);
4686 unsigned vn = INSTR (9, 5);
4687 unsigned vd = INSTR (4, 0);
4688 unsigned i;
4689
4690 NYI_assert (28, 24, 0x0E);
4691 NYI_assert (21, 21, 1);
4692
4693 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4694 if ((INSTR (11, 11)
4695 && INSTR (14, 14))
4696 || ((INSTR (11, 11) == 0
4697 && INSTR (10, 10) == 0)))
4698 {
4699 /* A compare vs 0; but if Vm is non-zero this is really another instruction that shares the encoding pattern. */
4700 if (vm != 0)
4701 {
4702 if (INSTR (15, 10) == 0x2A)
4703 do_vec_maxv (cpu);
4704 else if (INSTR (15, 10) == 0x32
4705 || INSTR (15, 10) == 0x3E)
4706 do_vec_fminmaxV (cpu);
4707 else if (INSTR (29, 23) == 0x1C
4708 && INSTR (21, 10) == 0x876)
4709 do_vec_SCVTF (cpu);
4710 else
4711 HALT_NYI;
4712 return;
4713 }
4714 }
4715
4716 if (INSTR (14, 14))
4717 {
4718 /* A floating point compare. */
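      /* The decode key packs bit 29 (U), bit 23 (the min/zero-variant
	 selector) and opcode bits 13:10, e.g. the GT entry 0x39 below
	 is 1:1:1001.  */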
4719 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4720 | INSTR (13, 10);
4721
4722 NYI_assert (15, 15, 1);
4723
4724 switch (decode)
4725 {
4726 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4727 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4728 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4729 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4730 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4731 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4732 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4733 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4734
4735 default:
4736 HALT_NYI;
4737 }
4738 }
4739 else
4740 {
4741 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4742
4743 switch (decode)
4744 {
4745 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4746 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4747 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4748 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4749 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4750 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4751 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4752 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4753 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4754 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4755 default:
4756 if (vm == 0)
4757 HALT_NYI;
4758 do_vec_maxv (cpu);
4759 }
4760 }
4761 }
4762
4763 static void
4764 do_vec_SSHL (sim_cpu *cpu)
4765 {
4766 /* instr[31] = 0
4767 instr[30] = first part (0)/ second part (1)
4768 instr[29,24] = 00 1110
4769 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4770 instr[21] = 1
4771 instr[20,16] = Vm
4772 instr[15,10] = 0100 01
4773 instr[9,5] = Vn
4774 instr[4,0] = Vd. */
4775
4776 unsigned full = INSTR (30, 30);
4777 unsigned vm = INSTR (20, 16);
4778 unsigned vn = INSTR (9, 5);
4779 unsigned vd = INSTR (4, 0);
4780 unsigned i;
4781 signed int shift;
4782
4783 NYI_assert (29, 24, 0x0E);
4784 NYI_assert (21, 21, 1);
4785 NYI_assert (15, 10, 0x11);
4786
4787 /* SSHL: each element of Vn is shifted by the signed amount held in the low byte of the corresponding Vm element; a negative amount shifts right (arithmetically, for SSHL). */
4788
4789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4790 switch (INSTR (23, 22))
4791 {
4792 case 0:
4793 for (i = 0; i < (full ? 16 : 8); i++)
4794 {
4795 shift = aarch64_get_vec_s8 (cpu, vm, i);
4796 if (shift >= 0)
4797 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4798 << shift);
4799 else
4800 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4801 >> - shift);
4802 }
4803 return;
4804
4805 case 1:
4806 for (i = 0; i < (full ? 8 : 4); i++)
4807 {
4808 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4809 if (shift >= 0)
4810 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4811 << shift);
4812 else
4813 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4814 >> - shift);
4815 }
4816 return;
4817
4818 case 2:
4819 for (i = 0; i < (full ? 4 : 2); i++)
4820 {
4821 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4822 if (shift >= 0)
4823 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4824 << shift);
4825 else
4826 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4827 >> - shift);
4828 }
4829 return;
4830
4831 case 3:
4832 if (! full)
4833 HALT_UNALLOC;
4834 for (i = 0; i < 2; i++)
4835 {
4836 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4837 if (shift >= 0)
4838 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4839 << shift);
4840 else
4841 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4842 >> - shift);
4843 }
4844 return;
4845 }
4846 }
4847
4848 static void
4849 do_vec_USHL (sim_cpu *cpu)
4850 {
4851 /* instr[31] = 0
4852 instr[30] = first part (0)/ second part (1)
4853 instr[29,24] = 10 1110
4854 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4855 instr[21] = 1
4856 instr[20,16] = Vm
4857 instr[15,10] = 0100 01
4858 instr[9,5] = Vn
4859 instr[4,0] = Vd */
4860
4861 unsigned full = INSTR (30, 30);
4862 unsigned vm = INSTR (20, 16);
4863 unsigned vn = INSTR (9, 5);
4864 unsigned vd = INSTR (4, 0);
4865 unsigned i;
4866 signed int shift;
4867
4868 NYI_assert (29, 24, 0x2E);
4869 NYI_assert (15, 10, 0x11);
4870
4871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4872 switch (INSTR (23, 22))
4873 {
4874 case 0:
4875 for (i = 0; i < (full ? 16 : 8); i++)
4876 {
4877 shift = aarch64_get_vec_s8 (cpu, vm, i);
4878 if (shift >= 0)
4879 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4880 << shift);
4881 else
4882 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4883 >> - shift);
4884 }
4885 return;
4886
4887 case 1:
4888 for (i = 0; i < (full ? 8 : 4); i++)
4889 {
4890 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4891 if (shift >= 0)
4892 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4893 << shift);
4894 else
4895 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4896 >> - shift);
4897 }
4898 return;
4899
4900 case 2:
4901 for (i = 0; i < (full ? 4 : 2); i++)
4902 {
4903 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4904 if (shift >= 0)
4905 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4906 << shift);
4907 else
4908 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4909 >> - shift);
4910 }
4911 return;
4912
4913 case 3:
4914 if (! full)
4915 HALT_UNALLOC;
4916 for (i = 0; i < 2; i++)
4917 {
4918 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4919 if (shift >= 0)
4920 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4921 << shift);
4922 else
4923 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4924 >> - shift);
4925 }
4926 return;
4927 }
4928 }
4929
4930 static void
4931 do_vec_FMLA (sim_cpu *cpu)
4932 {
4933 /* instr[31] = 0
4934 instr[30] = full/half selector
4935 instr[29,23] = 0011100
4936 instr[22] = size: 0=>float, 1=>double
4937 instr[21] = 1
4938 instr[20,16] = Vm
4939 instr[15,10] = 1100 11
4940 instr[9,5] = Vn
4941 instr[4,0] = Vd. */
4942
4943 unsigned vm = INSTR (20, 16);
4944 unsigned vn = INSTR (9, 5);
4945 unsigned vd = INSTR (4, 0);
4946 unsigned i;
4947 int full = INSTR (30, 30);
4948
4949 NYI_assert (29, 23, 0x1C);
4950 NYI_assert (21, 21, 1);
4951 NYI_assert (15, 10, 0x33);
4952
4953 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
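  /* Note: the accumulation below is a plain multiply-add expression, so
     whether the intermediate product is rounded (unlike a true fused
     FMLA) depends on the compiler's FP_CONTRACT behaviour.  */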
4954 if (INSTR (22, 22))
4955 {
4956 if (! full)
4957 HALT_UNALLOC;
4958 for (i = 0; i < 2; i++)
4959 aarch64_set_vec_double (cpu, vd, i,
4960 aarch64_get_vec_double (cpu, vn, i) *
4961 aarch64_get_vec_double (cpu, vm, i) +
4962 aarch64_get_vec_double (cpu, vd, i));
4963 }
4964 else
4965 {
4966 for (i = 0; i < (full ? 4 : 2); i++)
4967 aarch64_set_vec_float (cpu, vd, i,
4968 aarch64_get_vec_float (cpu, vn, i) *
4969 aarch64_get_vec_float (cpu, vm, i) +
4970 aarch64_get_vec_float (cpu, vd, i));
4971 }
4972 }
4973
4974 static void
4975 do_vec_max (sim_cpu *cpu)
4976 {
4977 /* instr[31] = 0
4978 instr[30] = full/half selector
4979 instr[29] = SMAX (0) / UMAX (1)
4980 instr[28,24] = 0 1110
4981 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4982 instr[21] = 1
4983 instr[20,16] = Vm
4984 instr[15,10] = 0110 01
4985 instr[9,5] = Vn
4986 instr[4,0] = Vd. */
4987
4988 unsigned vm = INSTR (20, 16);
4989 unsigned vn = INSTR (9, 5);
4990 unsigned vd = INSTR (4, 0);
4991 unsigned i;
4992 int full = INSTR (30, 30);
4993
4994 NYI_assert (28, 24, 0x0E);
4995 NYI_assert (21, 21, 1);
4996 NYI_assert (15, 10, 0x19);
4997
4998 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4999 if (INSTR (29, 29))
5000 {
5001 switch (INSTR (23, 22))
5002 {
5003 case 0:
5004 for (i = 0; i < (full ? 16 : 8); i++)
5005 aarch64_set_vec_u8 (cpu, vd, i,
5006 aarch64_get_vec_u8 (cpu, vn, i)
5007 > aarch64_get_vec_u8 (cpu, vm, i)
5008 ? aarch64_get_vec_u8 (cpu, vn, i)
5009 : aarch64_get_vec_u8 (cpu, vm, i));
5010 return;
5011
5012 case 1:
5013 for (i = 0; i < (full ? 8 : 4); i++)
5014 aarch64_set_vec_u16 (cpu, vd, i,
5015 aarch64_get_vec_u16 (cpu, vn, i)
5016 > aarch64_get_vec_u16 (cpu, vm, i)
5017 ? aarch64_get_vec_u16 (cpu, vn, i)
5018 : aarch64_get_vec_u16 (cpu, vm, i));
5019 return;
5020
5021 case 2:
5022 for (i = 0; i < (full ? 4 : 2); i++)
5023 aarch64_set_vec_u32 (cpu, vd, i,
5024 aarch64_get_vec_u32 (cpu, vn, i)
5025 > aarch64_get_vec_u32 (cpu, vm, i)
5026 ? aarch64_get_vec_u32 (cpu, vn, i)
5027 : aarch64_get_vec_u32 (cpu, vm, i));
5028 return;
5029
5030 case 3:
5031 HALT_UNALLOC;
5032 }
5033 }
5034 else
5035 {
5036 switch (INSTR (23, 22))
5037 {
5038 case 0:
5039 for (i = 0; i < (full ? 16 : 8); i++)
5040 aarch64_set_vec_s8 (cpu, vd, i,
5041 aarch64_get_vec_s8 (cpu, vn, i)
5042 > aarch64_get_vec_s8 (cpu, vm, i)
5043 ? aarch64_get_vec_s8 (cpu, vn, i)
5044 : aarch64_get_vec_s8 (cpu, vm, i));
5045 return;
5046
5047 case 1:
5048 for (i = 0; i < (full ? 8 : 4); i++)
5049 aarch64_set_vec_s16 (cpu, vd, i,
5050 aarch64_get_vec_s16 (cpu, vn, i)
5051 > aarch64_get_vec_s16 (cpu, vm, i)
5052 ? aarch64_get_vec_s16 (cpu, vn, i)
5053 : aarch64_get_vec_s16 (cpu, vm, i));
5054 return;
5055
5056 case 2:
5057 for (i = 0; i < (full ? 4 : 2); i++)
5058 aarch64_set_vec_s32 (cpu, vd, i,
5059 aarch64_get_vec_s32 (cpu, vn, i)
5060 > aarch64_get_vec_s32 (cpu, vm, i)
5061 ? aarch64_get_vec_s32 (cpu, vn, i)
5062 : aarch64_get_vec_s32 (cpu, vm, i));
5063 return;
5064
5065 case 3:
5066 HALT_UNALLOC;
5067 }
5068 }
5069 }
5070
5071 static void
5072 do_vec_min (sim_cpu *cpu)
5073 {
5074 /* instr[31] = 0
5075 instr[30] = full/half selector
5076 instr[29] = SMIN (0) / UMIN (1)
5077 instr[28,24] = 0 1110
5078 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5079 instr[21] = 1
5080 instr[20,16] = Vm
5081 instr[15,10] = 0110 11
5082 instr[9,5] = Vn
5083 instr[4,0] = Vd. */
5084
5085 unsigned vm = INSTR (20, 16);
5086 unsigned vn = INSTR (9, 5);
5087 unsigned vd = INSTR (4, 0);
5088 unsigned i;
5089 int full = INSTR (30, 30);
5090
5091 NYI_assert (28, 24, 0x0E);
5092 NYI_assert (21, 21, 1);
5093 NYI_assert (15, 10, 0x1B);
5094
5095 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5096 if (INSTR (29, 29))
5097 {
5098 switch (INSTR (23, 22))
5099 {
5100 case 0:
5101 for (i = 0; i < (full ? 16 : 8); i++)
5102 aarch64_set_vec_u8 (cpu, vd, i,
5103 aarch64_get_vec_u8 (cpu, vn, i)
5104 < aarch64_get_vec_u8 (cpu, vm, i)
5105 ? aarch64_get_vec_u8 (cpu, vn, i)
5106 : aarch64_get_vec_u8 (cpu, vm, i));
5107 return;
5108
5109 case 1:
5110 for (i = 0; i < (full ? 8 : 4); i++)
5111 aarch64_set_vec_u16 (cpu, vd, i,
5112 aarch64_get_vec_u16 (cpu, vn, i)
5113 < aarch64_get_vec_u16 (cpu, vm, i)
5114 ? aarch64_get_vec_u16 (cpu, vn, i)
5115 : aarch64_get_vec_u16 (cpu, vm, i));
5116 return;
5117
5118 case 2:
5119 for (i = 0; i < (full ? 4 : 2); i++)
5120 aarch64_set_vec_u32 (cpu, vd, i,
5121 aarch64_get_vec_u32 (cpu, vn, i)
5122 < aarch64_get_vec_u32 (cpu, vm, i)
5123 ? aarch64_get_vec_u32 (cpu, vn, i)
5124 : aarch64_get_vec_u32 (cpu, vm, i));
5125 return;
5126
5127 case 3:
5128 HALT_UNALLOC;
5129 }
5130 }
5131 else
5132 {
5133 switch (INSTR (23, 22))
5134 {
5135 case 0:
5136 for (i = 0; i < (full ? 16 : 8); i++)
5137 aarch64_set_vec_s8 (cpu, vd, i,
5138 aarch64_get_vec_s8 (cpu, vn, i)
5139 < aarch64_get_vec_s8 (cpu, vm, i)
5140 ? aarch64_get_vec_s8 (cpu, vn, i)
5141 : aarch64_get_vec_s8 (cpu, vm, i));
5142 return;
5143
5144 case 1:
5145 for (i = 0; i < (full ? 8 : 4); i++)
5146 aarch64_set_vec_s16 (cpu, vd, i,
5147 aarch64_get_vec_s16 (cpu, vn, i)
5148 < aarch64_get_vec_s16 (cpu, vm, i)
5149 ? aarch64_get_vec_s16 (cpu, vn, i)
5150 : aarch64_get_vec_s16 (cpu, vm, i));
5151 return;
5152
5153 case 2:
5154 for (i = 0; i < (full ? 4 : 2); i++)
5155 aarch64_set_vec_s32 (cpu, vd, i,
5156 aarch64_get_vec_s32 (cpu, vn, i)
5157 < aarch64_get_vec_s32 (cpu, vm, i)
5158 ? aarch64_get_vec_s32 (cpu, vn, i)
5159 : aarch64_get_vec_s32 (cpu, vm, i));
5160 return;
5161
5162 case 3:
5163 HALT_UNALLOC;
5164 }
5165 }
5166 }
5167
5168 static void
5169 do_vec_sub_long (sim_cpu *cpu)
5170 {
5171 /* instr[31] = 0
5172 instr[30] = lower (0) / upper (1)
5173 instr[29] = signed (0) / unsigned (1)
5174 instr[28,24] = 0 1110
5175 instr[23,22] = size: bytes (00), half (01), word (10)
5176 instr[21] = 1
5177 instr[20,16] = Vm
5178 instr[15,10] = 0010 00
5179 instr[9,5] = Vn
5180 instr[4,0] = V dest. */
5181
5182 unsigned size = INSTR (23, 22);
5183 unsigned vm = INSTR (20, 16);
5184 unsigned vn = INSTR (9, 5);
5185 unsigned vd = INSTR (4, 0);
5186 unsigned bias = 0;
5187 unsigned i;
5188
5189 NYI_assert (28, 24, 0x0E);
5190 NYI_assert (21, 21, 1);
5191 NYI_assert (15, 10, 0x08);
5192
5193 if (size == 3)
5194 HALT_UNALLOC;
5195
5196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5197 switch (INSTR (30, 29))
5198 {
5199 case 2: /* SSUBL2. */
5200 bias = 2; /* Fall through. */
5201 case 0: /* SSUBL. */
5202 switch (size)
5203 {
5204 case 0:
5205 bias *= 4; /* Upper-half byte elements start at index 8. */
5206 for (i = 0; i < 8; i++)
5207 aarch64_set_vec_s16 (cpu, vd, i,
5208 aarch64_get_vec_s8 (cpu, vn, i + bias)
5209 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5210 break;
5211
5212 case 1:
5213 bias *= 2;
5214 for (i = 0; i < 4; i++)
5215 aarch64_set_vec_s32 (cpu, vd, i,
5216 aarch64_get_vec_s16 (cpu, vn, i + bias)
5217 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5218 break;
5219
5220 case 2:
5221 for (i = 0; i < 2; i++)
5222 aarch64_set_vec_s64 (cpu, vd, i,
5223 aarch64_get_vec_s32 (cpu, vn, i + bias)
5224 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5225 break;
5226
5227 default:
5228 HALT_UNALLOC;
5229 }
5230 break;
5231
5232 case 3: /* USUBL2. */
5233 bias = 2; /* Fall through. */
5234 case 1: /* USUBL. */
5235 switch (size)
5236 {
5237 case 0:
5238 bias *= 4; /* Upper-half byte elements start at index 8. */
5239 for (i = 0; i < 8; i++)
5240 aarch64_set_vec_u16 (cpu, vd, i,
5241 aarch64_get_vec_u8 (cpu, vn, i + bias)
5242 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5243 break;
5244
5245 case 1:
5246 bias *= 2;
5247 for (i = 0; i < 4; i++)
5248 aarch64_set_vec_u32 (cpu, vd, i,
5249 aarch64_get_vec_u16 (cpu, vn, i + bias)
5250 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5251 break;
5252
5253 case 2:
5254 for (i = 0; i < 2; i++)
5255 aarch64_set_vec_u64 (cpu, vd, i,
5256 aarch64_get_vec_u32 (cpu, vn, i + bias)
5257 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5258 break;
5259
5260 default:
5261 HALT_UNALLOC;
5262 }
5263 break;
5264 }
5265 }
5266
5267 static void
5268 do_vec_ADDP (sim_cpu *cpu)
5269 {
5270 /* instr[31] = 0
5271 instr[30] = half(0)/full(1)
5272 instr[29,24] = 00 1110
5273 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5274 instr[21] = 1
5275 instr[20,16] = Vm
5276 instr[15,10] = 1011 11
5277 instr[9,5] = Vn
5278 instr[4,0] = V dest. */
5279
5280 FRegister copy_vn;
5281 FRegister copy_vm;
5282 unsigned full = INSTR (30, 30);
5283 unsigned size = INSTR (23, 22);
5284 unsigned vm = INSTR (20, 16);
5285 unsigned vn = INSTR (9, 5);
5286 unsigned vd = INSTR (4, 0);
5287 unsigned i, range;
5288
5289 NYI_assert (29, 24, 0x0E);
5290 NYI_assert (21, 21, 1);
5291 NYI_assert (15, 10, 0x2F);
5292
5293 /* Make copies of the source registers in case vd == vn/vm. */
5294 copy_vn = cpu->fr[vn];
5295 copy_vm = cpu->fr[vm];
5296
5297 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
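  /* ADDP treats Vn:Vm as one concatenated vector and sums adjacent
     pairs: the low half of Vd gets the Vn pair sums and the high half
     the Vm pair sums, e.g. for 4S:
     Vd = { Vn0+Vn1, Vn2+Vn3, Vm0+Vm1, Vm2+Vm3 }.  */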
5298 switch (size)
5299 {
5300 case 0:
5301 range = full ? 8 : 4;
5302 for (i = 0; i < range; i++)
5303 {
5304 aarch64_set_vec_u8 (cpu, vd, i,
5305 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5306 aarch64_set_vec_u8 (cpu, vd, i + range,
5307 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5308 }
5309 return;
5310
5311 case 1:
5312 range = full ? 4 : 2;
5313 for (i = 0; i < range; i++)
5314 {
5315 aarch64_set_vec_u16 (cpu, vd, i,
5316 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5317 aarch64_set_vec_u16 (cpu, vd, i + range,
5318 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5319 }
5320 return;
5321
5322 case 2:
5323 range = full ? 2 : 1;
5324 for (i = 0; i < range; i++)
5325 {
5326 aarch64_set_vec_u32 (cpu, vd, i,
5327 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5328 aarch64_set_vec_u32 (cpu, vd, i + range,
5329 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5330 }
5331 return;
5332
5333 case 3:
5334 if (! full)
5335 HALT_UNALLOC;
5336 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5337 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5338 return;
5339 }
5340 }
5341
5342 static void
5343 do_vec_UMOV (sim_cpu *cpu)
5344 {
5345 /* instr[31] = 0
5346 instr[30] = 32-bit(0)/64-bit(1)
5347 instr[29,21] = 00 1110 000
5348 instr[20,16] = size & index
5349 instr[15,10] = 0011 11
5350 instr[9,5] = V source
5351 instr[4,0] = R dest. */
5352
5353 unsigned vs = INSTR (9, 5);
5354 unsigned rd = INSTR (4, 0);
5355 unsigned index;
5356
5357 NYI_assert (29, 21, 0x070);
5358 NYI_assert (15, 10, 0x0F);
5359
5360 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
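  /* The size & index field is a unary encoding: the lowest set bit of
     bits [20,16] gives the element size and the bits above it give the
     index, e.g. xxx10 selects 16-bit elements indexed by [20,18].  */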
5361 if (INSTR (16, 16))
5362 {
5363 /* Byte transfer. */
5364 index = INSTR (20, 17);
5365 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5366 aarch64_get_vec_u8 (cpu, vs, index));
5367 }
5368 else if (INSTR (17, 17))
5369 {
5370 index = INSTR (20, 18);
5371 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5372 aarch64_get_vec_u16 (cpu, vs, index));
5373 }
5374 else if (INSTR (18, 18))
5375 {
5376 index = INSTR (20, 19);
5377 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5378 aarch64_get_vec_u32 (cpu, vs, index));
5379 }
5380 else
5381 {
5382 if (INSTR (30, 30) != 1)
5383 HALT_UNALLOC;
5384
5385 index = INSTR (20, 20);
5386 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5387 aarch64_get_vec_u64 (cpu, vs, index));
5388 }
5389 }
5390
5391 static void
5392 do_vec_FABS (sim_cpu *cpu)
5393 {
5394 /* instr[31] = 0
5395 instr[30] = half(0)/full(1)
5396 instr[29,23] = 00 1110 1
5397 instr[22] = float(0)/double(1)
5398 instr[21,16] = 10 0000
5399 instr[15,10] = 1111 10
5400 instr[9,5] = Vn
5401 instr[4,0] = Vd. */
5402
5403 unsigned vn = INSTR (9, 5);
5404 unsigned vd = INSTR (4, 0);
5405 unsigned full = INSTR (30, 30);
5406 unsigned i;
5407
5408 NYI_assert (29, 23, 0x1D);
5409 NYI_assert (21, 10, 0x83E);
5410
5411 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5412 if (INSTR (22, 22))
5413 {
5414 if (! full)
5415 HALT_NYI;
5416
5417 for (i = 0; i < 2; i++)
5418 aarch64_set_vec_double (cpu, vd, i,
5419 fabs (aarch64_get_vec_double (cpu, vn, i)));
5420 }
5421 else
5422 {
5423 for (i = 0; i < (full ? 4 : 2); i++)
5424 aarch64_set_vec_float (cpu, vd, i,
5425 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5426 }
5427 }
5428
5429 static void
5430 do_vec_FCVTZS (sim_cpu *cpu)
5431 {
5432 /* instr[31] = 0
5433 instr[30] = half (0) / all (1)
5434 instr[29,23] = 00 1110 1
5435 instr[22] = single (0) / double (1)
5436 instr[21,10] = 10 0001 1011 10
5437 instr[9,5] = Rn
5438 instr[4,0] = Rd. */
5439
5440 unsigned rn = INSTR (9, 5);
5441 unsigned rd = INSTR (4, 0);
5442 unsigned full = INSTR (30, 30);
5443 unsigned i;
5444
5445 NYI_assert (31, 31, 0);
5446 NYI_assert (29, 23, 0x1D);
5447 NYI_assert (21, 10, 0x86E);
5448
5449 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5450 if (INSTR (22, 22))
5451 {
5452 if (! full)
5453 HALT_UNALLOC;
5454
5455 for (i = 0; i < 2; i++)
5456 aarch64_set_vec_s64 (cpu, rd, i,
5457 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5458 }
5459 else
5460 for (i = 0; i < (full ? 4 : 2); i++)
5461 aarch64_set_vec_s32 (cpu, rd, i,
5462 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5463 }
5464
5465 static void
5466 do_vec_REV64 (sim_cpu *cpu)
5467 {
5468 /* instr[31] = 0
5469 instr[30] = full/half
5470 instr[29,24] = 00 1110
5471 instr[23,22] = size
5472 instr[21,10] = 10 0000 0000 10
5473 instr[9,5] = Rn
5474 instr[4,0] = Rd. */
5475
5476 unsigned rn = INSTR (9, 5);
5477 unsigned rd = INSTR (4, 0);
5478 unsigned size = INSTR (23, 22);
5479 unsigned full = INSTR (30, 30);
5480 unsigned i;
5481 FRegister val;
5482
5483 NYI_assert (29, 24, 0x0E);
5484 NYI_assert (21, 10, 0x802);
5485
5486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
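  /* REV64 reverses the element order within each 64-bit doubleword, so
     XOR-ing the index flips it within its own doubleword: for bytes,
     i ^ 7 maps 0..7 to 7..0 and 8..15 to 15..8.  */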
5487 switch (size)
5488 {
5489 case 0:
5490 for (i = 0; i < (full ? 16 : 8); i++)
5491 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5492 break;
5493
5494 case 1:
5495 for (i = 0; i < (full ? 8 : 4); i++)
5496 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5497 break;
5498
5499 case 2:
5500 for (i = 0; i < (full ? 4 : 2); i++)
5501 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5502 break;
5503
5504 case 3:
5505 HALT_UNALLOC;
5506 }
5507
5508 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5509 if (full)
5510 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5511 }
5512
5513 static void
5514 do_vec_REV16 (sim_cpu *cpu)
5515 {
5516 /* instr[31] = 0
5517 instr[30] = full/half
5518 instr[29,24] = 00 1110
5519 instr[23,22] = size
5520 instr[21,10] = 10 0000 0001 10
5521 instr[9,5] = Rn
5522 instr[4,0] = Rd. */
5523
5524 unsigned rn = INSTR (9, 5);
5525 unsigned rd = INSTR (4, 0);
5526 unsigned size = INSTR (23, 22);
5527 unsigned full = INSTR (30, 30);
5528 unsigned i;
5529 FRegister val;
5530
5531 NYI_assert (29, 24, 0x0E);
5532 NYI_assert (21, 10, 0x806);
5533
5534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5535 switch (size)
5536 {
5537 case 0:
5538 for (i = 0; i < (full ? 16 : 8); i++)
5539 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5540 break;
5541
5542 default:
5543 HALT_UNALLOC;
5544 }
5545
5546 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5547 if (full)
5548 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5549 }
5550
5551 static void
5552 do_vec_op1 (sim_cpu *cpu)
5553 {
5554 /* instr[31] = 0
5555 instr[30] = half/full
5556 instr[29,24] = 00 1110
5557 instr[23,21] = ???
5558 instr[20,16] = Vm
5559 instr[15,10] = sub-opcode
5560 instr[9,5] = Vn
5561 instr[4,0] = Vd */
5562 NYI_assert (29, 24, 0x0E);
5563
5564 if (INSTR (21, 21) == 0)
5565 {
5566 if (INSTR (23, 22) == 0)
5567 {
5568 if (INSTR (30, 30) == 1
5569 && INSTR (17, 14) == 0
5570 && INSTR (12, 10) == 7)
5571 return do_vec_ins_2 (cpu);
5572
5573 switch (INSTR (15, 10))
5574 {
5575 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5576 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5577 case 0x07: do_vec_INS (cpu); return;
5578 case 0x0A: do_vec_TRN (cpu); return;
5579
5580 case 0x0F:
5581 if (INSTR (17, 16) == 0)
5582 {
5583 do_vec_MOV_into_scalar (cpu);
5584 return;
5585 }
5586 break;
5587
5588 case 0x00:
5589 case 0x08:
5590 case 0x10:
5591 case 0x18:
5592 do_vec_TBL (cpu); return;
5593
5594 case 0x06:
5595 case 0x16:
5596 do_vec_UZP (cpu); return;
5597
5598 case 0x0E:
5599 case 0x1E:
5600 do_vec_ZIP (cpu); return;
5601
5602 default:
5603 HALT_NYI;
5604 }
5605 }
5606
5607 switch (INSTR (13, 10))
5608 {
5609 case 0x6: do_vec_UZP (cpu); return;
5610 case 0xE: do_vec_ZIP (cpu); return;
5611 case 0xA: do_vec_TRN (cpu); return;
5612 case 0xF: do_vec_UMOV (cpu); return;
5613 default: HALT_NYI;
5614 }
5615 }
5616
5617 switch (INSTR (15, 10))
5618 {
5619 case 0x02: do_vec_REV64 (cpu); return;
5620 case 0x06: do_vec_REV16 (cpu); return;
5621
5622 case 0x07:
5623 switch (INSTR (23, 21))
5624 {
5625 case 1: do_vec_AND (cpu); return;
5626 case 3: do_vec_BIC (cpu); return;
5627 case 5: do_vec_ORR (cpu); return;
5628 case 7: do_vec_ORN (cpu); return;
5629 default: HALT_NYI;
5630 }
5631
5632 case 0x08: do_vec_sub_long (cpu); return;
5633 case 0x0a: do_vec_XTN (cpu); return;
5634 case 0x11: do_vec_SSHL (cpu); return;
5635 case 0x19: do_vec_max (cpu); return;
5636 case 0x1B: do_vec_min (cpu); return;
5637 case 0x21: do_vec_add (cpu); return;
5638 case 0x25: do_vec_MLA (cpu); return;
5639 case 0x27: do_vec_mul (cpu); return;
5640 case 0x2F: do_vec_ADDP (cpu); return;
5641 case 0x30: do_vec_mull (cpu); return;
5642 case 0x33: do_vec_FMLA (cpu); return;
5643 case 0x35: do_vec_fadd (cpu); return;
5644
5645 case 0x2E:
5646 switch (INSTR (20, 16))
5647 {
5648 case 0x00: do_vec_ABS (cpu); return;
5649 case 0x01: do_vec_FCVTZS (cpu); return;
5650 case 0x11: do_vec_ADDV (cpu); return;
5651 default: HALT_NYI;
5652 }
5653
5654 case 0x31:
5655 case 0x3B:
5656 do_vec_Fminmax (cpu); return;
5657
5658 case 0x0D:
5659 case 0x0F:
5660 case 0x22:
5661 case 0x23:
5662 case 0x26:
5663 case 0x2A:
5664 case 0x32:
5665 case 0x36:
5666 case 0x39:
5667 case 0x3A:
5668 do_vec_compare (cpu); return;
5669
5670 case 0x3E:
5671 do_vec_FABS (cpu); return;
5672
5673 default:
5674 HALT_NYI;
5675 }
5676 }
5677
5678 static void
5679 do_vec_xtl (sim_cpu *cpu)
5680 {
5681 /* instr[31] = 0
5682 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5683 instr[28,22] = 0 1111 00
5684 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5685 instr[15,10] = 1010 01
5686 instr[9,5] = V source
5687 instr[4,0] = V dest. */
5688
5689 unsigned vs = INSTR (9, 5);
5690 unsigned vd = INSTR (4, 0);
5691 unsigned i, shift, bias = 0;
5692
5693 NYI_assert (28, 22, 0x3C);
5694 NYI_assert (15, 10, 0x29);
5695
5696 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
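  /* Bits [21,16] are a unary immh:immb encoding: the highest set bit
     picks the source element width (bit 21 = 32-bit, bit 20 = 16-bit,
     bit 19 = 8-bit) and the bits below it give the left shift; XTL is
     the shift-of-zero case of SSHLL/USHLL.  */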
5697 switch (INSTR (30, 29))
5698 {
5699 case 2: /* SXTL2, SSHLL2. */
5700 bias = 2; /* Fall through. */
5701 case 0: /* SXTL, SSHLL. */
5702 if (INSTR (21, 21))
5703 {
5704 int64_t val1, val2;
5705
5706 shift = INSTR (20, 16);
5707 /* Get the source values before setting the destination values
5708 in case the source and destination are the same. */
5709 val1 = ((int64_t) aarch64_get_vec_s32 (cpu, vs, bias)) << shift;
5710 val2 = ((int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1)) << shift;
5711 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5712 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5713 }
5714 else if (INSTR (20, 20))
5715 {
5716 int32_t v[4];
5717
5718
5719 shift = INSTR (19, 16);
5720 bias *= 2;
5721 for (i = 0; i < 4; i++)
5722 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5723 for (i = 0; i < 4; i++)
5724 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5725 }
5726 else
5727 {
5728 int16_t v[8];
5729 NYI_assert (19, 19, 1);
5730
5731 shift = INSTR (18, 16);
5732 bias *= 4; /* Upper-half byte elements start at index 8. */
5733 for (i = 0; i < 8; i++)
5734 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5735 for (i = 0; i < 8; i++)
5736 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5737 }
5738 return;
5739
5740 case 3: /* UXTL2, USHLL2. */
5741 bias = 2; /* Fall through. */
5742 case 1: /* UXTL, USHLL. */
5743 if (INSTR (21, 21))
5744 {
5745 uint64_t v1, v2;
5746 shift = INSTR (20, 16);
5747 v1 = ((uint64_t) aarch64_get_vec_u32 (cpu, vs, bias)) << shift;
5748 v2 = ((uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1)) << shift;
5749 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5750 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5751 }
5752 else if (INSTR (20, 20))
5753 {
5754 uint32_t v[4];
5755 shift = INSTR (19, 16);
5756 bias *= 2;
5757 for (i = 0; i < 4; i++)
5758 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5759 for (i = 0; i < 4; i++)
5760 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5761 }
5762 else
5763 {
5764 uint16_t v[8];
5765 NYI_assert (19, 19, 1);
5766
5767 shift = INSTR (18, 16);
5768 bias *= 4; /* Upper-half byte elements start at index 8. */
5769 for (i = 0; i < 8; i++)
5770 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5771 for (i = 0; i < 8; i++)
5772 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5773 }
5774 return;
5775 }
5776 }
5777
5778 static void
5779 do_vec_SHL (sim_cpu *cpu)
5780 {
5781 /* instr [31] = 0
5782 instr [30] = half(0)/full(1)
5783 instr [29,23] = 001 1110
5784 instr [22,16] = size and shift amount
5785 instr [15,10] = 01 0101
5786 instr [9, 5] = Vs
5787 instr [4, 0] = Vd. */
5788
5789 int shift;
5790 int full = INSTR (30, 30);
5791 unsigned vs = INSTR (9, 5);
5792 unsigned vd = INSTR (4, 0);
5793 unsigned i;
5794
5795 NYI_assert (29, 23, 0x1E);
5796 NYI_assert (15, 10, 0x15);
5797
5798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
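  /* As with the other immediate shifts, the high set bit of the
     size/shift field selects the element width: bit 22 = 64-bit with
     shift [21,16], bit 21 = 32-bit with shift [20,16], and so on down
     to 8-bit elements with shift [18,16].  */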
5799 if (INSTR (22, 22))
5800 {
5801 shift = INSTR (21, 16);
5802
5803 if (full == 0)
5804 HALT_UNALLOC;
5805
5806 for (i = 0; i < 2; i++)
5807 {
5808 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5809 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5810 }
5811
5812 return;
5813 }
5814
5815 if (INSTR (21, 21))
5816 {
5817 shift = INSTR (20, 16);
5818
5819 for (i = 0; i < (full ? 4 : 2); i++)
5820 {
5821 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5822 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5823 }
5824
5825 return;
5826 }
5827
5828 if (INSTR (20, 20))
5829 {
5830 shift = INSTR (19, 16);
5831
5832 for (i = 0; i < (full ? 8 : 4); i++)
5833 {
5834 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5835 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5836 }
5837
5838 return;
5839 }
5840
5841 if (INSTR (19, 19) == 0)
5842 HALT_UNALLOC;
5843
5844 shift = INSTR (18, 16);
5845
5846 for (i = 0; i < (full ? 16 : 8); i++)
5847 {
5848 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5849 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5850 }
5851 }
5852
5853 static void
5854 do_vec_SSHR_USHR (sim_cpu *cpu)
5855 {
5856 /* instr [31] = 0
5857 instr [30] = half(0)/full(1)
5858 instr [29] = signed(0)/unsigned(1)
5859 instr [28,23] = 0 1111 0
5860 instr [22,16] = size and shift amount
5861 instr [15,10] = 0000 01
5862 instr [9, 5] = Vs
5863 instr [4, 0] = Vd. */
5864
5865 int full = INSTR (30, 30);
5866 int sign = ! INSTR (29, 29);
5867 unsigned shift = INSTR (22, 16);
5868 unsigned vs = INSTR (9, 5);
5869 unsigned vd = INSTR (4, 0);
5870 unsigned i;
5871
5872 NYI_assert (28, 23, 0x1E);
5873 NYI_assert (15, 10, 0x01);
5874
5875 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
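  /* For right shifts the immh:immb field holds (2 * element width)
     minus the shift amount, e.g. a field value of 33 (0b0100001) means
     32-bit lanes shifted right by 64 - 33 = 31.  */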
5876 if (INSTR (22, 22))
5877 {
5878 shift = 128 - shift;
5879
5880 if (full == 0)
5881 HALT_UNALLOC;
5882
5883 if (sign)
5884 for (i = 0; i < 2; i++)
5885 {
5886 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5887 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5888 }
5889 else
5890 for (i = 0; i < 2; i++)
5891 {
5892 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5893 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5894 }
5895
5896 return;
5897 }
5898
5899 if (INSTR (21, 21))
5900 {
5901 shift = 64 - shift;
5902
5903 if (sign)
5904 for (i = 0; i < (full ? 4 : 2); i++)
5905 {
5906 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5907 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5908 }
5909 else
5910 for (i = 0; i < (full ? 4 : 2); i++)
5911 {
5912 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5913 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5914 }
5915
5916 return;
5917 }
5918
5919 if (INSTR (20, 20))
5920 {
5921 shift = 32 - shift;
5922
5923 if (sign)
5924 for (i = 0; i < (full ? 8 : 4); i++)
5925 {
5926 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5927 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5928 }
5929 else
5930 for (i = 0; i < (full ? 8 : 4); i++)
5931 {
5932 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5933 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5934 }
5935
5936 return;
5937 }
5938
5939 if (INSTR (19, 19) == 0)
5940 HALT_UNALLOC;
5941
5942 shift = 16 - shift;
5943
5944 if (sign)
5945 for (i = 0; i < (full ? 16 : 8); i++)
5946 {
5947 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5948 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5949 }
5950 else
5951 for (i = 0; i < (full ? 16 : 8); i++)
5952 {
5953 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5954 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5955 }
5956 }
5957
5958 static void
5959 do_vec_MUL_by_element (sim_cpu *cpu)
5960 {
5961 /* instr[31] = 0
5962 instr[30] = half/full
5963 instr[29,24] = 00 1111
5964 instr[23,22] = size
5965 instr[21] = L
5966 instr[20] = M
5967 instr[19,16] = m
5968 instr[15,12] = 1000
5969 instr[11] = H
5970 instr[10] = 0
5971 instr[9,5] = Vn
5972 instr[4,0] = Vd */
5973
5974 unsigned full = INSTR (30, 30);
5975 unsigned L = INSTR (21, 21);
5976 unsigned H = INSTR (11, 11);
5977 unsigned vn = INSTR (9, 5);
5978 unsigned vd = INSTR (4, 0);
5979 unsigned size = INSTR (23, 22);
5980 unsigned index;
5981 unsigned vm;
5982 unsigned e;
5983
5984 NYI_assert (29, 24, 0x0F);
5985 NYI_assert (15, 12, 0x8);
5986 NYI_assert (10, 10, 0);
5987
5988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
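  /* The element index is assembled from the H, L and M bits.  For
     16-bit elements the index is H:L:M and Vm is limited to V0-V15 (a
     4-bit register field); for 32-bit elements the index is just H:L
     and the full 5-bit Vm field is used.  */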
5989 switch (size)
5990 {
5991 case 1:
5992 {
5993 /* 16 bit products. */
5994 uint16_t product;
5995 uint16_t element1;
5996 uint16_t element2;
5997
5998 index = (H << 2) | (L << 1) | INSTR (20, 20);
5999 vm = INSTR (19, 16);
6000 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6001
6002 for (e = 0; e < (full ? 8 : 4); e ++)
6003 {
6004 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6005 product = element1 * element2;
6006 aarch64_set_vec_u16 (cpu, vd, e, product);
6007 }
6008 }
6009 break;
6010
6011 case 2:
6012 {
6013 /* 32 bit products. */
6014 uint32_t product;
6015 uint32_t element1;
6016 uint32_t element2;
6017
6018 index = (H << 1) | L;
6019 vm = INSTR (20, 16);
6020 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6021
6022 for (e = 0; e < (full ? 4 : 2); e ++)
6023 {
6024 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6025 product = element1 * element2;
6026 aarch64_set_vec_u32 (cpu, vd, e, product);
6027 }
6028 }
6029 break;
6030
6031 default:
6032 HALT_UNALLOC;
6033 }
6034 }
6035
6036 static void
6037 do_vec_op2 (sim_cpu *cpu)
6038 {
6039 /* instr[31] = 0
6040 instr[30] = half/full
6041 instr[29,24] = 00 1111
6042 instr[23] = ?
6043 instr[22,16] = element size & index
6044 instr[15,10] = sub-opcode
6045 instr[9,5] = Vn
6046 instr[4,0] = Vd */
6047
6048 NYI_assert (29, 24, 0x0F);
6049
6050 if (INSTR (23, 23) != 0)
6051 {
6052 switch (INSTR (15, 10))
6053 {
6054 case 0x20:
6055 case 0x22: do_vec_MUL_by_element (cpu); return;
6056 default: HALT_NYI;
6057 }
6058 }
6059 else
6060 {
6061 switch (INSTR (15, 10))
6062 {
6063 case 0x01: do_vec_SSHR_USHR (cpu); return;
6064 case 0x15: do_vec_SHL (cpu); return;
6065 case 0x20:
6066 case 0x22: do_vec_MUL_by_element (cpu); return;
6067 case 0x29: do_vec_xtl (cpu); return;
6068 default: HALT_NYI;
6069 }
6070 }
6071 }
6072
6073 static void
6074 do_vec_neg (sim_cpu *cpu)
6075 {
6076 /* instr[31] = 0
6077 instr[30] = full(1)/half(0)
6078 instr[29,24] = 10 1110
6079 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6080 instr[21,10] = 1000 0010 1110
6081 instr[9,5] = Vs
6082 instr[4,0] = Vd */
6083
6084 int full = INSTR (30, 30);
6085 unsigned vs = INSTR (9, 5);
6086 unsigned vd = INSTR (4, 0);
6087 unsigned i;
6088
6089 NYI_assert (29, 24, 0x2E);
6090 NYI_assert (21, 10, 0x82E);
6091
6092 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6093 switch (INSTR (23, 22))
6094 {
6095 case 0:
6096 for (i = 0; i < (full ? 16 : 8); i++)
6097 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6098 return;
6099
6100 case 1:
6101 for (i = 0; i < (full ? 8 : 4); i++)
6102 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6103 return;
6104
6105 case 2:
6106 for (i = 0; i < (full ? 4 : 2); i++)
6107 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6108 return;
6109
6110 case 3:
6111 if (! full)
6112 HALT_NYI;
6113 for (i = 0; i < 2; i++)
6114 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6115 return;
6116 }
6117 }
6118
6119 static void
6120 do_vec_sqrt (sim_cpu *cpu)
6121 {
6122 /* instr[31] = 0
6123 instr[30] = full(1)/half(0)
6124 instr[29,23] = 101 1101
6125 instr[22] = single(0)/double(1)
6126 instr[21,10] = 1000 0111 1110
6127 instr[9,5] = Vs
6128 instr[4,0] = Vd. */
6129
6130 int full = INSTR (30, 30);
6131 unsigned vs = INSTR (9, 5);
6132 unsigned vd = INSTR (4, 0);
6133 unsigned i;
6134
6135 NYI_assert (29, 23, 0x5B);
6136 NYI_assert (21, 10, 0x87E);
6137
6138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6139 if (INSTR (22, 22) == 0)
6140 for (i = 0; i < (full ? 4 : 2); i++)
6141 aarch64_set_vec_float (cpu, vd, i,
6142 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6143 else
6144 for (i = 0; i < 2; i++)
6145 aarch64_set_vec_double (cpu, vd, i,
6146 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6147 }
6148
6149 static void
6150 do_vec_mls_indexed (sim_cpu *cpu)
6151 {
6152 /* instr[31] = 0
6153 instr[30] = half(0)/full(1)
6154 instr[29,24] = 10 1111
6155 instr[23,22] = 16-bit(01)/32-bit(10)
6156 instr[21,20],instr[11] = index (if 16-bit)
6157 instr[21],instr[11] = index (if 32-bit)
6158 instr[20,16] = Vm
6159 instr[15,12] = 0100
6160 instr[11] = part of index
6161 instr[10] = 0
6162 instr[9,5] = Vs
6163 instr[4,0] = Vd. */
6164
6165 int full = INSTR (30, 30);
6166 unsigned vs = INSTR (9, 5);
6167 unsigned vd = INSTR (4, 0);
6168 unsigned vm = INSTR (20, 16);
6169 unsigned i;
6170
6171 NYI_assert (15, 12, 4);
6172 NYI_assert (10, 10, 0);
6173
6174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6175 switch (INSTR (23, 22))
6176 {
6177 case 1:
6178 {
6179 unsigned elem;
6180 uint32_t val;
6181
6182 if (vm > 15)
6183 HALT_NYI;
6184
6185 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6186 val = aarch64_get_vec_u16 (cpu, vm, elem);
6187
6188 for (i = 0; i < (full ? 8 : 4); i++)
6189 aarch64_set_vec_u16 (cpu, vd, i,
6190 aarch64_get_vec_u16 (cpu, vd, i) -
6191 (aarch64_get_vec_u16 (cpu, vs, i) * val));
6192 return;
6193 }
6194
6195 case 2:
6196 {
6197 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6198 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6199
6200 for (i = 0; i < (full ? 4 : 2); i++)
6201 aarch64_set_vec_u32 (cpu, vd, i,
6202 aarch64_get_vec_u32 (cpu, vd, i) -
6203 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6204 return;
6205 }
6206
6207 case 0:
6208 case 3:
6209 default:
6210 HALT_NYI;
6211 }
6212 }
6213
6214 static void
6215 do_vec_SUB (sim_cpu *cpu)
6216 {
6217 /* instr [31] = 0
6218 instr [30] = half(0)/full(1)
6219 instr [29,24] = 10 1110
6220 instr [23,22] = size: byte(00), half(01), word (10), long (11)
6221 instr [21] = 1
6222 instr [20,16] = Vm
6223 instr [15,10] = 10 0001
6224 instr [9, 5] = Vn
6225 instr [4, 0] = Vd. */
6226
6227 unsigned full = INSTR (30, 30);
6228 unsigned vm = INSTR (20, 16);
6229 unsigned vn = INSTR (9, 5);
6230 unsigned vd = INSTR (4, 0);
6231 unsigned i;
6232
6233 NYI_assert (29, 24, 0x2E);
6234 NYI_assert (21, 21, 1);
6235 NYI_assert (15, 10, 0x21);
6236
6237 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6238 switch (INSTR (23, 22))
6239 {
6240 case 0:
6241 for (i = 0; i < (full ? 16 : 8); i++)
6242 aarch64_set_vec_s8 (cpu, vd, i,
6243 aarch64_get_vec_s8 (cpu, vn, i)
6244 - aarch64_get_vec_s8 (cpu, vm, i));
6245 return;
6246
6247 case 1:
6248 for (i = 0; i < (full ? 8 : 4); i++)
6249 aarch64_set_vec_s16 (cpu, vd, i,
6250 aarch64_get_vec_s16 (cpu, vn, i)
6251 - aarch64_get_vec_s16 (cpu, vm, i));
6252 return;
6253
6254 case 2:
6255 for (i = 0; i < (full ? 4 : 2); i++)
6256 aarch64_set_vec_s32 (cpu, vd, i,
6257 aarch64_get_vec_s32 (cpu, vn, i)
6258 - aarch64_get_vec_s32 (cpu, vm, i));
6259 return;
6260
6261 case 3:
6262 if (full == 0)
6263 HALT_UNALLOC;
6264
6265 for (i = 0; i < 2; i++)
6266 aarch64_set_vec_s64 (cpu, vd, i,
6267 aarch64_get_vec_s64 (cpu, vn, i)
6268 - aarch64_get_vec_s64 (cpu, vm, i));
6269 return;
6270 }
6271 }
6272
6273 static void
6274 do_vec_MLS (sim_cpu *cpu)
6275 {
6276 /* instr [31] = 0
6277 instr [30] = half(0)/full(1)
6278 instr [29,24] = 10 1110
6279 instr [23,22] = size: byte (00), half (01), word (10)
6280 instr [21] = 1
6281 instr [20,16] = Vm
6282 instr [15,10] = 10 0101
6283 instr [9, 5] = Vn
6284 instr [4, 0] = Vd. */
6285
6286 unsigned full = INSTR (30, 30);
6287 unsigned vm = INSTR (20, 16);
6288 unsigned vn = INSTR (9, 5);
6289 unsigned vd = INSTR (4, 0);
6290 unsigned i;
6291
6292 NYI_assert (29, 24, 0x2E);
6293 NYI_assert (21, 21, 1);
6294 NYI_assert (15, 10, 0x25);
6295
6296 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6297 switch (INSTR (23, 22))
6298 {
6299 case 0:
6300 for (i = 0; i < (full ? 16 : 8); i++)
6301 aarch64_set_vec_u8 (cpu, vd, i,
6302 aarch64_get_vec_u8 (cpu, vd, i)
6303 - (aarch64_get_vec_u8 (cpu, vn, i)
6304 * aarch64_get_vec_u8 (cpu, vm, i)));
6305 return;
6306
6307 case 1:
6308 for (i = 0; i < (full ? 8 : 4); i++)
6309 aarch64_set_vec_u16 (cpu, vd, i,
6310 aarch64_get_vec_u16 (cpu, vd, i)
6311 - (aarch64_get_vec_u16 (cpu, vn, i)
6312 * aarch64_get_vec_u16 (cpu, vm, i)));
6313 return;
6314
6315 case 2:
6316 for (i = 0; i < (full ? 4 : 2); i++)
6317 aarch64_set_vec_u32 (cpu, vd, i,
6318 aarch64_get_vec_u32 (cpu, vd, i)
6319 - (aarch64_get_vec_u32 (cpu, vn, i)
6320 * aarch64_get_vec_u32 (cpu, vm, i)));
6321 return;
6322
6323 default:
6324 HALT_UNALLOC;
6325 }
6326 }
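
/* A worked example for do_vec_MLS above, using the 4S arrangement:
   with Vd = {10, 10, 10, 10}, Vn = {1, 2, 3, 4} and Vm = {2, 2, 2, 2},
   MLS Vd.4S, Vn.4S, Vm.4S leaves Vd = {8, 6, 4, 2} -- each product
   is subtracted from the accumulating destination element.  */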
6327
6328 static void
6329 do_vec_FDIV (sim_cpu *cpu)
6330 {
6331 /* instr [31] = 0
6332 instr [30] = half(0)/full(1)
6333 instr [29,23] = 10 1110 0
6334 instr [22] = float(0)/double(1)
6335 instr [21] = 1
6336 instr [20,16] = Vm
6337 instr [15,10] = 1111 11
6338 instr [9, 5] = Vn
6339 instr [4, 0] = Vd. */
6340
6341 unsigned full = INSTR (30, 30);
6342 unsigned vm = INSTR (20, 16);
6343 unsigned vn = INSTR (9, 5);
6344 unsigned vd = INSTR (4, 0);
6345 unsigned i;
6346
6347 NYI_assert (29, 23, 0x5C);
6348 NYI_assert (21, 21, 1);
6349 NYI_assert (15, 10, 0x3F);
6350
6351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6352 if (INSTR (22, 22))
6353 {
6354 if (! full)
6355 HALT_UNALLOC;
6356
6357 for (i = 0; i < 2; i++)
6358 aarch64_set_vec_double (cpu, vd, i,
6359 aarch64_get_vec_double (cpu, vn, i)
6360 / aarch64_get_vec_double (cpu, vm, i));
6361 }
6362 else
6363 for (i = 0; i < (full ? 4 : 2); i++)
6364 aarch64_set_vec_float (cpu, vd, i,
6365 aarch64_get_vec_float (cpu, vn, i)
6366 / aarch64_get_vec_float (cpu, vm, i));
6367 }
6368
6369 static void
6370 do_vec_FMUL (sim_cpu *cpu)
6371 {
6372 /* instr [31] = 0
6373 instr [30] = half(0)/full(1)
6374 instr [29,23] = 10 1110 0
6375 instr [22] = float(0)/double(1)
6376 instr [21] = 1
6377 instr [20,16] = Vm
6378 instr [15,10] = 1101 11
6379 instr [9, 5] = Vn
6380 instr [4, 0] = Vd. */
6381
6382 unsigned full = INSTR (30, 30);
6383 unsigned vm = INSTR (20, 16);
6384 unsigned vn = INSTR (9, 5);
6385 unsigned vd = INSTR (4, 0);
6386 unsigned i;
6387
6388 NYI_assert (29, 23, 0x5C);
6389 NYI_assert (21, 21, 1);
6390 NYI_assert (15, 10, 0x37);
6391
6392 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6393 if (INSTR (22, 22))
6394 {
6395 if (! full)
6396 HALT_UNALLOC;
6397
6398 for (i = 0; i < 2; i++)
6399 aarch64_set_vec_double (cpu, vd, i,
6400 aarch64_get_vec_double (cpu, vn, i)
6401 * aarch64_get_vec_double (cpu, vm, i));
6402 }
6403 else
6404 for (i = 0; i < (full ? 4 : 2); i++)
6405 aarch64_set_vec_float (cpu, vd, i,
6406 aarch64_get_vec_float (cpu, vn, i)
6407 * aarch64_get_vec_float (cpu, vm, i));
6408 }
6409
6410 static void
6411 do_vec_FADDP (sim_cpu *cpu)
6412 {
6413 /* instr [31] = 0
6414 instr [30] = half(0)/full(1)
6415 instr [29,23] = 10 1110 0
6416 instr [22] = float(0)/double(1)
6417 instr [21] = 1
6418 instr [20,16] = Vm
6419 instr [15,10] = 1101 01
6420 instr [9, 5] = Vn
6421 instr [4, 0] = Vd. */
6422
6423 unsigned full = INSTR (30, 30);
6424 unsigned vm = INSTR (20, 16);
6425 unsigned vn = INSTR (9, 5);
6426 unsigned vd = INSTR (4, 0);
6427
6428 NYI_assert (29, 23, 0x5C);
6429 NYI_assert (21, 21, 1);
6430 NYI_assert (15, 10, 0x35);
6431
6432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6433 if (INSTR (22, 22))
6434 {
6435 /* Extract values before adding them in case vd == vn/vm. */
6436 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6437 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6438 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6439 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6440
6441 if (! full)
6442 HALT_UNALLOC;
6443
6444 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6445 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6446 }
6447 else
6448 {
6449 /* Extract values before adding them in case vd == vn/vm. */
6450 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6451 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6452 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6453 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6454
6455 if (full)
6456 {
6457 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6458 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6459 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6460 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6461
6462 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6463 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6464 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6465 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6466 }
6467 else
6468 {
6469 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6470 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6471 }
6472 }
6473 }
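
/* A worked example for do_vec_FADDP above, 4S arrangement: with
   Vn = {1.0, 2.0, 3.0, 4.0} and Vm = {10.0, 20.0, 30.0, 40.0},
   FADDP Vd.4S, Vn.4S, Vm.4S yields Vd = {3.0, 7.0, 30.0, 70.0};
   the adjacent pairs of Vn fill the low half of the result and the
   pairs of Vm fill the high half.  */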
6474
6475 static void
6476 do_vec_FSQRT (sim_cpu *cpu)
6477 {
6478 /* instr[31] = 0
6479 instr[30] = half(0)/full(1)
6480 instr[29,23] = 10 1110 1
6481 instr[22] = single(0)/double(1)
6482 instr[21,10] = 10 0001 1111 10
6483 instr[9,5] = Vsrc
6484 instr[4,0] = Vdest. */
6485
6486 unsigned vn = INSTR (9, 5);
6487 unsigned vd = INSTR (4, 0);
6488 unsigned full = INSTR (30, 30);
6489 int i;
6490
6491 NYI_assert (29, 23, 0x5D);
6492 NYI_assert (21, 10, 0x87E);
6493
6494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6495 if (INSTR (22, 22))
6496 {
6497 if (! full)
6498 HALT_UNALLOC;
6499
6500 for (i = 0; i < 2; i++)
6501 aarch64_set_vec_double (cpu, vd, i,
6502 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6503 }
6504 else
6505 {
6506 for (i = 0; i < (full ? 4 : 2); i++)
6507 aarch64_set_vec_float (cpu, vd, i,
6508 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6509 }
6510 }
6511
6512 static void
6513 do_vec_FNEG (sim_cpu *cpu)
6514 {
6515 /* instr[31] = 0
6516 instr[30] = half (0)/full (1)
6517 instr[29,23] = 10 1110 1
6518 instr[22] = single (0)/double (1)
6519 instr[21,10] = 10 0000 1111 10
6520 instr[9,5] = Vsrc
6521 instr[4,0] = Vdest. */
6522
6523 unsigned vn = INSTR (9, 5);
6524 unsigned vd = INSTR (4, 0);
6525 unsigned full = INSTR (30, 30);
6526 int i;
6527
6528 NYI_assert (29, 23, 0x5D);
6529 NYI_assert (21, 10, 0x83E);
6530
6531 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6532 if (INSTR (22, 22))
6533 {
6534 if (! full)
6535 HALT_UNALLOC;
6536
6537 for (i = 0; i < 2; i++)
6538 aarch64_set_vec_double (cpu, vd, i,
6539 - aarch64_get_vec_double (cpu, vn, i));
6540 }
6541 else
6542 {
6543 for (i = 0; i < (full ? 4 : 2); i++)
6544 aarch64_set_vec_float (cpu, vd, i,
6545 - aarch64_get_vec_float (cpu, vn, i));
6546 }
6547 }
6548
6549 static void
6550 do_vec_NOT (sim_cpu *cpu)
6551 {
6552 /* instr[31] = 0
6553 instr[30] = half (0)/full (1)
6554 instr[29,10] = 10 1110 0010 0000 0101 10
6555 instr[9,5] = Vn
6556 instr[4,0] = Vd. */
6557
6558 unsigned vn = INSTR (9, 5);
6559 unsigned vd = INSTR (4, 0);
6560 unsigned i;
6561 int full = INSTR (30, 30);
6562
6563 NYI_assert (29, 10, 0xB8816);
6564
6565 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6566 for (i = 0; i < (full ? 16 : 8); i++)
6567 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6568 }
6569
6570 static unsigned int
6571 clz (uint64_t val, unsigned size)
6572 {
6573 uint64_t mask = 1;
6574 int count;
6575
6576 mask <<= (size - 1);
6577 count = 0;
6578 do
6579 {
6580 if (val & mask)
6581 break;
6582 mask >>= 1;
6583 count ++;
6584 }
6585 while (mask);
6586
6587 return count;
6588 }
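
/* The loop above is a portable count-leading-zeros.  A sketch of a
   host-intrinsic alternative, assuming a GCC-compatible compiler
   (kept disabled; __builtin_clzll is undefined for a zero input,
   hence the explicit test):  */
#if 0
static unsigned int
clz_builtin (uint64_t val, unsigned size)
{
  if (val == 0)
    return size;		/* All SIZE bits are clear.  */
  return __builtin_clzll (val) - (64 - size);
}
#endif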
6589
6590 static void
6591 do_vec_CLZ (sim_cpu *cpu)
6592 {
6593 /* instr[31] = 0
6594 instr[30] = half (0)/full (1)
6595 instr[29,24] = 10 1110
6596 instr[23,22] = size
6597 instr[21,10] = 10 0000 0100 10
6598 instr[9,5] = Vn
6599 instr[4,0] = Vd. */
6600
6601 unsigned vn = INSTR (9, 5);
6602 unsigned vd = INSTR (4, 0);
6603 unsigned i;
6604 int full = INSTR (30, 30);
6605
6606 NYI_assert (29, 24, 0x2E);
6607 NYI_assert (21, 10, 0x812);
6608
6609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6610 switch (INSTR (23, 22))
6611 {
6612 case 0:
6613 for (i = 0; i < (full ? 16 : 8); i++)
6614 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6615 break;
6616 case 1:
6617 for (i = 0; i < (full ? 8 : 4); i++)
6618 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6619 break;
6620 case 2:
6621 for (i = 0; i < (full ? 4 : 2); i++)
6622 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6623 break;
6624 case 3:
6625 if (! full)
6626 HALT_UNALLOC;
6627 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6628 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6629 break;
6630 }
6631 }
6632
6633 static void
6634 do_vec_MOV_element (sim_cpu *cpu)
6635 {
6636 /* instr[31,21] = 0110 1110 000
6637 instr[20,16] = size & dest index
6638 instr[15] = 0
6639 instr[14,11] = source index
6640 instr[10] = 1
6641 instr[9,5] = Vs
6642 instr[4,0] = Vd. */
6643
6644 unsigned vs = INSTR (9, 5);
6645 unsigned vd = INSTR (4, 0);
6646 unsigned src_index;
6647 unsigned dst_index;
6648
6649 NYI_assert (31, 21, 0x370);
6650 NYI_assert (15, 15, 0);
6651 NYI_assert (10, 10, 1);
6652
6653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6654 if (INSTR (16, 16))
6655 {
6656 /* Move a byte. */
6657 src_index = INSTR (14, 11);
6658 dst_index = INSTR (20, 17);
6659 aarch64_set_vec_u8 (cpu, vd, dst_index,
6660 aarch64_get_vec_u8 (cpu, vs, src_index));
6661 }
6662 else if (INSTR (17, 17))
6663 {
6664 /* Move 16-bits. */
6665 NYI_assert (11, 11, 0);
6666 src_index = INSTR (14, 12);
6667 dst_index = INSTR (20, 18);
6668 aarch64_set_vec_u16 (cpu, vd, dst_index,
6669 aarch64_get_vec_u16 (cpu, vs, src_index));
6670 }
6671 else if (INSTR (18, 18))
6672 {
6673 /* Move 32-bits. */
6674 NYI_assert (12, 11, 0);
6675 src_index = INSTR (14, 13);
6676 dst_index = INSTR (20, 19);
6677 aarch64_set_vec_u32 (cpu, vd, dst_index,
6678 aarch64_get_vec_u32 (cpu, vs, src_index));
6679 }
6680 else
6681 {
6682 NYI_assert (19, 19, 1);
6683 NYI_assert (13, 11, 0);
6684 src_index = INSTR (14, 14);
6685 dst_index = INSTR (20, 20);
6686 aarch64_set_vec_u64 (cpu, vd, dst_index,
6687 aarch64_get_vec_u64 (cpu, vs, src_index));
6688 }
6689 }
6690
6691 static void
6692 do_vec_REV32 (sim_cpu *cpu)
6693 {
6694 /* instr[31] = 0
6695 instr[30] = full/half
6696 instr[29,24] = 10 1110
6697 instr[23,22] = size
6698 instr[21,10] = 10 0000 0000 10
6699 instr[9,5] = Rn
6700 instr[4,0] = Rd. */
6701
6702 unsigned rn = INSTR (9, 5);
6703 unsigned rd = INSTR (4, 0);
6704 unsigned size = INSTR (23, 22);
6705 unsigned full = INSTR (30, 30);
6706 unsigned i;
6707 FRegister val;
6708
6709 NYI_assert (29, 24, 0x2E);
6710 NYI_assert (21, 10, 0x802);
6711
6712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6713 switch (size)
6714 {
6715 case 0:
6716 for (i = 0; i < (full ? 16 : 8); i++)
6717 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6718 break;
6719
6720 case 1:
6721 for (i = 0; i < (full ? 8 : 4); i++)
6722 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6723 break;
6724
6725 default:
6726 HALT_UNALLOC;
6727 }
6728
6729 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6730 if (full)
6731 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6732 }
6733
6734 static void
6735 do_vec_EXT (sim_cpu *cpu)
6736 {
6737 /* instr[31] = 0
6738 instr[30] = full/half
6739 instr[29,21] = 10 1110 000
6740 instr[20,16] = Vm
6741 instr[15] = 0
6742 instr[14,11] = source index
6743 instr[10] = 0
6744 instr[9,5] = Vn
6745 instr[4,0] = Vd. */
6746
6747 unsigned vm = INSTR (20, 16);
6748 unsigned vn = INSTR (9, 5);
6749 unsigned vd = INSTR (4, 0);
6750 unsigned src_index = INSTR (14, 11);
6751 unsigned full = INSTR (30, 30);
6752 unsigned i;
6753 unsigned j;
6754 FRegister val;
6755
6756 NYI_assert (31, 21, 0x370);
6757 NYI_assert (15, 15, 0);
6758 NYI_assert (10, 10, 0);
6759
6760 if (!full && (src_index & 0x8))
6761 HALT_UNALLOC;
6762
6763 j = 0;
6764
6765 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6766 for (i = src_index; i < (full ? 16 : 8); i++)
6767 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6768 for (i = 0; i < src_index; i++)
6769 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6770
6771 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6772 if (full)
6773 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6774 }
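
/* A worked example for do_vec_EXT above: EXT Vd.16B, Vn.16B, Vm.16B, #3
   (src_index == 3) places bytes 3..15 of Vn in result bytes 0..12 and
   bytes 0..2 of Vm in result bytes 13..15, i.e. it extracts a 16-byte
   window from the concatenation Vm:Vn starting at byte 3.  */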
6775
6776 static void
6777 dexAdvSIMD0 (sim_cpu *cpu)
6778 {
6779 /* instr [28,25] = 0 111. */
6780 if (INSTR (15, 10) == 0x07
6781 && INSTR (9, 5) == INSTR (20, 16))
6783 {
6784 if (INSTR (31, 21) == 0x075
6785 || INSTR (31, 21) == 0x275)
6786 {
6787 do_vec_MOV_whole_vector (cpu);
6788 return;
6789 }
6790 }
6791
6792 if (INSTR (29, 19) == 0x1E0)
6793 {
6794 do_vec_MOV_immediate (cpu);
6795 return;
6796 }
6797
6798 if (INSTR (29, 19) == 0x5E0)
6799 {
6800 do_vec_MVNI (cpu);
6801 return;
6802 }
6803
6804 if (INSTR (29, 19) == 0x1C0
6805 || INSTR (29, 19) == 0x1C1)
6806 {
6807 if (INSTR (15, 10) == 0x03)
6808 {
6809 do_vec_DUP_scalar_into_vector (cpu);
6810 return;
6811 }
6812 }
6813
6814 switch (INSTR (29, 24))
6815 {
6816 case 0x0E: do_vec_op1 (cpu); return;
6817 case 0x0F: do_vec_op2 (cpu); return;
6818
6819 case 0x2E:
6820 if (INSTR (21, 21) == 1)
6821 {
6822 switch (INSTR (15, 10))
6823 {
6824 case 0x02:
6825 do_vec_REV32 (cpu);
6826 return;
6827
6828 case 0x07:
6829 switch (INSTR (23, 22))
6830 {
6831 case 0: do_vec_EOR (cpu); return;
6832 case 1: do_vec_BSL (cpu); return;
6833 case 2:
6834 case 3: do_vec_bit (cpu); return;
6835 }
6836 break;
6837
6838 case 0x08: do_vec_sub_long (cpu); return;
6839 case 0x11: do_vec_USHL (cpu); return;
6840 case 0x12: do_vec_CLZ (cpu); return;
6841 case 0x16: do_vec_NOT (cpu); return;
6842 case 0x19: do_vec_max (cpu); return;
6843 case 0x1B: do_vec_min (cpu); return;
6844 case 0x21: do_vec_SUB (cpu); return;
6845 case 0x25: do_vec_MLS (cpu); return;
6846 case 0x31: do_vec_FminmaxNMP (cpu); return;
6847 case 0x35: do_vec_FADDP (cpu); return;
6848 case 0x37: do_vec_FMUL (cpu); return;
6849 case 0x3F: do_vec_FDIV (cpu); return;
6850
6851 case 0x3E:
6852 switch (INSTR (20, 16))
6853 {
6854 case 0x00: do_vec_FNEG (cpu); return;
6855 case 0x01: do_vec_FSQRT (cpu); return;
6856 default: HALT_NYI;
6857 }
6858
6859 case 0x0D:
6860 case 0x0F:
6861 case 0x22:
6862 case 0x23:
6863 case 0x26:
6864 case 0x2A:
6865 case 0x32:
6866 case 0x36:
6867 case 0x39:
6868 case 0x3A:
6869 do_vec_compare (cpu); return;
6870
6871 default:
6872 break;
6873 }
6874 }
6875
6876 if (INSTR (31, 21) == 0x370)
6877 {
6878 if (INSTR (10, 10))
6879 do_vec_MOV_element (cpu);
6880 else
6881 do_vec_EXT (cpu);
6882 return;
6883 }
6884
6885 switch (INSTR (21, 10))
6886 {
6887 case 0x82E: do_vec_neg (cpu); return;
6888 case 0x87E: do_vec_sqrt (cpu); return;
6889 default:
6890 if (INSTR (15, 10) == 0x30)
6891 {
6892 do_vec_mull (cpu);
6893 return;
6894 }
6895 break;
6896 }
6897 break;
6898
6899 case 0x2F:
6900 switch (INSTR (15, 10))
6901 {
6902 case 0x01: do_vec_SSHR_USHR (cpu); return;
6903 case 0x10:
6904 case 0x12: do_vec_mls_indexed (cpu); return;
6905 case 0x29: do_vec_xtl (cpu); return;
6906 default:
6907 HALT_NYI;
6908 }
6909
6910 default:
6911 break;
6912 }
6913
6914 HALT_NYI;
6915 }
6916
6917 /* 3 sources. */
6918
6919 /* Float multiply add. */
6920 static void
6921 fmadds (sim_cpu *cpu)
6922 {
6923 unsigned sa = INSTR (14, 10);
6924 unsigned sm = INSTR (20, 16);
6925 unsigned sn = INSTR ( 9, 5);
6926 unsigned sd = INSTR ( 4, 0);
6927
6928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6929 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6930 + aarch64_get_FP_float (cpu, sn)
6931 * aarch64_get_FP_float (cpu, sm));
6932 }
6933
6934 /* Double multiply add. */
6935 static void
6936 fmaddd (sim_cpu *cpu)
6937 {
6938 unsigned sa = INSTR (14, 10);
6939 unsigned sm = INSTR (20, 16);
6940 unsigned sn = INSTR ( 9, 5);
6941 unsigned sd = INSTR ( 4, 0);
6942
6943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6944 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6945 + aarch64_get_FP_double (cpu, sn)
6946 * aarch64_get_FP_double (cpu, sm));
6947 }
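
/* Note: fmadds and fmaddd above round the multiply and the add
   separately, whereas the architectural FMADD is fused (a single
   rounding).  A disabled sketch of a fused variant of fmadds,
   assuming the host provides the C99 fmaf function:  */
#if 0
static void
fmadds_fused (sim_cpu *cpu)
{
  unsigned sa = INSTR (14, 10);
  unsigned sm = INSTR (20, 16);
  unsigned sn = INSTR ( 9, 5);
  unsigned sd = INSTR ( 4, 0);

  /* fmaf computes (sn * sm) + sa with one rounding step.  */
  aarch64_set_FP_float (cpu, sd, fmaf (aarch64_get_FP_float (cpu, sn),
				       aarch64_get_FP_float (cpu, sm),
				       aarch64_get_FP_float (cpu, sa)));
}
#endif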
6948
6949 /* Float multiply subtract. */
6950 static void
6951 fmsubs (sim_cpu *cpu)
6952 {
6953 unsigned sa = INSTR (14, 10);
6954 unsigned sm = INSTR (20, 16);
6955 unsigned sn = INSTR ( 9, 5);
6956 unsigned sd = INSTR ( 4, 0);
6957
6958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6959 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6960 - aarch64_get_FP_float (cpu, sn)
6961 * aarch64_get_FP_float (cpu, sm));
6962 }
6963
6964 /* Double multiply subtract. */
6965 static void
6966 fmsubd (sim_cpu *cpu)
6967 {
6968 unsigned sa = INSTR (14, 10);
6969 unsigned sm = INSTR (20, 16);
6970 unsigned sn = INSTR ( 9, 5);
6971 unsigned sd = INSTR ( 4, 0);
6972
6973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6974 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
6975 - aarch64_get_FP_double (cpu, sn)
6976 * aarch64_get_FP_double (cpu, sm));
6977 }
6978
6979 /* Float negative multiply add. */
6980 static void
6981 fnmadds (sim_cpu *cpu)
6982 {
6983 unsigned sa = INSTR (14, 10);
6984 unsigned sm = INSTR (20, 16);
6985 unsigned sn = INSTR ( 9, 5);
6986 unsigned sd = INSTR ( 4, 0);
6987
6988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6989 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
6990 + (- aarch64_get_FP_float (cpu, sn))
6991 * aarch64_get_FP_float (cpu, sm));
6992 }
6993
6994 /* Double negative multiply add. */
6995 static void
6996 fnmaddd (sim_cpu *cpu)
6997 {
6998 unsigned sa = INSTR (14, 10);
6999 unsigned sm = INSTR (20, 16);
7000 unsigned sn = INSTR ( 9, 5);
7001 unsigned sd = INSTR ( 4, 0);
7002
7003 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7004 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7005 + (- aarch64_get_FP_double (cpu, sn))
7006 * aarch64_get_FP_double (cpu, sm));
7007 }
7008
7009 /* Float negative multiply subtract. */
7010 static void
7011 fnmsubs (sim_cpu *cpu)
7012 {
7013 unsigned sa = INSTR (14, 10);
7014 unsigned sm = INSTR (20, 16);
7015 unsigned sn = INSTR ( 9, 5);
7016 unsigned sd = INSTR ( 4, 0);
7017
7018 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7019 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7020 + aarch64_get_FP_float (cpu, sn)
7021 * aarch64_get_FP_float (cpu, sm));
7022 }
7023
7024 /* Double negative multiply subtract. */
7025 static void
7026 fnmsubd (sim_cpu *cpu)
7027 {
7028 unsigned sa = INSTR (14, 10);
7029 unsigned sm = INSTR (20, 16);
7030 unsigned sn = INSTR ( 9, 5);
7031 unsigned sd = INSTR ( 4, 0);
7032
7033 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7034 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7035 + aarch64_get_FP_double (cpu, sn)
7036 * aarch64_get_FP_double (cpu, sm));
7037 }
7038
7039 static void
7040 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7041 {
7042 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7043 instr[30] = 0
7044 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7045 instr[28,25] = 1111
7046 instr[24] = 1
7047 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7048 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7049 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7050
7051 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7052 /* dispatch on combined type:o1:o2. */
7053 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7054
7055 if (M_S != 0)
7056 HALT_UNALLOC;
7057
7058 switch (dispatch)
7059 {
7060 case 0: fmadds (cpu); return;
7061 case 1: fmsubs (cpu); return;
7062 case 2: fnmadds (cpu); return;
7063 case 3: fnmsubs (cpu); return;
7064 case 4: fmaddd (cpu); return;
7065 case 5: fmsubd (cpu); return;
7066 case 6: fnmaddd (cpu); return;
7067 case 7: fnmsubd (cpu); return;
7068 default:
7069 /* type > 1 is currently unallocated. */
7070 HALT_UNALLOC;
7071 }
7072 }
7073
7074 static void
7075 dexSimpleFPFixedConvert (sim_cpu *cpu)
7076 {
7077 HALT_NYI;
7078 }
7079
7080 static void
7081 dexSimpleFPCondCompare (sim_cpu *cpu)
7082 {
7083 /* instr [31,23] = 0001 1110 0
7084 instr [22] = type
7085 instr [21] = 1
7086 instr [20,16] = Rm
7087 instr [15,12] = condition
7088 instr [11,10] = 01
7089 instr [9,5] = Rn
7090 instr [4] = 0
7091 instr [3,0] = nzcv */
7092
7093 unsigned rm = INSTR (20, 16);
7094 unsigned rn = INSTR (9, 5);
7095
7096 NYI_assert (31, 23, 0x3C);
7097 NYI_assert (11, 10, 0x1);
7098 NYI_assert (4, 4, 0);
7099
7100 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7101 if (! testConditionCode (cpu, INSTR (15, 12)))
7102 {
7103 aarch64_set_CPSR (cpu, INSTR (3, 0));
7104 return;
7105 }
7106
7107 if (INSTR (22, 22))
7108 {
7109 /* Double precision. */
7110 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7111 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7112
7113 /* FIXME: Check for NaNs. */
7114 if (val1 == val2)
7115 aarch64_set_CPSR (cpu, (Z | C));
7116 else if (val1 < val2)
7117 aarch64_set_CPSR (cpu, N);
7118 else /* val1 > val2 */
7119 aarch64_set_CPSR (cpu, C);
7120 }
7121 else
7122 {
7123 /* Single precision. */
7124 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7125 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7126
7127 /* FIXME: Check for NaNs. */
7128 if (val1 == val2)
7129 aarch64_set_CPSR (cpu, (Z | C));
7130 else if (val1 < val2)
7131 aarch64_set_CPSR (cpu, N);
7132 else /* val1 > val2 */
7133 aarch64_set_CPSR (cpu, C);
7134 }
7135 }
7136
7137 /* 2 sources. */
7138
7139 /* Float add. */
7140 static void
7141 fadds (sim_cpu *cpu)
7142 {
7143 unsigned sm = INSTR (20, 16);
7144 unsigned sn = INSTR ( 9, 5);
7145 unsigned sd = INSTR ( 4, 0);
7146
7147 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7148 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7149 + aarch64_get_FP_float (cpu, sm));
7150 }
7151
7152 /* Double add. */
7153 static void
7154 faddd (sim_cpu *cpu)
7155 {
7156 unsigned sm = INSTR (20, 16);
7157 unsigned sn = INSTR ( 9, 5);
7158 unsigned sd = INSTR ( 4, 0);
7159
7160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7161 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7162 + aarch64_get_FP_double (cpu, sm));
7163 }
7164
7165 /* Float divide. */
7166 static void
7167 fdivs (sim_cpu *cpu)
7168 {
7169 unsigned sm = INSTR (20, 16);
7170 unsigned sn = INSTR ( 9, 5);
7171 unsigned sd = INSTR ( 4, 0);
7172
7173 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7174 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7175 / aarch64_get_FP_float (cpu, sm));
7176 }
7177
7178 /* Double divide. */
7179 static void
7180 fdivd (sim_cpu *cpu)
7181 {
7182 unsigned sm = INSTR (20, 16);
7183 unsigned sn = INSTR ( 9, 5);
7184 unsigned sd = INSTR ( 4, 0);
7185
7186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7187 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7188 / aarch64_get_FP_double (cpu, sm));
7189 }
7190
7191 /* Float multiply. */
7192 static void
7193 fmuls (sim_cpu *cpu)
7194 {
7195 unsigned sm = INSTR (20, 16);
7196 unsigned sn = INSTR ( 9, 5);
7197 unsigned sd = INSTR ( 4, 0);
7198
7199 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7200 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7201 * aarch64_get_FP_float (cpu, sm));
7202 }
7203
7204 /* Double multiply. */
7205 static void
7206 fmuld (sim_cpu *cpu)
7207 {
7208 unsigned sm = INSTR (20, 16);
7209 unsigned sn = INSTR ( 9, 5);
7210 unsigned sd = INSTR ( 4, 0);
7211
7212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7213 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7214 * aarch64_get_FP_double (cpu, sm));
7215 }
7216
7217 /* Float negate and multiply. */
7218 static void
7219 fnmuls (sim_cpu *cpu)
7220 {
7221 unsigned sm = INSTR (20, 16);
7222 unsigned sn = INSTR ( 9, 5);
7223 unsigned sd = INSTR ( 4, 0);
7224
7225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7226 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7227 * aarch64_get_FP_float (cpu, sm)));
7228 }
7229
7230 /* Double negate and multiply. */
7231 static void
7232 fnmuld (sim_cpu *cpu)
7233 {
7234 unsigned sm = INSTR (20, 16);
7235 unsigned sn = INSTR ( 9, 5);
7236 unsigned sd = INSTR ( 4, 0);
7237
7238 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7239 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7240 * aarch64_get_FP_double (cpu, sm)));
7241 }
7242
7243 /* Float subtract. */
7244 static void
7245 fsubs (sim_cpu *cpu)
7246 {
7247 unsigned sm = INSTR (20, 16);
7248 unsigned sn = INSTR ( 9, 5);
7249 unsigned sd = INSTR ( 4, 0);
7250
7251 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7252 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7253 - aarch64_get_FP_float (cpu, sm));
7254 }
7255
7256 /* Double subtract. */
7257 static void
7258 fsubd (sim_cpu *cpu)
7259 {
7260 unsigned sm = INSTR (20, 16);
7261 unsigned sn = INSTR ( 9, 5);
7262 unsigned sd = INSTR ( 4, 0);
7263
7264 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7265 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7266 - aarch64_get_FP_double (cpu, sm));
7267 }
7268
7269 static void
7270 do_FMINNM (sim_cpu *cpu)
7271 {
7272 /* instr[31,23] = 0 0011 1100
7273 instr[22] = float(0)/double(1)
7274 instr[21] = 1
7275 instr[20,16] = Sm
7276 instr[15,10] = 01 1110
7277 instr[9,5] = Sn
7278 instr[4,0] = Sd */
7279
7280 unsigned sm = INSTR (20, 16);
7281 unsigned sn = INSTR ( 9, 5);
7282 unsigned sd = INSTR ( 4, 0);
7283
7284 NYI_assert (31, 23, 0x03C);
7285 NYI_assert (15, 10, 0x1E);
7286
7287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7288 if (INSTR (22, 22))
7289 aarch64_set_FP_double (cpu, sd,
7290 dminnm (aarch64_get_FP_double (cpu, sn),
7291 aarch64_get_FP_double (cpu, sm)));
7292 else
7293 aarch64_set_FP_float (cpu, sd,
7294 fminnm (aarch64_get_FP_float (cpu, sn),
7295 aarch64_get_FP_float (cpu, sm)));
7296 }
7297
7298 static void
7299 do_FMAXNM (sim_cpu *cpu)
7300 {
7301 /* instr[31,23] = 0 0011 1100
7302 instr[22] = float(0)/double(1)
7303 instr[21] = 1
7304 instr[20,16] = Sm
7305 instr[15,10] = 01 1010
7306 instr[9,5] = Sn
7307 instr[4,0] = Sd */
7308
7309 unsigned sm = INSTR (20, 16);
7310 unsigned sn = INSTR ( 9, 5);
7311 unsigned sd = INSTR ( 4, 0);
7312
7313 NYI_assert (31, 23, 0x03C);
7314 NYI_assert (15, 10, 0x1A);
7315
7316 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7317 if (INSTR (22, 22))
7318 aarch64_set_FP_double (cpu, sd,
7319 dmaxnm (aarch64_get_FP_double (cpu, sn),
7320 aarch64_get_FP_double (cpu, sm)));
7321 else
7322 aarch64_set_FP_float (cpu, sd,
7323 fmaxnm (aarch64_get_FP_float (cpu, sn),
7324 aarch64_get_FP_float (cpu, sm)));
7325 }
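
/* Note: FMINNM/FMAXNM follow the IEEE 754-2008 minNum/maxNum rules,
   under which a single quiet NaN operand is treated as missing data
   and the other operand is returned.  The C99 fmin/fmax functions
   have the same property, so a behaviourally equivalent sketch of a
   dminnm-style helper (the real one is defined elsewhere in this
   file) could be:  */
#if 0
static double
dminnm_sketch (double a, double b)
{
  return fmin (a, b);		/* fmin (NaN, x) == x per C99.  */
}
#endif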
7326
7327 static void
7328 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7329 {
7330 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7331 instr[30] = 0
7332 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7333 instr[28,25] = 1111
7334 instr[24] = 0
7335 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7336 instr[21] = 1
7337 instr[20,16] = Vm
7338 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7339 0010 ==> FADD, 0011 ==> FSUB,
7340 0100 ==> FMAX, 0101 ==> FMIN
7341 0110 ==> FMAXNM, 0111 ==> FMINNM
7342 1000 ==> FNMUL, ow ==> UNALLOC
7343 instr[11,10] = 10
7344 instr[9,5] = Vn
7345 instr[4,0] = Vd */
7346
7347 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7348 uint32_t type = INSTR (23, 22);
7349 /* Dispatch on opcode. */
7350 uint32_t dispatch = INSTR (15, 12);
7351
7352 if (type > 1)
7353 HALT_UNALLOC;
7354
7355 if (M_S != 0)
7356 HALT_UNALLOC;
7357
7358 if (type)
7359 switch (dispatch)
7360 {
7361 case 0: fmuld (cpu); return;
7362 case 1: fdivd (cpu); return;
7363 case 2: faddd (cpu); return;
7364 case 3: fsubd (cpu); return;
7365 case 6: do_FMAXNM (cpu); return;
7366 case 7: do_FMINNM (cpu); return;
7367 case 8: fnmuld (cpu); return;
7368
7369 /* Have not yet implemented fmax and fmin. */
7370 case 4:
7371 case 5:
7372 HALT_NYI;
7373
7374 default:
7375 HALT_UNALLOC;
7376 }
7377 else /* type == 0 => floats. */
7378 switch (dispatch)
7379 {
7380 case 0: fmuls (cpu); return;
7381 case 1: fdivs (cpu); return;
7382 case 2: fadds (cpu); return;
7383 case 3: fsubs (cpu); return;
7384 case 6: do_FMAXNM (cpu); return;
7385 case 7: do_FMINNM (cpu); return;
7386 case 8: fnmuls (cpu); return;
7387
7388 case 4:
7389 case 5:
7390 HALT_NYI;
7391
7392 default:
7393 HALT_UNALLOC;
7394 }
7395 }
7396
7397 static void
7398 dexSimpleFPCondSelect (sim_cpu *cpu)
7399 {
7400 /* FCSEL
7401 instr[31,23] = 0 0011 1100
7402 instr[22] = 0=>single 1=>double
7403 instr[21] = 1
7404 instr[20,16] = Sm
7405 instr[15,12] = cond
7406 instr[11,10] = 11
7407 instr[9,5] = Sn
7408 instr[4,0] = Sd */
7409 unsigned sm = INSTR (20, 16);
7410 unsigned sn = INSTR ( 9, 5);
7411 unsigned sd = INSTR ( 4, 0);
7412 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7413
7414 NYI_assert (31, 23, 0x03C);
7415 NYI_assert (11, 10, 0x3);
7416
7417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7418 if (INSTR (22, 22))
7419 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, set ? sn : sm));
7420 else
7421 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, set ? sn : sm));
7422 }
7423
7424 /* Store 32 bit unscaled signed 9 bit. */
7425 static void
7426 fsturs (sim_cpu *cpu, int32_t offset)
7427 {
7428 unsigned int rn = INSTR (9, 5);
7429 unsigned int st = INSTR (4, 0);
7430
7431 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7432 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7433 aarch64_get_vec_u32 (cpu, st, 0));
7434 }
7435
7436 /* Store 64 bit unscaled signed 9 bit. */
7437 static void
7438 fsturd (sim_cpu *cpu, int32_t offset)
7439 {
7440 unsigned int rn = INSTR (9, 5);
7441 unsigned int st = INSTR (4, 0);
7442
7443 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7444 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7445 aarch64_get_vec_u64 (cpu, st, 0));
7446 }
7447
7448 /* Store 128 bit unscaled signed 9 bit. */
7449 static void
7450 fsturq (sim_cpu *cpu, int32_t offset)
7451 {
7452 unsigned int rn = INSTR (9, 5);
7453 unsigned int st = INSTR (4, 0);
7454 FRegister a;
7455
7456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7457 aarch64_get_FP_long_double (cpu, st, & a);
7458 aarch64_set_mem_long_double (cpu,
7459 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7460 + offset, a);
7461 }
7462
7463 /* TODO FP move register. */
7464
7465 /* 32 bit fp to fp move register. */
7466 static void
7467 ffmovs (sim_cpu *cpu)
7468 {
7469 unsigned int rn = INSTR (9, 5);
7470 unsigned int st = INSTR (4, 0);
7471
7472 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7473 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7474 }
7475
7476 /* 64 bit fp to fp move register. */
7477 static void
7478 ffmovd (sim_cpu *cpu)
7479 {
7480 unsigned int rn = INSTR (9, 5);
7481 unsigned int st = INSTR (4, 0);
7482
7483 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7484 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7485 }
7486
7487 /* 32 bit GReg to Vec move register. */
7488 static void
7489 fgmovs (sim_cpu *cpu)
7490 {
7491 unsigned int rn = INSTR (9, 5);
7492 unsigned int st = INSTR (4, 0);
7493
7494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7495 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7496 }
7497
7498 /* 64 bit g to fp move register. */
7499 static void
7500 fgmovd (sim_cpu *cpu)
7501 {
7502 unsigned int rn = INSTR (9, 5);
7503 unsigned int st = INSTR (4, 0);
7504
7505 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7506 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7507 }
7508
7509 /* 32 bit fp to g move register. */
7510 static void
7511 gfmovs (sim_cpu *cpu)
7512 {
7513 unsigned int rn = INSTR (9, 5);
7514 unsigned int st = INSTR (4, 0);
7515
7516 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7517 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7518 }
7519
7520 /* 64 bit fp to g move register. */
7521 static void
7522 gfmovd (sim_cpu *cpu)
7523 {
7524 unsigned int rn = INSTR (9, 5);
7525 unsigned int st = INSTR (4, 0);
7526
7527 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7528 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7529 }
7530
7531 /* FP move immediate
7532
7533 These install an immediate 8 bit value in the target register
7534 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7535 bit exponent. */
7536
7537 static void
7538 fmovs (sim_cpu *cpu)
7539 {
7540 unsigned int sd = INSTR (4, 0);
7541 uint32_t imm = INSTR (20, 13);
7542 float f = fp_immediate_for_encoding_32 (imm);
7543
7544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7545 aarch64_set_FP_float (cpu, sd, f);
7546 }
7547
7548 static void
7549 fmovd (sim_cpu *cpu)
7550 {
7551 unsigned int sd = INSTR (4, 0);
7552 uint32_t imm = INSTR (20, 13);
7553 double d = fp_immediate_for_encoding_64 (imm);
7554
7555 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7556 aarch64_set_FP_double (cpu, sd, d);
7557 }
7558
7559 static void
7560 dexSimpleFPImmediate (sim_cpu *cpu)
7561 {
7562 /* instr[31,23] == 00111100
7563 instr[22] == type : single(0)/double(1)
7564 instr[21] == 1
7565 instr[20,13] == imm8
7566 instr[12,10] == 100
7567 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7568 instr[4,0] == Rd */
7569 uint32_t imm5 = INSTR (9, 5);
7570
7571 NYI_assert (31, 23, 0x3C);
7572
7573 if (imm5 != 0)
7574 HALT_UNALLOC;
7575
7576 if (INSTR (22, 22))
7577 fmovd (cpu);
7578 else
7579 fmovs (cpu);
7580 }
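
/* A sketch of the expansion performed by fp_immediate_for_encoding_32
   (the helper itself is defined elsewhere), assuming the standard
   VFPExpandImm() layout: for imm8 = a:b:c:d:e:f:g:h the value is
   (-1)^a * 2^exp * (1 + efgh/16), with exp = cd - 3 when b is set
   and exp = cd + 1 otherwise, giving the range +/-0.125 .. +/-31.0.  */
#if 0
static float
vfp_expand_imm8 (uint32_t imm8)
{
  int sign = (imm8 >> 7) & 1;
  int b    = (imm8 >> 6) & 1;
  int cd   = (imm8 >> 4) & 3;
  float mant = 1.0f + (imm8 & 0xF) / 16.0f;

  /* E.g. imm8 == 0x70 gives 1.0f and imm8 == 0x00 gives 2.0f.  */
  return (sign ? -1.0f : 1.0f) * ldexpf (mant, b ? cd - 3 : cd + 1);
}
#endif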
7581
7582 /* TODO specific decode and execute for group Load Store. */
7583
7584 /* TODO FP load/store single register (unscaled offset). */
7585
7586 /* TODO load 8 bit unscaled signed 9 bit. */
7587 /* TODO load 16 bit unscaled signed 9 bit. */
7588
7589 /* Load 32 bit unscaled signed 9 bit. */
7590 static void
7591 fldurs (sim_cpu *cpu, int32_t offset)
7592 {
7593 unsigned int rn = INSTR (9, 5);
7594 unsigned int st = INSTR (4, 0);
7595
7596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7597 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7598 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7599 }
7600
7601 /* Load 64 bit unscaled signed 9 bit. */
7602 static void
7603 fldurd (sim_cpu *cpu, int32_t offset)
7604 {
7605 unsigned int rn = INSTR (9, 5);
7606 unsigned int st = INSTR (4, 0);
7607
7608 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7609 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7610 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7611 }
7612
7613 /* Load 128 bit unscaled signed 9 bit. */
7614 static void
7615 fldurq (sim_cpu *cpu, int32_t offset)
7616 {
7617 unsigned int rn = INSTR (9, 5);
7618 unsigned int st = INSTR (4, 0);
7619 FRegister a;
7620 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7621
7622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7623 aarch64_get_mem_long_double (cpu, addr, & a);
7624 aarch64_set_FP_long_double (cpu, st, a);
7625 }
7626
7627 /* TODO store 8 bit unscaled signed 9 bit. */
7628 /* TODO store 16 bit unscaled signed 9 bit. */
7629
7630
7631 /* 1 source. */
7632
7633 /* Float absolute value. */
7634 static void
7635 fabss (sim_cpu *cpu)
7636 {
7637 unsigned sn = INSTR (9, 5);
7638 unsigned sd = INSTR (4, 0);
7639 float value = aarch64_get_FP_float (cpu, sn);
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_set_FP_float (cpu, sd, fabsf (value));
7643 }
7644
7645 /* Double absolute value. */
7646 static void
7647 fabcpu (sim_cpu *cpu)
7648 {
7649 unsigned sn = INSTR (9, 5);
7650 unsigned sd = INSTR (4, 0);
7651 double value = aarch64_get_FP_double (cpu, sn);
7652
7653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7654 aarch64_set_FP_double (cpu, sd, fabs (value));
7655 }
7656
7657 /* Float negative value. */
7658 static void
7659 fnegs (sim_cpu *cpu)
7660 {
7661 unsigned sn = INSTR (9, 5);
7662 unsigned sd = INSTR (4, 0);
7663
7664 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7665 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7666 }
7667
7668 /* Double negative value. */
7669 static void
7670 fnegd (sim_cpu *cpu)
7671 {
7672 unsigned sn = INSTR (9, 5);
7673 unsigned sd = INSTR (4, 0);
7674
7675 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7676 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7677 }
7678
7679 /* Float square root. */
7680 static void
7681 fsqrts (sim_cpu *cpu)
7682 {
7683 unsigned sn = INSTR (9, 5);
7684 unsigned sd = INSTR (4, 0);
7685
7686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7687 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7688 }
7689
7690 /* Double square root. */
7691 static void
7692 fsqrtd (sim_cpu *cpu)
7693 {
7694 unsigned sn = INSTR (9, 5);
7695 unsigned sd = INSTR (4, 0);
7696
7697 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7698 aarch64_set_FP_double (cpu, sd,
7699 sqrt (aarch64_get_FP_double (cpu, sn)));
7700 }
7701
7702 /* Convert double to float. */
7703 static void
7704 fcvtds (sim_cpu *cpu)
7705 {
7706 unsigned sn = INSTR (9, 5);
7707 unsigned sd = INSTR (4, 0);
7708
7709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7710 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7711 }
7712
7713 /* Convert float to double. */
7714 static void
7715 fcvtcpu (sim_cpu *cpu)
7716 {
7717 unsigned sn = INSTR (9, 5);
7718 unsigned sd = INSTR (4, 0);
7719
7720 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7721 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7722 }
7723
7724 static void
7725 do_FRINT (sim_cpu *cpu)
7726 {
7727 /* instr[31,23] = 0001 1110 0
7728 instr[22] = single(0)/double(1)
7729 instr[21,18] = 1001
7730 instr[17,15] = rounding mode
7731 instr[14,10] = 10000
7732 instr[9,5] = source
7733 instr[4,0] = dest */
7734
7735 float val;
7736 unsigned rs = INSTR (9, 5);
7737 unsigned rd = INSTR (4, 0);
7738 unsigned int rmode = INSTR (17, 15);
7739
7740 NYI_assert (31, 23, 0x03C);
7741 NYI_assert (21, 18, 0x9);
7742 NYI_assert (14, 10, 0x10);
7743
7744 if (rmode == 6 || rmode == 7)
7745 /* FIXME: Add support for rmode == 6 exactness check. */
7746 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7747
7748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7749 if (INSTR (22, 22))
7750 {
7751 double val = aarch64_get_FP_double (cpu, rs);
7752
7753 switch (rmode)
7754 {
7755 case 0: /* mode N: nearest or even. */
7756 {
7757 double rval = round (val);
7758
7759 /* round () resolves ties away from zero; at an exact tie,
7760 pull an odd result back towards zero to give ties-to-even. */
7761 if (fabs (val - rval) == 0.5 && fmod (rval, 2.0) != 0.0)
7762 rval -= copysign (1.0, val);
7763
7764 aarch64_set_FP_double (cpu, rd, rval);
7766 return;
7767 }
7768
7769 case 1: /* mode P: towards +inf. */
7770 aarch64_set_FP_double (cpu, rd, ceil (val));
7771 return;
7772
7773 case 2: /* mode M: towards -inf. */
7774 aarch64_set_FP_double (cpu, rd, floor (val));
7775 return;
7782
7783 case 3: /* mode Z: towards 0. */
7784 aarch64_set_FP_double (cpu, rd, trunc (val));
7785 return;
7786
7787 case 4: /* mode A: away from 0. */
7788 aarch64_set_FP_double (cpu, rd, round (val));
7789 return;
7790
7791 case 6: /* mode X: use FPCR with exactness check. */
7792 case 7: /* mode I: use FPCR mode. */
7793 HALT_NYI;
7794
7795 default:
7796 HALT_UNALLOC;
7797 }
7798 }
7799
7800 val = aarch64_get_FP_float (cpu, rs);
7801
7802 switch (rmode)
7803 {
7804 case 0: /* mode N: nearest or even. */
7805 {
7806 float rval = roundf (val);
7807
7808 /* roundf () resolves ties away from zero; at an exact tie,
7809 pull an odd result back towards zero to give ties-to-even. */
7810 if (fabsf (val - rval) == 0.5f && fmodf (rval, 2.0f) != 0.0f)
7811 rval -= copysignf (1.0f, val);
7812
7813 aarch64_set_FP_float (cpu, rd, rval);
7815 return;
7816 }
7817
7818 case 1: /* mode P: towards +inf. */
7819 aarch64_set_FP_float (cpu, rd, ceilf (val));
7820 return;
7821
7822 case 2: /* mode M: towards -inf. */
7823 aarch64_set_FP_float (cpu, rd, floorf (val));
7824 return;
7831
7832 case 3: /* mode Z: towards 0. */
7833 aarch64_set_FP_float (cpu, rd, truncf (val));
7834 return;
7835
7836 case 4: /* mode A: away from 0. */
7837 aarch64_set_FP_float (cpu, rd, roundf (val));
7838 return;
7839
7840 case 6: /* mode X: use FPCR with exactness check. */
7841 case 7: /* mode I: use FPCR mode. */
7842 HALT_NYI;
7843
7844 default:
7845 HALT_UNALLOC;
7846 }
7847 }
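
/* Rounding modes 6 (FRINTX) and 7 (FRINTI) defer to the rounding
   mode held in FPCR.RMode (bits [23,22]); the code above reuses the
   simulator's FPSR word for that field.  A disabled sketch of a
   host-based alternative, assuming a C99 <fenv.h> and ignoring the
   FRINTX inexact check:  */
#if 0
#include <fenv.h>

static double
frint_fpcr_sketch (sim_cpu *cpu, double val)
{
  /* RMode: 0 = nearest, 1 = +inf, 2 = -inf, 3 = zero.  */
  static const int modes[4] =
    { FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
  int saved = fegetround ();
  double result;

  fesetround (modes[uimm (aarch64_get_FPSR (cpu), 23, 22)]);
  result = rint (val);
  fesetround (saved);
  return result;
}
#endif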
7848
7849 /* Convert half to float. */
7850 static void
7851 do_FCVT_half_to_single (sim_cpu *cpu)
7852 {
7853 unsigned rn = INSTR (9, 5);
7854 unsigned rd = INSTR (4, 0);
7855
7856 NYI_assert (31, 10, 0x7B890);
7857
7858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7859 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7860 }
7861
7862 /* Convert half to double. */
7863 static void
7864 do_FCVT_half_to_double (sim_cpu *cpu)
7865 {
7866 unsigned rn = INSTR (9, 5);
7867 unsigned rd = INSTR (4, 0);
7868
7869 NYI_assert (31, 10, 0x7B8B0);
7870
7871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7872 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7873 }
7874
7875 static void
7876 do_FCVT_single_to_half (sim_cpu *cpu)
7877 {
7878 unsigned rn = INSTR (9, 5);
7879 unsigned rd = INSTR (4, 0);
7880
7881 NYI_assert (31, 10, 0x788F0);
7882
7883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7884 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7885 }
7886
7887 /* Convert double to half. */
7888 static void
7889 do_FCVT_double_to_half (sim_cpu *cpu)
7890 {
7891 unsigned rn = INSTR (9, 5);
7892 unsigned rd = INSTR (4, 0);
7893
7894 NYI_assert (31, 10, 0x798F0);
7895
7896 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7897 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7898 }
7899
7900 static void
7901 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7902 {
7903 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7904 instr[30] = 0
7905 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7906 instr[28,25] = 1111
7907 instr[24] = 0
7908 instr[23,22] ==> type : 00 ==> source is single,
7909 01 ==> source is double
7910 10 ==> UNALLOC
7911 11 ==> UNALLOC or source is half
7912 instr[21] = 1
7913 instr[20,15] ==> opcode : with type 00 or 01
7914 000000 ==> FMOV, 000001 ==> FABS,
7915 000010 ==> FNEG, 000011 ==> FSQRT,
7916 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7917 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7918 001000 ==> FRINTN, 001001 ==> FRINTP,
7919 001010 ==> FRINTM, 001011 ==> FRINTZ,
7920 001100 ==> FRINTA, 001101 ==> UNALLOC
7921 001110 ==> FRINTX, 001111 ==> FRINTI
7922 with type 11
7923 000100 ==> FCVT (half-to-single)
7924 000101 ==> FCVT (half-to-double)
7925 instr[14,10] = 10000. */
7926
7927 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7928 uint32_t type = INSTR (23, 22);
7929 uint32_t opcode = INSTR (20, 15);
7930
7931 if (M_S != 0)
7932 HALT_UNALLOC;
7933
7934 if (type == 3)
7935 {
7936 if (opcode == 4)
7937 do_FCVT_half_to_single (cpu);
7938 else if (opcode == 5)
7939 do_FCVT_half_to_double (cpu);
7940 else
7941 HALT_UNALLOC;
7942 return;
7943 }
7944
7945 if (type == 2)
7946 HALT_UNALLOC;
7947
7948 switch (opcode)
7949 {
7950 case 0:
7951 if (type)
7952 ffmovd (cpu);
7953 else
7954 ffmovs (cpu);
7955 return;
7956
7957 case 1:
7958 if (type)
7959 fabcpu (cpu);
7960 else
7961 fabss (cpu);
7962 return;
7963
7964 case 2:
7965 if (type)
7966 fnegd (cpu);
7967 else
7968 fnegs (cpu);
7969 return;
7970
7971 case 3:
7972 if (type)
7973 fsqrtd (cpu);
7974 else
7975 fsqrts (cpu);
7976 return;
7977
7978 case 4:
7979 if (type)
7980 fcvtds (cpu);
7981 else
7982 HALT_UNALLOC;
7983 return;
7984
7985 case 5:
7986 if (type)
7987 HALT_UNALLOC;
7988 fcvtcpu (cpu);
7989 return;
7990
7991 case 8: /* FRINTN etc. */
7992 case 9:
7993 case 10:
7994 case 11:
7995 case 12:
7996 case 14:
7997 case 15:
7998 do_FRINT (cpu);
7999 return;
8000
8001 case 7:
8002 if (INSTR (22, 22))
8003 do_FCVT_double_to_half (cpu);
8004 else
8005 do_FCVT_single_to_half (cpu);
8006 return;
8007
8008 case 13:
8009 HALT_NYI;
8010
8011 default:
8012 HALT_UNALLOC;
8013 }
8014 }
8015
8016 /* 32 bit signed int to float. */
8017 static void
8018 scvtf32 (sim_cpu *cpu)
8019 {
8020 unsigned rn = INSTR (9, 5);
8021 unsigned sd = INSTR (4, 0);
8022
8023 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8024 aarch64_set_FP_float
8025 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8026 }
8027
8028 /* signed int to float. */
8029 static void
8030 scvtf (sim_cpu *cpu)
8031 {
8032 unsigned rn = INSTR (9, 5);
8033 unsigned sd = INSTR (4, 0);
8034
8035 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8036 aarch64_set_FP_float
8037 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8038 }
8039
8040 /* 32 bit signed int to double. */
8041 static void
8042 scvtd32 (sim_cpu *cpu)
8043 {
8044 unsigned rn = INSTR (9, 5);
8045 unsigned sd = INSTR (4, 0);
8046
8047 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8048 aarch64_set_FP_double
8049 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8050 }
8051
8052 /* signed int to double. */
8053 static void
8054 scvtd (sim_cpu *cpu)
8055 {
8056 unsigned rn = INSTR (9, 5);
8057 unsigned sd = INSTR (4, 0);
8058
8059 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8060 aarch64_set_FP_double
8061 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8062 }
8063
8064 static const float FLOAT_INT_MAX = (float) INT_MAX;
8065 static const float FLOAT_INT_MIN = (float) INT_MIN;
8066 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8067 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8068 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8069 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8070 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8071 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8072
8073 /* Check for FP exception conditions:
8074 NaN raises IO
8075 Infinity raises IO
8076 Out of Range raises IO and IX and saturates value
8077 Denormal raises ID and IX and sets to zero. */
8078 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8079 do \
8080 { \
8081 switch (fpclassify (F)) \
8082 { \
8083 case FP_INFINITE: \
8084 case FP_NAN: \
8085 aarch64_set_FPSR (cpu, IO); \
8086 if (signbit (F)) \
8087 VALUE = ITYPE##_MIN; \
8088 else \
8089 VALUE = ITYPE##_MAX; \
8090 break; \
8091 \
8092 case FP_NORMAL: \
8093 if (F >= FTYPE##_##ITYPE##_MAX) \
8094 { \
8095 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8096 VALUE = ITYPE##_MAX; \
8097 } \
8098 else if (F <= FTYPE##_##ITYPE##_MIN) \
8099 { \
8100 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8101 VALUE = ITYPE##_MIN; \
8102 } \
8103 break; \
8104 \
8105 case FP_SUBNORMAL: \
8106 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8107 VALUE = 0; \
8108 break; \
8109 \
8110 default: \
8111 case FP_ZERO: \
8112 VALUE = 0; \
8113 break; \
8114 } \
8115 } \
8116 while (0)
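
/* A worked example: converting 3.0e9f to a signed 32-bit integer hits
   the FP_NORMAL branch above with F >= FLOAT_INT_MAX, so IO and IX are
   raised in the FPSR and the result saturates to INT_MAX; converting a
   NaN instead raises only IO and yields a saturated limit selected by
   the sign bit.  */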
8117
8118 /* 32 bit convert float to signed int truncate towards zero. */
8119 static void
8120 fcvtszs32 (sim_cpu *cpu)
8121 {
8122 unsigned sn = INSTR (9, 5);
8123 unsigned rd = INSTR (4, 0);
8124 /* TODO : check that this rounds toward zero. */
8125 float f = aarch64_get_FP_float (cpu, sn);
8126 int32_t value = (int32_t) f;
8127
8128 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8129
8130 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8131 /* Avoid sign extension to 64 bit. */
8132 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8133 }
8134
8135 /* 64 bit convert float to signed int truncate towards zero. */
8136 static void
8137 fcvtszs (sim_cpu *cpu)
8138 {
8139 unsigned sn = INSTR (9, 5);
8140 unsigned rd = INSTR (4, 0);
8141 float f = aarch64_get_FP_float (cpu, sn);
8142 int64_t value = (int64_t) f;
8143
8144 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8145
8146 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8147 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8148 }
8149
8150 /* 32 bit convert double to signed int truncate towards zero. */
8151 static void
8152 fcvtszd32 (sim_cpu *cpu)
8153 {
8154 unsigned sn = INSTR (9, 5);
8155 unsigned rd = INSTR (4, 0);
8156 /* TODO : check that this rounds toward zero. */
8157 double d = aarch64_get_FP_double (cpu, sn);
8158 int32_t value = (int32_t) d;
8159
8160 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8161
8162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8163 /* Avoid sign extension to 64 bit. */
8164 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8165 }
8166
8167 /* 64 bit convert double to signed int truncate towards zero. */
8168 static void
8169 fcvtszd (sim_cpu *cpu)
8170 {
8171 unsigned sn = INSTR (9, 5);
8172 unsigned rd = INSTR (4, 0);
8173 /* TODO : check that this rounds toward zero. */
8174 double d = aarch64_get_FP_double (cpu, sn);
8175 int64_t value;
8176
8177 value = (int64_t) d;
8178
8179 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8180
8181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8182 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8183 }
8184
8185 static void
8186 do_fcvtzu (sim_cpu *cpu)
8187 {
8188 /* instr[31] = size: 32-bit (0), 64-bit (1)
8189 instr[30,23] = 00111100
8190 instr[22] = type: single (0)/ double (1)
8191 instr[21] = enable (0)/disable(1) precision
8192 instr[20,16] = 11001
8193 instr[15,10] = precision
8194 instr[9,5] = Rs
8195 instr[4,0] = Rd. */
8196
8197 unsigned rs = INSTR (9, 5);
8198 unsigned rd = INSTR (4, 0);
8199
8200 NYI_assert (30, 23, 0x3C);
8201 NYI_assert (20, 16, 0x19);
8202
8203 if (INSTR (21, 21) != 1)
8204 /* Convert to fixed point. */
8205 HALT_NYI;
8206
8207 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8208 if (INSTR (31, 31))
8209 {
8210 /* Convert to unsigned 64-bit integer. */
8211 if (INSTR (22, 22))
8212 {
8213 double d = aarch64_get_FP_double (cpu, rs);
8214 uint64_t value = (uint64_t) d;
8215
8216 /* An exact result of 1 << 63 is valid; skip the (signed) range checks. */
8217 if (value != (1UL << 63))
8218 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8219
8220 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8221 }
8222 else
8223 {
8224 float f = aarch64_get_FP_float (cpu, rs);
8225 uint64_t value = (uint64_t) f;
8226
8227 /* An exact result of 1 << 63 is valid; skip the (signed) range checks. */
8228 if (value != (1UL << 63))
8229 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8230
8231 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8232 }
8233 }
8234 else
8235 {
8236 uint32_t value;
8237
8238 /* Convert to unsigned 32-bit integer. */
8239 if (INSTR (22, 22))
8240 {
8241 double d = aarch64_get_FP_double (cpu, rs);
8242
8243 value = (uint32_t) d;
8244 /* An exact result of 1 << 31 is valid; skip the (signed) range checks. */
8245 if (value != (1UL << 31))
8246 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8247 }
8248 else
8249 {
8250 float f = aarch64_get_FP_float (cpu, rs);
8251
8252 value = (uint32_t) f;
8253 /* An exact result of 1 << 31 is valid; skip the (signed) range checks. */
8254 if (value != (1UL << 31))
8255 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8256 }
8257
8258 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8259 }
8260 }
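
/* Note for do_fcvtzu above: ISO C does not define float-to-integer
   conversion when the value is out of range, but every out-of-range
   class is overwritten by RAISE_EXCEPTIONS afterwards; only the
   boundary values 1 << 63 and 1 << 31 -- valid unsigned results that
   the macro's signed limits would misclassify -- need the explicit
   guards above.  */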
8261
8262 static void
8263 do_UCVTF (sim_cpu *cpu)
8264 {
8265 /* instr[31] = size: 32-bit (0), 64-bit (1)
8266 instr[30,23] = 001 1110 0
8267 instr[22] = type: single (0)/ double (1)
8268 instr[21] = enable (0)/disable(1) precision
8269 instr[20,16] = 0 0011
8270 instr[15,10] = precision
8271 instr[9,5] = Rs
8272 instr[4,0] = Rd. */
8273
8274 unsigned rs = INSTR (9, 5);
8275 unsigned rd = INSTR (4, 0);
8276
8277 NYI_assert (30, 23, 0x3C);
8278 NYI_assert (20, 16, 0x03);
8279
8280 if (INSTR (21, 21) != 1)
8281 HALT_NYI;
8282
8283 /* FIXME: Add exception raising. */
8284 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8285 if (INSTR (31, 31))
8286 {
8287 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8288
8289 if (INSTR (22, 22))
8290 aarch64_set_FP_double (cpu, rd, (double) value);
8291 else
8292 aarch64_set_FP_float (cpu, rd, (float) value);
8293 }
8294 else
8295 {
8296 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8297
8298 if (INSTR (22, 22))
8299 aarch64_set_FP_double (cpu, rd, (double) value);
8300 else
8301 aarch64_set_FP_float (cpu, rd, (float) value);
8302 }
8303 }
8304
8305 static void
8306 float_vector_move (sim_cpu *cpu)
8307 {
8308 /* instr[31,17] == 100 1111 0101 0111
8309 instr[16] ==> direction 0=> to GR, 1=> from GR
8310 instr[15,10] => must be 00 0000
8311 instr[9,5] ==> source
8312 instr[4,0] ==> dest. */
8313
8314 unsigned rn = INSTR (9, 5);
8315 unsigned rd = INSTR (4, 0);
8316
8317 NYI_assert (31, 17, 0x4F57);
8318
8319 if (INSTR (15, 10) != 0)
8320 HALT_UNALLOC;
8321
8322 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8323 if (INSTR (16, 16))
8324 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8325 else
8326 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8327 }
8328
8329 static void
8330 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8331 {
8332 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8333 instr[30] = 0
8334 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8335 instr[28,25] = 1111
8336 instr[24] = 0
8337 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8338 instr[21] = 1
8339 instr[20,19] = rmode
8340 instr[18,16] = opcode
8341 instr[15,10] = 10 0000 */
8342
8343 uint32_t rmode_opcode;
8344 uint32_t size_type;
8345 uint32_t type;
8346 uint32_t size;
8347 uint32_t S;
8348
8349 if (INSTR (31, 17) == 0x4F57)
8350 {
8351 float_vector_move (cpu);
8352 return;
8353 }
8354
8355 size = INSTR (31, 31);
8356 S = INSTR (29, 29);
8357 if (S != 0)
8358 HALT_UNALLOC;
8359
8360 type = INSTR (23, 22);
8361 if (type > 1)
8362 HALT_UNALLOC;
8363
8364 rmode_opcode = INSTR (20, 16);
8365 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8366
8367 switch (rmode_opcode)
8368 {
8369 case 2: /* SCVTF. */
8370 switch (size_type)
8371 {
8372 case 0: scvtf32 (cpu); return;
8373 case 1: scvtd32 (cpu); return;
8374 case 2: scvtf (cpu); return;
8375 case 3: scvtd (cpu); return;
8376 }
8377
8378 case 6: /* FMOV GR, Vec. */
8379 switch (size_type)
8380 {
8381 case 0: gfmovs (cpu); return;
8382 case 3: gfmovd (cpu); return;
8383 default: HALT_UNALLOC;
8384 }
8385
8386 case 7: /* FMOV vec, GR. */
8387 switch (size_type)
8388 {
8389 case 0: fgmovs (cpu); return;
8390 case 3: fgmovd (cpu); return;
8391 default: HALT_UNALLOC;
8392 }
8393
8394 case 24: /* FCVTZS. */
8395 switch (size_type)
8396 {
8397 case 0: fcvtszs32 (cpu); return;
8398 case 1: fcvtszd32 (cpu); return;
8399 case 2: fcvtszs (cpu); return;
8400 case 3: fcvtszd (cpu); return;
8401 }
8402
8403 case 25: do_fcvtzu (cpu); return;
8404 case 3: do_UCVTF (cpu); return;
8405
8406 case 0: /* FCVTNS. */
8407 case 1: /* FCVTNU. */
8408 case 4: /* FCVTAS. */
8409 case 5: /* FCVTAU. */
8410 case 8: /* FCVTPS. */
8411 case 9: /* FCVTPU. */
8412 case 16: /* FCVTMS. */
8413 case 17: /* FCVTMU. */
8414 default:
8415 HALT_NYI;
8416 }
8417 }
8418
8419 static void
8420 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8421 {
8422 uint32_t flags;
8423
8424 if (isnan (fvalue1) || isnan (fvalue2))
8425 flags = C|V;
8426 else
8427 {
8428 float result = fvalue1 - fvalue2;
8429
8430 if (result == 0.0)
8431 flags = Z|C;
8432 else if (result < 0)
8433 flags = N;
8434 else /* (result > 0). */
8435 flags = C;
8436 }
8437
8438 aarch64_set_CPSR (cpu, flags);
8439 }
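
/* Illustrative sketch, not part of the simulator proper: the NZCV
   mapping implemented above, restated as a standalone helper over
   plain C floats (N, Z, C and V are the flag constants already used
   in this file).  */
#if 0
static uint32_t
fp_compare_flags (float a, float b)
{
  if (isnan (a) || isnan (b))
    return C | V;   /* Unordered.  */
  if (a == b)
    return Z | C;   /* Equal.  */
  if (a < b)
    return N;       /* Less than.  */
  return C;         /* Greater than.  */
}
#endif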
8440
8441 static void
8442 fcmps (sim_cpu *cpu)
8443 {
8444 unsigned sm = INSTR (20, 16);
8445 unsigned sn = INSTR ( 9, 5);
8446
8447 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8448 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8449
8450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8451 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8452 }
8453
8454 /* Float compare to zero -- Invalid Operation exception
8455 only on signaling NaNs. */
8456 static void
8457 fcmpzs (sim_cpu *cpu)
8458 {
8459 unsigned sn = INSTR ( 9, 5);
8460 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8461
8462 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8463 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8464 }
8465
8466 /* Float compare -- Invalid Operation exception on all NaNs. */
8467 static void
8468 fcmpes (sim_cpu *cpu)
8469 {
8470 unsigned sm = INSTR (20, 16);
8471 unsigned sn = INSTR ( 9, 5);
8472
8473 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8474 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8475
8476 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8477 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8478 }
8479
8480 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8481 static void
8482 fcmpzes (sim_cpu *cpu)
8483 {
8484 unsigned sn = INSTR ( 9, 5);
8485 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8486
8487 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8488 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8489 }
8490
8491 static void
8492 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8493 {
8494 uint32_t flags;
8495
8496 if (isnan (dval1) || isnan (dval2))
8497 flags = C|V;
8498 else
8499 {
8500 double result = dval1 - dval2;
8501
8502 if (result == 0.0)
8503 flags = Z|C;
8504 else if (result < 0)
8505 flags = N;
8506 else /* (result > 0). */
8507 flags = C;
8508 }
8509
8510 aarch64_set_CPSR (cpu, flags);
8511 }
8512
8513 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8514 static void
8515 fcmpd (sim_cpu *cpu)
8516 {
8517 unsigned sm = INSTR (20, 16);
8518 unsigned sn = INSTR ( 9, 5);
8519
8520 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8521 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8522
8523 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8524 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8525 }
8526
8527 /* Double compare to zero -- Invalid Operation exception
8528 only on signaling NaNs. */
8529 static void
8530 fcmpzd (sim_cpu *cpu)
8531 {
8532 unsigned sn = INSTR ( 9, 5);
8533 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8534
8535 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8536 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8537 }
8538
8539 /* Double compare -- Invalid Operation exception on all NaNs. */
8540 static void
8541 fcmped (sim_cpu *cpu)
8542 {
8543 unsigned sm = INSTR (20, 16);
8544 unsigned sn = INSTR ( 9, 5);
8545
8546 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8547 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8548
8549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8550 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8551 }
8552
8553 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8554 static void
8555 fcmpzed (sim_cpu *cpu)
8556 {
8557 unsigned sn = INSTR ( 9, 5);
8558 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8559
8560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8561 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8562 }
8563
8564 static void
8565 dexSimpleFPCompare (sim_cpu *cpu)
8566 {
8567 /* assert instr[28,25] == 1111
8568 instr[30:24:21:13,10] = 0011000
8569 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8570 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8571 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8572 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8573 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8574 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8575 ow ==> UNALLOC */
8576 uint32_t dispatch;
8577 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8578 uint32_t type = INSTR (23, 22);
8579 uint32_t op = INSTR (15, 14);
8580 uint32_t op2_2_0 = INSTR (2, 0);
8581
8582 if (op2_2_0 != 0)
8583 HALT_UNALLOC;
8584
8585 if (M_S != 0)
8586 HALT_UNALLOC;
8587
8588 if (type > 1)
8589 HALT_UNALLOC;
8590
8591 if (op != 0)
8592 HALT_UNALLOC;
8593
8594 /* dispatch on type and top 2 bits of opcode. */
8595 dispatch = (type << 2) | INSTR (4, 3);
8596
8597 switch (dispatch)
8598 {
8599 case 0: fcmps (cpu); return;
8600 case 1: fcmpzs (cpu); return;
8601 case 2: fcmpes (cpu); return;
8602 case 3: fcmpzes (cpu); return;
8603 case 4: fcmpd (cpu); return;
8604 case 5: fcmpzd (cpu); return;
8605 case 6: fcmped (cpu); return;
8606 case 7: fcmpzed (cpu); return;
8607 }
8608 }
8609
8610 static void
8611 do_scalar_FADDP (sim_cpu *cpu)
8612 {
8613 /* instr [31,23] = 0111 1110 0
8614 instr [22] = single(0)/double(1)
8615 instr [21,10] = 11 0000 1101 10
8616 instr [9,5] = Fn
8617 instr [4,0] = Fd. */
8618
8619 unsigned Fn = INSTR (9, 5);
8620 unsigned Fd = INSTR (4, 0);
8621
8622 NYI_assert (31, 23, 0x0FC);
8623 NYI_assert (21, 10, 0xC36);
8624
8625 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8626 if (INSTR (22, 22))
8627 {
8628 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8629 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8630
8631 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8632 }
8633 else
8634 {
8635 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8636 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8637
8638 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8639 }
8640 }
8641
8642 /* Floating point absolute difference. */
8643
8644 static void
8645 do_scalar_FABD (sim_cpu *cpu)
8646 {
8647 /* instr [31,23] = 0111 1110 1
8648 instr [22] = float(0)/double(1)
8649 instr [21] = 1
8650 instr [20,16] = Rm
8651 instr [15,10] = 1101 01
8652 instr [9, 5] = Rn
8653 instr [4, 0] = Rd. */
8654
8655 unsigned rm = INSTR (20, 16);
8656 unsigned rn = INSTR (9, 5);
8657 unsigned rd = INSTR (4, 0);
8658
8659 NYI_assert (31, 23, 0x0FD);
8660 NYI_assert (21, 21, 1);
8661 NYI_assert (15, 10, 0x35);
8662
8663 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8664 if (INSTR (22, 22))
8665 aarch64_set_FP_double (cpu, rd,
8666 fabs (aarch64_get_FP_double (cpu, rn)
8667 - aarch64_get_FP_double (cpu, rm)));
8668 else
8669 aarch64_set_FP_float (cpu, rd,
8670 fabsf (aarch64_get_FP_float (cpu, rn)
8671 - aarch64_get_FP_float (cpu, rm)));
8672 }
8673
8674 static void
8675 do_scalar_CMGT (sim_cpu *cpu)
8676 {
8677 /* instr [31,21] = 0101 1110 111
8678 instr [20,16] = Rm
8679 instr [15,10] = 00 1101
8680 instr [9, 5] = Rn
8681 instr [4, 0] = Rd. */
8682
8683 unsigned rm = INSTR (20, 16);
8684 unsigned rn = INSTR (9, 5);
8685 unsigned rd = INSTR (4, 0);
8686
8687 NYI_assert (31, 21, 0x2F7);
8688 NYI_assert (15, 10, 0x0D);
8689
8690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8691 aarch64_set_vec_u64 (cpu, rd, 0,
8692 aarch64_get_vec_s64 (cpu, rn, 0) >
8693 aarch64_get_vec_s64 (cpu, rm, 0) ? -1L : 0L);
8694 }
8695
8696 static void
8697 do_scalar_USHR (sim_cpu *cpu)
8698 {
8699 /* instr [31,23] = 0111 1111 0
8700 instr [22,16] = shift amount
8701 instr [15,10] = 0000 01
8702 instr [9, 5] = Rn
8703 instr [4, 0] = Rd. */
8704
8705 unsigned amount = 128 - INSTR (22, 16);
8706 unsigned rn = INSTR (9, 5);
8707 unsigned rd = INSTR (4, 0);
8708
8709 NYI_assert (31, 23, 0x0FE);
8710 NYI_assert (15, 10, 0x01);
8711
8712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8713 aarch64_set_vec_u64 (cpu, rd, 0,
8714 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8715 }
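
/* A hedged sketch of the shift-amount decode used above: for the
   64 bit scalar forms, immh:immb (instr[22,16], with immh[3] set)
   encodes a right shift of 128 - immh:immb, so values 0x40..0x7F
   map to shifts of 64 down to 1.  Hypothetical standalone helper,
   not used by the simulator.  */
#if 0
static unsigned
ushr64_shift_amount (unsigned immh_immb)
{
  return 128 - immh_immb;   /* e.g. 0x7F -> 1, 0x40 -> 64.  */
}
#endif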
8716
8717 static void
8718 do_scalar_SSHL (sim_cpu *cpu)
8719 {
8720 /* instr [31,21] = 0101 1110 111
8721 instr [20,16] = Rm
8722 instr [15,10] = 0100 01
8723 instr [9, 5] = Rn
8724 instr [4, 0] = Rd. */
8725
8726 unsigned rm = INSTR (20, 16);
8727 unsigned rn = INSTR (9, 5);
8728 unsigned rd = INSTR (4, 0);
8729 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8730
8731 NYI_assert (31, 21, 0x2F7);
8732 NYI_assert (15, 10, 0x11);
8733
8734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8735 if (shift >= 0)
8736 aarch64_set_vec_s64 (cpu, rd, 0,
8737 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8738 else
8739 aarch64_set_vec_s64 (cpu, rd, 0,
8740 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8741 }
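
/* SSHL takes its shift count from the bottom byte of Rm, interpreted
   as signed; a negative count shifts right instead.  A minimal
   standalone sketch of that data path, mirroring the simplified
   handling above (counts are assumed to be in range): */
#if 0
static int64_t
sshl64_ref (int64_t value, int8_t count)
{
  if (count >= 0)
    return value << count;   /* Left shift for non-negative counts.  */
  return value >> -count;    /* Arithmetic right shift otherwise.  */
}
#endif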
8742
8743 static void
8744 do_scalar_shift (sim_cpu *cpu)
8745 {
8746 /* instr [31,23] = 0101 1111 0
8747 instr [22,16] = shift amount
8748 instr [15,10] = 0101 01 [SHL]
8749 instr [15,10] = 0000 01 [SSHR]
8750 instr [9, 5] = Rn
8751 instr [4, 0] = Rd. */
8752
8753 unsigned rn = INSTR (9, 5);
8754 unsigned rd = INSTR (4, 0);
8755 unsigned amount;
8756
8757 NYI_assert (31, 23, 0x0BE);
8758
8759 if (INSTR (22, 22) == 0)
8760 HALT_UNALLOC;
8761
8762 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8763 switch (INSTR (15, 10))
8764 {
8765 case 0x01: /* SSHR */
8766 amount = 128 - INSTR (22, 16);
8767 aarch64_set_vec_s64 (cpu, rd, 0,
8768 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8769 return;
8770 case 0x15: /* SHL */
8771 amount = INSTR (22, 16) - 64;
8772 aarch64_set_vec_u64 (cpu, rd, 0,
8773 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8774 return;
8775 default:
8776 HALT_NYI;
8777 }
8778 }
8779
8780 /* FCMEQ FCMGT FCMGE. */
8781 static void
8782 do_scalar_FCM (sim_cpu *cpu)
8783 {
8784 /* instr [31,30] = 01
8785 instr [29] = U
8786 instr [28,24] = 1 1110
8787 instr [23] = E
8788 instr [22] = size
8789 instr [21] = 1
8790 instr [20,16] = Rm
8791 instr [15,12] = 1110
8792 instr [11] = AC
8793 instr [10] = 1
8794 instr [9, 5] = Rn
8795 instr [4, 0] = Rd. */
8796
8797 unsigned rm = INSTR (20, 16);
8798 unsigned rn = INSTR (9, 5);
8799 unsigned rd = INSTR (4, 0);
8800 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8801 unsigned result;
8802 float val1;
8803 float val2;
8804
8805 NYI_assert (31, 30, 1);
8806 NYI_assert (28, 24, 0x1E);
8807 NYI_assert (21, 21, 1);
8808 NYI_assert (15, 12, 0xE);
8809 NYI_assert (10, 10, 1);
8810
8811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8812 if (INSTR (22, 22))
8813 {
8814 double val1 = aarch64_get_FP_double (cpu, rn);
8815 double val2 = aarch64_get_FP_double (cpu, rm);
8816
8817 switch (EUac)
8818 {
8819 case 0: /* 000 */
8820 result = val1 == val2;
8821 break;
8822
8823 case 3: /* 011 */
8824 val1 = fabs (val1);
8825 val2 = fabs (val2);
8826 /* Fall through. */
8827 case 2: /* 010 */
8828 result = val1 >= val2;
8829 break;
8830
8831 case 7: /* 111 */
8832 val1 = fabs (val1);
8833 val2 = fabs (val2);
8834 /* Fall through. */
8835 case 6: /* 110 */
8836 result = val1 > val2;
8837 break;
8838
8839 default:
8840 HALT_UNALLOC;
8841 }
8842
8843 aarch64_set_vec_u64 (cpu, rd, 0, result ? -1ULL : 0);
8844 return;
8845 }
8846
8847 val1 = aarch64_get_FP_float (cpu, rn);
8848 val2 = aarch64_get_FP_float (cpu, rm);
8849
8850 switch (EUac)
8851 {
8852 case 0: /* 000 */
8853 result = val1 == val2;
8854 break;
8855
8856 case 3: /* 011 */
8857 val1 = fabsf (val1);
8858 val2 = fabsf (val2);
8859 /* Fall through. */
8860 case 2: /* 010 */
8861 result = val1 >= val2;
8862 break;
8863
8864 case 7: /* 111 */
8865 val1 = fabsf (val1);
8866 val2 = fabsf (val2);
8867 /* Fall through. */
8868 case 6: /* 110 */
8869 result = val1 > val2;
8870 break;
8871
8872 default:
8873 HALT_UNALLOC;
8874 }
8875
8876 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8877 }
8878
8879 /* An alias of DUP. */
8880 static void
8881 do_scalar_MOV (sim_cpu *cpu)
8882 {
8883 /* instr [31,21] = 0101 1110 000
8884 instr [20,16] = imm5
8885 instr [15,10] = 0000 01
8886 instr [9, 5] = Rn
8887 instr [4, 0] = Rd. */
8888
8889 unsigned rn = INSTR (9, 5);
8890 unsigned rd = INSTR (4, 0);
8891 unsigned index;
8892
8893 NYI_assert (31, 21, 0x2F0);
8894 NYI_assert (15, 10, 0x01);
8895
8896 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8897 if (INSTR (16, 16))
8898 {
8899 /* 8-bit. */
8900 index = INSTR (20, 17);
8901 aarch64_set_vec_u8
8902 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8903 }
8904 else if (INSTR (17, 17))
8905 {
8906 /* 16-bit. */
8907 index = INSTR (20, 18);
8908 aarch64_set_vec_u16
8909 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8910 }
8911 else if (INSTR (18, 18))
8912 {
8913 /* 32-bit. */
8914 index = INSTR (20, 19);
8915 aarch64_set_vec_u32
8916 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8917 }
8918 else if (INSTR (19, 19))
8919 {
8920 /* 64-bit. */
8921 index = INSTR (20, 20);
8922 aarch64_set_vec_u64
8923 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8924 }
8925 else
8926 HALT_UNALLOC;
8927 }
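
/* In the imm5 field decoded above, the position of the lowest set bit
   selects the element size (bit 0 => byte, bit 1 => half, and so on)
   and the bits above it form the element index.  A hedged standalone
   sketch of that decode (hypothetical helper, sizes in bytes): */
#if 0
static int
dup_element_decode (unsigned imm5, unsigned *size, unsigned *index)
{
  unsigned b;

  for (b = 0; b < 4; b++)
    if (imm5 & (1u << b))
      {
        *size = 1u << b;           /* 1, 2, 4 or 8 bytes.  */
        *index = imm5 >> (b + 1);  /* Remaining bits give the index.  */
        return 0;
      }
  return -1;                       /* imm5 = 0000x ==> UNALLOC.  */
}
#endif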
8928
8929 static void
8930 do_scalar_NEG (sim_cpu *cpu)
8931 {
8932 /* instr [31,10] = 0111 1110 1110 0000 1011 10
8933 instr [9, 5] = Rn
8934 instr [4, 0] = Rd. */
8935
8936 unsigned rn = INSTR (9, 5);
8937 unsigned rd = INSTR (4, 0);
8938
8939 NYI_assert (31, 10, 0x1FB82E);
8940
8941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8942 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
8943 }
8944
8945 static void
8946 do_scalar_USHL (sim_cpu *cpu)
8947 {
8948 /* instr [31,21] = 0111 1110 111
8949 instr [20,16] = Rm
8950 instr [15,10] = 0100 01
8951 instr [9, 5] = Rn
8952 instr [4, 0] = Rd. */
8953
8954 unsigned rm = INSTR (20, 16);
8955 unsigned rn = INSTR (9, 5);
8956 unsigned rd = INSTR (4, 0);
8957 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8958
8959 NYI_assert (31, 21, 0x3F7);
8960 NYI_assert (15, 10, 0x11);
8961
8962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8963 if (shift >= 0)
8964 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
8965 else
8966 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
8967 }
8968
8969 static void
8970 do_double_add (sim_cpu *cpu)
8971 {
8972 /* instr [31,21] = 0101 1110 111
8973 instr [20,16] = Fn
8974 instr [15,10] = 1000 01
8975 instr [9,5] = Fm
8976 instr [4,0] = Fd. */
8977 unsigned Fd;
8978 unsigned Fm;
8979 unsigned Fn;
8980 double val1;
8981 double val2;
8982
8983 NYI_assert (31, 21, 0x2F7);
8984 NYI_assert (15, 10, 0x21);
8985
8986 Fd = INSTR (4, 0);
8987 Fm = INSTR (9, 5);
8988 Fn = INSTR (20, 16);
8989
8990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8991 val1 = aarch64_get_FP_double (cpu, Fm);
8992 val2 = aarch64_get_FP_double (cpu, Fn);
8993
8994 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8995 }
8996
8997 static void
8998 do_scalar_UCVTF (sim_cpu *cpu)
8999 {
9000 /* instr [31,23] = 0111 1110 0
9001 instr [22] = single(0)/double(1)
9002 instr [21,10] = 10 0001 1101 10
9003 instr [9,5] = rn
9004 instr [4,0] = rd. */
9005
9006 unsigned rn = INSTR (9, 5);
9007 unsigned rd = INSTR (4, 0);
9008
9009 NYI_assert (31, 23, 0x0FC);
9010 NYI_assert (21, 10, 0x876);
9011
9012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9013 if (INSTR (22, 22))
9014 {
9015 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9016
9017 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9018 }
9019 else
9020 {
9021 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9022
9023 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9024 }
9025 }
9026
9027 static void
9028 do_scalar_vec (sim_cpu *cpu)
9029 {
9030 /* instr [30] = 1. */
9031 /* instr [28,25] = 1111. */
9032 switch (INSTR (31, 23))
9033 {
9034 case 0xBC:
9035 switch (INSTR (15, 10))
9036 {
9037 case 0x01: do_scalar_MOV (cpu); return;
9038 case 0x39: do_scalar_FCM (cpu); return;
9039 case 0x3B: do_scalar_FCM (cpu); return;
9040 }
9041 HALT_NYI;
9042
9043 case 0xBE: do_scalar_shift (cpu); return;
9044
9045 case 0xFC:
9046 switch (INSTR (15, 10))
9047 {
9048 case 0x36:
9049 switch (INSTR (21, 16))
9050 {
9051 case 0x30: do_scalar_FADDP (cpu); return;
9052 case 0x21: do_scalar_UCVTF (cpu); return;
9053 }
9054 HALT_NYI;
9055 case 0x39: do_scalar_FCM (cpu); return;
9056 case 0x3B: do_scalar_FCM (cpu); return;
9057 }
9058 HALT_NYI;
9059
9060 case 0xFD:
9061 switch (INSTR (15, 10))
9062 {
9063 case 0x0D: do_scalar_CMGT (cpu); return;
9064 case 0x11: do_scalar_USHL (cpu); return;
9065 case 0x2E: do_scalar_NEG (cpu); return;
9066 case 0x35: do_scalar_FABD (cpu); return;
9067 case 0x39: do_scalar_FCM (cpu); return;
9068 case 0x3B: do_scalar_FCM (cpu); return;
9069 default:
9070 HALT_NYI;
9071 }
9072
9073 case 0xFE: do_scalar_USHR (cpu); return;
9074
9075 case 0xBD:
9076 switch (INSTR (15, 10))
9077 {
9078 case 0x21: do_double_add (cpu); return;
9079 case 0x11: do_scalar_SSHL (cpu); return;
9080 default:
9081 HALT_NYI;
9082 }
9083
9084 default:
9085 HALT_NYI;
9086 }
9087 }
9088
9089 static void
9090 dexAdvSIMD1 (sim_cpu *cpu)
9091 {
9092 /* instr [28,25] = 1 111. */
9093
9094 /* We are currently only interested in the basic
9095 scalar fp routines which all have bit 30 = 0. */
9096 if (INSTR (30, 30))
9097 do_scalar_vec (cpu);
9098
9099 /* instr[24] is set for FP data processing 3-source and clear for
9100 all other basic scalar fp instruction groups. */
9101 else if (INSTR (24, 24))
9102 dexSimpleFPDataProc3Source (cpu);
9103
9104 /* instr[21] is clear for floating <-> fixed conversions and set for
9105 all other basic scalar fp instruction groups. */
9106 else if (!INSTR (21, 21))
9107 dexSimpleFPFixedConvert (cpu);
9108
9109 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9110 11 ==> cond select, 00 ==> other. */
9111 else
9112 switch (INSTR (11, 10))
9113 {
9114 case 1: dexSimpleFPCondCompare (cpu); return;
9115 case 2: dexSimpleFPDataProc2Source (cpu); return;
9116 case 3: dexSimpleFPCondSelect (cpu); return;
9117
9118 default:
9119 /* Now an ordered cascade of tests.
9120 FP immediate has instr [12] == 1.
9121 FP compare has instr [13] == 1.
9122 FP Data Proc 1 Source has instr [14] == 1.
9123 FP floating <--> integer conversions has instr [15] == 0. */
9124 if (INSTR (12, 12))
9125 dexSimpleFPImmediate (cpu);
9126
9127 else if (INSTR (13, 13))
9128 dexSimpleFPCompare (cpu);
9129
9130 else if (INSTR (14, 14))
9131 dexSimpleFPDataProc1Source (cpu);
9132
9133 else if (!INSTR (15, 15))
9134 dexSimpleFPIntegerConvert (cpu);
9135
9136 else
9137 /* If we get here then instr[15] == 1 which means UNALLOC. */
9138 HALT_UNALLOC;
9139 }
9140 }
9141
9142 /* PC relative addressing. */
9143
9144 static void
9145 pcadr (sim_cpu *cpu)
9146 {
9147 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9148 instr[30,29] = immlo
9149 instr[23,5] = immhi. */
9150 uint64_t address;
9151 unsigned rd = INSTR (4, 0);
9152 uint32_t isPage = INSTR (31, 31);
9153 union { int64_t s64; uint64_t u64; } imm;
9154 uint64_t offset;
9155
9156 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9157 offset = imm.u64;
9158 offset = (offset << 2) | INSTR (30, 29);
9159
9160 address = aarch64_get_PC (cpu);
9161
9162 if (isPage)
9163 {
9164 offset <<= 12;
9165 address &= ~0xfff;
9166 }
9167
9168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9169 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9170 }
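
/* A worked sketch of the computation above: ADR adds the signed
   21 bit immediate to the PC, while ADRP shifts the immediate left
   by 12 and clears the low 12 bits of the PC, yielding a 4KB
   page-aligned result.  Hypothetical standalone version: */
#if 0
static uint64_t
adr_ref (uint64_t pc, int64_t imm21, int is_adrp)
{
  if (is_adrp)
    return (pc & ~(uint64_t) 0xfff) + ((uint64_t) imm21 << 12);
  return pc + imm21;
}
#endif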
9171
9172 /* Specific decode and execute for group Data Processing Immediate. */
9173
9174 static void
9175 dexPCRelAddressing (sim_cpu *cpu)
9176 {
9177 /* assert instr[28,24] = 10000. */
9178 pcadr (cpu);
9179 }
9180
9181 /* Immediate logical.
9182 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9183 16, 32 or 64 bit sequence pulled out at decode and possibly
9184 inverting it.
9185
9186 N.B. the output register (dest) can normally be Xn or SP
9187 the exception occurs for flag setting instructions which may
9188 only use Xn for the output (dest). The input register can
9189 never be SP. */
9190
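/* A hedged sketch of the replication mentioned above: a simd_size bit
   element (already rotated and possibly inverted by the decode) is
   repeated across all 64 bits to form the bitmask immediate.
   Standalone illustration only: */
#if 0
static uint64_t
replicate_element (uint64_t element, unsigned simd_size)
{
  uint64_t imm = element;
  unsigned i;

  /* Keep doubling the pattern until all 64 bits are filled.  */
  for (i = simd_size; i < 64; i *= 2)
    imm |= imm << i;
  return imm;
}
#endif
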
9191 /* 32 bit and immediate. */
9192 static void
9193 and32 (sim_cpu *cpu, uint32_t bimm)
9194 {
9195 unsigned rn = INSTR (9, 5);
9196 unsigned rd = INSTR (4, 0);
9197
9198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9199 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9200 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9201 }
9202
9203 /* 64 bit and immediate. */
9204 static void
9205 and64 (sim_cpu *cpu, uint64_t bimm)
9206 {
9207 unsigned rn = INSTR (9, 5);
9208 unsigned rd = INSTR (4, 0);
9209
9210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9211 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9212 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9213 }
9214
9215 /* 32 bit and immediate set flags. */
9216 static void
9217 ands32 (sim_cpu *cpu, uint32_t bimm)
9218 {
9219 unsigned rn = INSTR (9, 5);
9220 unsigned rd = INSTR (4, 0);
9221
9222 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9223 uint32_t value2 = bimm;
9224
9225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9226 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9227 set_flags_for_binop32 (cpu, value1 & value2);
9228 }
9229
9230 /* 64 bit and immediate set flags. */
9231 static void
9232 ands64 (sim_cpu *cpu, uint64_t bimm)
9233 {
9234 unsigned rn = INSTR (9, 5);
9235 unsigned rd = INSTR (4, 0);
9236
9237 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9238 uint64_t value2 = bimm;
9239
9240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9241 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9242 set_flags_for_binop64 (cpu, value1 & value2);
9243 }
9244
9245 /* 32 bit exclusive or immediate. */
9246 static void
9247 eor32 (sim_cpu *cpu, uint32_t bimm)
9248 {
9249 unsigned rn = INSTR (9, 5);
9250 unsigned rd = INSTR (4, 0);
9251
9252 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9253 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9254 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9255 }
9256
9257 /* 64 bit exclusive or immediate. */
9258 static void
9259 eor64 (sim_cpu *cpu, uint64_t bimm)
9260 {
9261 unsigned rn = INSTR (9, 5);
9262 unsigned rd = INSTR (4, 0);
9263
9264 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9265 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9266 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9267 }
9268
9269 /* 32 bit or immediate. */
9270 static void
9271 orr32 (sim_cpu *cpu, uint32_t bimm)
9272 {
9273 unsigned rn = INSTR (9, 5);
9274 unsigned rd = INSTR (4, 0);
9275
9276 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9277 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9278 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9279 }
9280
9281 /* 64 bit or immediate. */
9282 static void
9283 orr64 (sim_cpu *cpu, uint64_t bimm)
9284 {
9285 unsigned rn = INSTR (9, 5);
9286 unsigned rd = INSTR (4, 0);
9287
9288 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9289 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9290 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9291 }
9292
9293 /* Logical shifted register.
9294 These allow an optional LSL, ASR, LSR or ROR to the second source
9295 register with a count up to the register bit count.
9296 N.B register args may not be SP. */
9297
9298 /* 32 bit AND shifted register. */
9299 static void
9300 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9301 {
9302 unsigned rm = INSTR (20, 16);
9303 unsigned rn = INSTR (9, 5);
9304 unsigned rd = INSTR (4, 0);
9305
9306 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9307 aarch64_set_reg_u64
9308 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9309 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9310 }
9311
9312 /* 64 bit AND shifted register. */
9313 static void
9314 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9315 {
9316 unsigned rm = INSTR (20, 16);
9317 unsigned rn = INSTR (9, 5);
9318 unsigned rd = INSTR (4, 0);
9319
9320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9321 aarch64_set_reg_u64
9322 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9323 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9324 }
9325
9326 /* 32 bit AND shifted register setting flags. */
9327 static void
9328 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9329 {
9330 unsigned rm = INSTR (20, 16);
9331 unsigned rn = INSTR (9, 5);
9332 unsigned rd = INSTR (4, 0);
9333
9334 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9335 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9336 shift, count);
9337
9338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9339 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9340 set_flags_for_binop32 (cpu, value1 & value2);
9341 }
9342
9343 /* 64 bit AND shifted register setting flags. */
9344 static void
9345 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9346 {
9347 unsigned rm = INSTR (20, 16);
9348 unsigned rn = INSTR (9, 5);
9349 unsigned rd = INSTR (4, 0);
9350
9351 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9352 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9353 shift, count);
9354
9355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9356 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9357 set_flags_for_binop64 (cpu, value1 & value2);
9358 }
9359
9360 /* 32 bit BIC shifted register. */
9361 static void
9362 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9363 {
9364 unsigned rm = INSTR (20, 16);
9365 unsigned rn = INSTR (9, 5);
9366 unsigned rd = INSTR (4, 0);
9367
9368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9369 aarch64_set_reg_u64
9370 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9371 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9372 }
9373
9374 /* 64 bit BIC shifted register. */
9375 static void
9376 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9377 {
9378 unsigned rm = INSTR (20, 16);
9379 unsigned rn = INSTR (9, 5);
9380 unsigned rd = INSTR (4, 0);
9381
9382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9383 aarch64_set_reg_u64
9384 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9385 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9386 }
9387
9388 /* 32 bit BIC shifted register setting flags. */
9389 static void
9390 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9391 {
9392 unsigned rm = INSTR (20, 16);
9393 unsigned rn = INSTR (9, 5);
9394 unsigned rd = INSTR (4, 0);
9395
9396 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9397 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9398 shift, count);
9399
9400 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9401 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9402 set_flags_for_binop32 (cpu, value1 & value2);
9403 }
9404
9405 /* 64 bit BIC shifted register setting flags. */
9406 static void
9407 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9408 {
9409 unsigned rm = INSTR (20, 16);
9410 unsigned rn = INSTR (9, 5);
9411 unsigned rd = INSTR (4, 0);
9412
9413 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9414 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9415 shift, count);
9416
9417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9418 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9419 set_flags_for_binop64 (cpu, value1 & value2);
9420 }
9421
9422 /* 32 bit EON shifted register. */
9423 static void
9424 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9425 {
9426 unsigned rm = INSTR (20, 16);
9427 unsigned rn = INSTR (9, 5);
9428 unsigned rd = INSTR (4, 0);
9429
9430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9431 aarch64_set_reg_u64
9432 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9433 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9434 }
9435
9436 /* 64 bit EON shifted register. */
9437 static void
9438 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9439 {
9440 unsigned rm = INSTR (20, 16);
9441 unsigned rn = INSTR (9, 5);
9442 unsigned rd = INSTR (4, 0);
9443
9444 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9445 aarch64_set_reg_u64
9446 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9447 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9448 }
9449
9450 /* 32 bit EOR shifted register. */
9451 static void
9452 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9453 {
9454 unsigned rm = INSTR (20, 16);
9455 unsigned rn = INSTR (9, 5);
9456 unsigned rd = INSTR (4, 0);
9457
9458 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9459 aarch64_set_reg_u64
9460 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9461 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9462 }
9463
9464 /* 64 bit EOR shifted register. */
9465 static void
9466 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9467 {
9468 unsigned rm = INSTR (20, 16);
9469 unsigned rn = INSTR (9, 5);
9470 unsigned rd = INSTR (4, 0);
9471
9472 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9473 aarch64_set_reg_u64
9474 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9475 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9476 }
9477
9478 /* 32 bit ORR shifted register. */
9479 static void
9480 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9481 {
9482 unsigned rm = INSTR (20, 16);
9483 unsigned rn = INSTR (9, 5);
9484 unsigned rd = INSTR (4, 0);
9485
9486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9487 aarch64_set_reg_u64
9488 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9489 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9490 }
9491
9492 /* 64 bit ORR shifted register. */
9493 static void
9494 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9495 {
9496 unsigned rm = INSTR (20, 16);
9497 unsigned rn = INSTR (9, 5);
9498 unsigned rd = INSTR (4, 0);
9499
9500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9501 aarch64_set_reg_u64
9502 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9503 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9504 }
9505
9506 /* 32 bit ORN shifted register. */
9507 static void
9508 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9509 {
9510 unsigned rm = INSTR (20, 16);
9511 unsigned rn = INSTR (9, 5);
9512 unsigned rd = INSTR (4, 0);
9513
9514 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9515 aarch64_set_reg_u64
9516 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9517 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9518 }
9519
9520 /* 64 bit ORN shifted register. */
9521 static void
9522 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9523 {
9524 unsigned rm = INSTR (20, 16);
9525 unsigned rn = INSTR (9, 5);
9526 unsigned rd = INSTR (4, 0);
9527
9528 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9529 aarch64_set_reg_u64
9530 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9531 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9532 }
9533
9534 static void
9535 dexLogicalImmediate (sim_cpu *cpu)
9536 {
9537 /* assert instr[28,23] = 100100
9538 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9539 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9540 instr[22] = N : used to construct immediate mask
9541 instr[21,16] = immr
9542 instr[15,10] = imms
9543 instr[9,5] = Rn
9544 instr[4,0] = Rd */
9545
9546 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9547 uint32_t size = INSTR (31, 31);
9548 uint32_t N = INSTR (22, 22);
9549 /* uint32_t immr = INSTR (21, 16); */
9550 /* uint32_t imms = INSTR (15, 10); */
9551 uint32_t index = INSTR (22, 10);
9552 uint64_t bimm64 = LITable [index];
9553 uint32_t dispatch = INSTR (30, 29);
9554
9555 if (~size & N)
9556 HALT_UNALLOC;
9557
9558 if (!bimm64)
9559 HALT_UNALLOC;
9560
9561 if (size == 0)
9562 {
9563 uint32_t bimm = (uint32_t) bimm64;
9564
9565 switch (dispatch)
9566 {
9567 case 0: and32 (cpu, bimm); return;
9568 case 1: orr32 (cpu, bimm); return;
9569 case 2: eor32 (cpu, bimm); return;
9570 case 3: ands32 (cpu, bimm); return;
9571 }
9572 }
9573 else
9574 {
9575 switch (dispatch)
9576 {
9577 case 0: and64 (cpu, bimm64); return;
9578 case 1: orr64 (cpu, bimm64); return;
9579 case 2: eor64 (cpu, bimm64); return;
9580 case 3: ands64 (cpu, bimm64); return;
9581 }
9582 }
9583 HALT_UNALLOC;
9584 }
9585
9586 /* Immediate move.
9587 The uimm argument is a 16 bit value to be inserted into the
9588 target register. The pos argument locates the 16 bit word in the
9589 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9590 3} for 64 bit.
9591 N.B. the register arg may not be SP, so it must be
9592 accessed using the setGZRegisterXXX accessors. */
9593
9594 /* 32 bit move 16 bit immediate zero remaining shorts. */
9595 static void
9596 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9597 {
9598 unsigned rd = INSTR (4, 0);
9599
9600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9601 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9602 }
9603
9604 /* 64 bit move 16 bit immediate zero remaining shorts. */
9605 static void
9606 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9607 {
9608 unsigned rd = INSTR (4, 0);
9609
9610 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9611 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9612 }
9613
9614 /* 32 bit move 16 bit immediate negated. */
9615 static void
9616 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9617 {
9618 unsigned rd = INSTR (4, 0);
9619
9620 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9621 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9622 }
9623
9624 /* 64 bit move 16 bit immediate negated. */
9625 static void
9626 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9627 {
9628 unsigned rd = INSTR (4, 0);
9629
9630 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9631 aarch64_set_reg_u64
9632 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9633 ^ 0xffffffffffffffffULL));
9634 }
9635
9636 /* 32 bit move 16 bit immediate keep remaining shorts. */
9637 static void
9638 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9639 {
9640 unsigned rd = INSTR (4, 0);
9641 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9642 uint32_t value = val << (pos * 16);
9643 uint32_t mask = ~(0xffffU << (pos * 16));
9644
9645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9646 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9647 }
9648
9649 /* 64 bit move 16 bit immediate keep remaining shorts. */
9650 static void
9651 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9652 {
9653 unsigned rd = INSTR (4, 0);
9654 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9655 uint64_t value = (uint64_t) val << (pos * 16);
9656 uint64_t mask = ~(0xffffULL << (pos * 16));
9657
9658 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9659 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9660 }
9661
9662 static void
9663 dexMoveWideImmediate (sim_cpu *cpu)
9664 {
9665 /* assert instr[28:23] = 100101
9666 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9667 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9668 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9669 instr[20,5] = uimm16
9670 instr[4,0] = Rd */
9671
9672 /* N.B. the (multiple of 16) shift is applied by the called routine,
9673 we just pass the multiplier. */
9674
9675 uint32_t imm;
9676 uint32_t size = INSTR (31, 31);
9677 uint32_t op = INSTR (30, 29);
9678 uint32_t shift = INSTR (22, 21);
9679
9680 /* A 32 bit move can only shift by 0 or 1 lot of 16;
9681 anything else is an unallocated instruction. */
9682 if (size == 0 && (shift > 1))
9683 HALT_UNALLOC;
9684
9685 if (op == 1)
9686 HALT_UNALLOC;
9687
9688 imm = INSTR (20, 5);
9689
9690 if (size == 0)
9691 {
9692 if (op == 0)
9693 movn32 (cpu, imm, shift);
9694 else if (op == 2)
9695 movz32 (cpu, imm, shift);
9696 else
9697 movk32 (cpu, imm, shift);
9698 }
9699 else
9700 {
9701 if (op == 0)
9702 movn64 (cpu, imm, shift);
9703 else if (op == 2)
9704 movz64 (cpu, imm, shift);
9705 else
9706 movk64 (cpu, imm, shift);
9707 }
9708 }
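
/* A short worked example of how the move-wide forms compose: the
   64 bit constant 0x123456789abc can be built with one MOVZ and two
   MOVKs, each depositing a 16 bit chunk.  Hedged sketch of the
   resulting register value: */
#if 0
static uint64_t
movz_movk_example (void)
{
  uint64_t x = (uint64_t) 0x1234 << 32;             /* MOVZ x, #0x1234, LSL #32.  */
  x = (x & ~(0xffffULL << 16)) | (0x5678ULL << 16); /* MOVK x, #0x5678, LSL #16.  */
  x = (x & ~0xffffULL) | 0x9abc;                    /* MOVK x, #0x9abc.  */
  return x;                                         /* 0x0000123456789abc.  */
}
#endif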
9709
9710 /* Bitfield operations.
9711 These take a pair of bit positions r and s which are in {0..31}
9712 or {0..63} depending on the instruction word size.
9713 N.B register args may not be SP. */
9714
9715 /* OK, we start with ubfm, which just needs to pick
9716 some bits out of the source, zero the rest and write
9717 the result to the dest. It just needs two logical shifts. */
9718
9719 /* 32 bit bitfield move, left and right of affected zeroed
9720 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9721 static void
9722 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9723 {
9724 unsigned rd;
9725 unsigned rn = INSTR (9, 5);
9726 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9727
9728 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9729 if (r <= s)
9730 {
9731 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9732 We want only bits s:xxx:r at the bottom of the word
9733 so we LSL bit s up to bit 31 i.e. by 31 - s
9734 and then we LSR to bring bit 31 down to bit s - r
9735 i.e. by 31 + r - s. */
9736 value <<= 31 - s;
9737 value >>= 31 + r - s;
9738 }
9739 else
9740 {
9741 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9742 We want only bits s:xxx:0 starting at bit 31-(r-1)
9743 so we LSL bit s up to bit 31 i.e. by 31 - s
9744 and then we LSR to bring bit 31 down to 31-(r-1)+s
9745 i.e. by r - (s + 1). */
9746 value <<= 31 - s;
9747 value >>= r - (s + 1);
9748 }
9749
9750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9751 rd = INSTR (4, 0);
9752 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9753 }
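
/* For example, with r = 8 and s = 15 the two shifts above extract the
   second byte of Wn: the LSL by 16 moves bit 15 up to bit 31 and the
   LSR by 24 brings it back down to bit 7, so Wd = Wn<15:8>.  A hedged
   standalone check: */
#if 0
static uint32_t
ubfm32_example (uint32_t wn)
{
  uint32_t value = wn;

  value <<= 31 - 15;      /* 31 - s.  */
  value >>= 31 + 8 - 15;  /* 31 + r - s.  */
  return value;           /* Equals (wn >> 8) & 0xff.  */
}
#endif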
9754
9755 /* 64 bit bitfield move, left and right of affected zeroed
9756 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9757 static void
9758 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9759 {
9760 unsigned rd;
9761 unsigned rn = INSTR (9, 5);
9762 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9763
9764 if (r <= s)
9765 {
9766 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9767 We want only bits s:xxx:r at the bottom of the word.
9768 So we LSL bit s up to bit 63 i.e. by 63 - s
9769 and then we LSR to bring bit 63 down to bit s - r
9770 i.e. by 63 + r - s. */
9771 value <<= 63 - s;
9772 value >>= 63 + r - s;
9773 }
9774 else
9775 {
9776 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9777 We want only bits s:xxx:0 starting at bit 63-(r-1).
9778 So we LSL bit s up to bit 63 i.e. by 63 - s
9779 and then we LSR to bring bit 63 down to 63-(r-1)+s
9780 i.e. by r - (s + 1). */
9781 value <<= 63 - s;
9782 value >>= r - (s + 1);
9783 }
9784
9785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9786 rd = INSTR (4, 0);
9787 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9788 }
9789
9790 /* The signed versions need to insert sign bits
9791 on the left of the inserted bit field, so we do
9792 much the same as the unsigned version except we
9793 use an arithmetic shift right -- this just means
9794 we need to operate on signed values. */
9795
9796 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9797 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9798 static void
9799 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9800 {
9801 unsigned rd;
9802 unsigned rn = INSTR (9, 5);
9803 /* as per ubfm32 but use an ASR instead of an LSR. */
9804 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9805
9806 if (r <= s)
9807 {
9808 value <<= 31 - s;
9809 value >>= 31 + r - s;
9810 }
9811 else
9812 {
9813 value <<= 31 - s;
9814 value >>= r - (s + 1);
9815 }
9816
9817 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9818 rd = INSTR (4, 0);
9819 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9820 }
9821
9822 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9823 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9824 static void
9825 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9826 {
9827 unsigned rd;
9828 unsigned rn = INSTR (9, 5);
9829 /* As per ubfm but use an ASR instead of an LSR. */
9830 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9831
9832 if (r <= s)
9833 {
9834 value <<= 63 - s;
9835 value >>= 63 + r - s;
9836 }
9837 else
9838 {
9839 value <<= 63 - s;
9840 value >>= r - (s + 1);
9841 }
9842
9843 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9844 rd = INSTR (4, 0);
9845 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9846 }
9847
9848 /* Finally, these versions leave non-affected bits
9849 as is, so we need to generate the bits as per
9850 ubfm and also generate a mask to pick the
9851 bits from the original and computed values. */
9852
9853 /* 32 bit bitfield move, non-affected bits left as is.
9854 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9855 static void
9856 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9857 {
9858 unsigned rn = INSTR (9, 5);
9859 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9860 uint32_t mask = -1;
9861 unsigned rd;
9862 uint32_t value2;
9863
9864 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9865 if (r <= s)
9866 {
9867 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9868 We want only bits s:xxx:r at the bottom of the word
9869 so we LSL bit s up to bit 31 i.e. by 31 - s
9870 and then we LSR to bring bit 31 down to bit s - r
9871 i.e. by 31 + r - s. */
9872 value <<= 31 - s;
9873 value >>= 31 + r - s;
9874 /* the mask must include the same bits. */
9875 mask <<= 31 - s;
9876 mask >>= 31 + r - s;
9877 }
9878 else
9879 {
9880 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9881 We want only bits s:xxx:0 starting at bit 31-(r-1)
9882 so we LSL bit s up to bit 31 i.e. by 31 - s
9883 and then we LSR to bring bit 31 down to 31-(r-1)+s
9884 i.e. by r - (s + 1). */
9885 value <<= 31 - s;
9886 value >>= r - (s + 1);
9887 /* The mask must include the same bits. */
9888 mask <<= 31 - s;
9889 mask >>= r - (s + 1);
9890 }
9891
9892 rd = INSTR (4, 0);
9893 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9894
9895 value2 &= ~mask;
9896 value2 |= value;
9897
9898 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9899 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
9901 }
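
/* The merge step above in isolation: given field bits already aligned
   to their destination positions and a mask covering those same
   positions, every bit outside the mask is preserved.  Hedged
   standalone sketch: */
#if 0
static uint32_t
bitfield_merge (uint32_t dest, uint32_t field, uint32_t mask)
{
  return (dest & ~mask) | (field & mask);
}
#endif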
9902
9903 /* 64 bit bitfield move, non-affected bits left as is.
9904 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9905 static void
9906 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9907 {
9908 unsigned rd;
9909 unsigned rn = INSTR (9, 5);
9910 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9911 uint64_t mask = 0xffffffffffffffffULL;
9912
9913 if (r <= s)
9914 {
9915 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9916 We want only bits s:xxx:r at the bottom of the word
9917 so we LSL bit s up to bit 63 i.e. by 63 - s
9918 and then we LSR to bring bit 63 down to bit s - r
9919 i.e. by 63 + r - s. */
9920 value <<= 63 - s;
9921 value >>= 63 + r - s;
9922 /* The mask must include the same bits. */
9923 mask <<= 63 - s;
9924 mask >>= 63 + r - s;
9925 }
9926 else
9927 {
9928 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9929 We want only bits s:xxx:0 starting at bit 63-(r-1)
9930 so we LSL bit s up to bit 63 i.e. by 63 - s
9931 and then we LSR to bring bit 63 down to 63-(r-1)+s
9932 i.e. by r - (s + 1). */
9933 value <<= 63 - s;
9934 value >>= r - (s + 1);
9935 /* The mask must include the same bits. */
9936 mask <<= 63 - s;
9937 mask >>= r - (s + 1);
9938 }
9939
9940 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9941 rd = INSTR (4, 0);
9942 aarch64_set_reg_u64
9943 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
9944 }
9945
9946 static void
9947 dexBitfieldImmediate (sim_cpu *cpu)
9948 {
9949 /* assert instr[28:23] = 100110
9950 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9951 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
9952 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
9953 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
9954 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
9955 instr[9,5] = Rn
9956 instr[4,0] = Rd */
9957
9958 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9959 uint32_t dispatch;
9960 uint32_t imms;
9961 uint32_t size = INSTR (31, 31);
9962 uint32_t N = INSTR (22, 22);
9963 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
9964 /* or else we have an UNALLOC. */
9965 uint32_t immr = INSTR (21, 16);
9966
9967 if (~size & N)
9968 HALT_UNALLOC;
9969
9970 if (!size && uimm (immr, 5, 5))
9971 HALT_UNALLOC;
9972
9973 imms = INSTR (15, 10);
9974 if (!size && uimm (imms, 5, 5))
9975 HALT_UNALLOC;
9976
9977 /* Switch on combined size and op. */
9978 dispatch = INSTR (31, 29);
9979 switch (dispatch)
9980 {
9981 case 0: sbfm32 (cpu, immr, imms); return;
9982 case 1: bfm32 (cpu, immr, imms); return;
9983 case 2: ubfm32 (cpu, immr, imms); return;
9984 case 4: sbfm (cpu, immr, imms); return;
9985 case 5: bfm (cpu, immr, imms); return;
9986 case 6: ubfm (cpu, immr, imms); return;
9987 default: HALT_UNALLOC;
9988 }
9989 }
9990
9991 static void
9992 do_EXTR_32 (sim_cpu *cpu)
9993 {
9994 /* instr[31:21] = 00010011100
9995 instr[20,16] = Rm
9996 instr[15,10] = imms : 0xxxxx for 32 bit
9997 instr[9,5] = Rn
9998 instr[4,0] = Rd */
9999 unsigned rm = INSTR (20, 16);
10000 unsigned imms = INSTR (15, 10) & 31;
10001 unsigned rn = INSTR ( 9, 5);
10002 unsigned rd = INSTR ( 4, 0);
10003 uint64_t val1;
10004 uint64_t val2;
10005
10006 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10007 val1 >>= imms;
10008 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10009 val2 <<= (32 - imms);
10010
10011 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10012 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10013 }
10014
10015 static void
10016 do_EXTR_64 (sim_cpu *cpu)
10017 {
10018 /* instr[31:21] = 10010011100
10019 instr[20,16] = Rm
10020 instr[15,10] = imms
10021 instr[9,5] = Rn
10022 instr[4,0] = Rd */
10023 unsigned rm = INSTR (20, 16);
10024 unsigned imms = INSTR (15, 10) & 63;
10025 unsigned rn = INSTR ( 9, 5);
10026 unsigned rd = INSTR ( 4, 0);
10027 uint64_t val;
10028
10029 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10030 val >>= imms;
10031 val |= imms ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)) : 0;
10032
10033 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10034 }
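
/* EXTR extracts a register-width field from the concatenation Rn:Rm
   starting at bit lsb, so the 64 bit result is Rn:Rm<lsb+63:lsb>.  A
   hedged reference sketch, including the lsb == 0 case which must be
   special-cased to avoid an undefined shift by 64: */
#if 0
static uint64_t
extr64_ref (uint64_t rn, uint64_t rm, unsigned lsb)  /* 0 <= lsb <= 63.  */
{
  uint64_t val = rm >> lsb;

  if (lsb != 0)
    val |= rn << (64 - lsb);
  return val;
}
#endif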
10035
10036 static void
10037 dexExtractImmediate (sim_cpu *cpu)
10038 {
10039 /* assert instr[28:23] = 100111
10040 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10041 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10042 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10043 instr[21] = op0 : must be 0 or UNALLOC
10044 instr[20,16] = Rm
10045 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10046 instr[9,5] = Rn
10047 instr[4,0] = Rd */
10048
10049 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10050 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10051 uint32_t dispatch;
10052 uint32_t size = INSTR (31, 31);
10053 uint32_t N = INSTR (22, 22);
10054 /* 32 bit operations must have imms[5] = 0
10055 or else we have an UNALLOC. */
10056 uint32_t imms = INSTR (15, 10);
10057
10058 if (size ^ N)
10059 HALT_UNALLOC;
10060
10061 if (!size && uimm (imms, 5, 5))
10062 HALT_UNALLOC;
10063
10064 /* Switch on combined size and op. */
10065 dispatch = INSTR (31, 29);
10066
10067 if (dispatch == 0)
10068 do_EXTR_32 (cpu);
10069
10070 else if (dispatch == 4)
10071 do_EXTR_64 (cpu);
10072
10073 else if (dispatch == 1)
10074 HALT_NYI;
10075 else
10076 HALT_UNALLOC;
10077 }
10078
10079 static void
10080 dexDPImm (sim_cpu *cpu)
10081 {
10082 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10083 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10084 bits [25,23] of a DPImm are the secondary dispatch vector. */
10085 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10086
10087 switch (group2)
10088 {
10089 case DPIMM_PCADR_000:
10090 case DPIMM_PCADR_001:
10091 dexPCRelAddressing (cpu);
10092 return;
10093
10094 case DPIMM_ADDSUB_010:
10095 case DPIMM_ADDSUB_011:
10096 dexAddSubtractImmediate (cpu);
10097 return;
10098
10099 case DPIMM_LOG_100:
10100 dexLogicalImmediate (cpu);
10101 return;
10102
10103 case DPIMM_MOV_101:
10104 dexMoveWideImmediate (cpu);
10105 return;
10106
10107 case DPIMM_BITF_110:
10108 dexBitfieldImmediate (cpu);
10109 return;
10110
10111 case DPIMM_EXTR_111:
10112 dexExtractImmediate (cpu);
10113 return;
10114
10115 default:
10116 /* Should never reach here. */
10117 HALT_NYI;
10118 }
10119 }
10120
10121 static void
10122 dexLoadUnscaledImmediate (sim_cpu *cpu)
10123 {
10124 /* instr[29,24] == 111_00
10125 instr[21] == 0
10126 instr[11,10] == 00
10127 instr[31,30] = size
10128 instr[26] = V
10129 instr[23,22] = opc
10130 instr[20,12] = simm9
10131 instr[9,5] = rn may be SP. */
10132 /* unsigned rt = INSTR (4, 0); */
10133 uint32_t V = INSTR (26, 26);
10134 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10135 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10136
10137 if (!V)
10138 {
10139 /* GReg operations. */
10140 switch (dispatch)
10141 {
10142 case 0: sturb (cpu, imm); return;
10143 case 1: ldurb32 (cpu, imm); return;
10144 case 2: ldursb64 (cpu, imm); return;
10145 case 3: ldursb32 (cpu, imm); return;
10146 case 4: sturh (cpu, imm); return;
10147 case 5: ldurh32 (cpu, imm); return;
10148 case 6: ldursh64 (cpu, imm); return;
10149 case 7: ldursh32 (cpu, imm); return;
10150 case 8: stur32 (cpu, imm); return;
10151 case 9: ldur32 (cpu, imm); return;
10152 case 10: ldursw (cpu, imm); return;
10153 case 12: stur64 (cpu, imm); return;
10154 case 13: ldur64 (cpu, imm); return;
10155
10156 case 14:
10157 /* PRFUM NYI. */
10158 HALT_NYI;
10159
10160 default:
10161 case 11:
10162 case 15:
10163 HALT_UNALLOC;
10164 }
10165 }
10166
10167 /* FReg operations. */
10168 switch (dispatch)
10169 {
10170 case 2: fsturq (cpu, imm); return;
10171 case 3: fldurq (cpu, imm); return;
10172 case 8: fsturs (cpu, imm); return;
10173 case 9: fldurs (cpu, imm); return;
10174 case 12: fsturd (cpu, imm); return;
10175 case 13: fldurd (cpu, imm); return;
10176
10177 case 0: /* STUR 8 bit FP. */
10178 case 1: /* LDUR 8 bit FP. */
10179 case 4: /* STUR 16 bit FP. */
10180 case 5: /* LDUR 16 bit FP. */
10181 HALT_NYI;
10182
10183 default:
10184 case 6:
10185 case 7:
10186 case 10:
10187 case 11:
10188 case 14:
10189 case 15:
10190 HALT_UNALLOC;
10191 }
10192 }
10193
10194 /* N.B. A preliminary note regarding all the ldrs<x>32
10195 instructions
10196
10197 The signed value loaded by these instructions is cast to unsigned
10198 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10199 64 bit element of the GReg union. This performs a 32 bit sign extension
10200 (as required) but avoids 64 bit sign extension, thus ensuring that the
10201 top half of the register word is zero. This is what the spec demands
10202 when a 32 bit load occurs. */
10203
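/* A minimal sketch of that extension rule: the loaded byte is sign
   extended to 32 bits, and the register write then zero extends to
   64 bits, so the top half of the X register reads back as zero.  */
#if 0
static uint64_t
ldrsb32_extend_ref (int8_t byte)
{
  uint32_t w = (uint32_t) (int32_t) byte;  /* Sign extend to 32 bits.  */
  return (uint64_t) w;                     /* Zero extend to 64 bits.  */
}
#endif
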
10204 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10205 static void
10206 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10207 {
10208 unsigned int rn = INSTR (9, 5);
10209 unsigned int rt = INSTR (4, 0);
10210
10211 /* The target register may not be SP but the source may be.
10212 There is no scaling required for a byte load. */
10213 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10214 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10215 (int64_t) aarch64_get_mem_s8 (cpu, address));
10216 }
10217
10218 /* 32 bit load sign-extended byte scaled or unscaled zero-
10219 or sign-extended 32-bit register offset. */
10220 static void
10221 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10222 {
10223 unsigned int rm = INSTR (20, 16);
10224 unsigned int rn = INSTR (9, 5);
10225 unsigned int rt = INSTR (4, 0);
10226
10227 /* rn may reference SP, rm and rt must reference ZR. */
10228
10229 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10230 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10231 extension);
10232
10233 /* There is no scaling required for a byte load. */
10234 aarch64_set_reg_u64
10235 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10236 + displacement));
10237 }
10238
10239 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10240 pre- or post-writeback. */
10241 static void
10242 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10243 {
10244 uint64_t address;
10245 unsigned int rn = INSTR (9, 5);
10246 unsigned int rt = INSTR (4, 0);
10247
10248 if (rn == rt && wb != NoWriteBack)
10249 HALT_UNALLOC;
10250
10251 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10252
10253 if (wb == Pre)
10254 address += offset;
10255
10256 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10257 (int64_t) aarch64_get_mem_s8 (cpu, address));
10258
10259 if (wb == Post)
10260 address += offset;
10261
10262 if (wb != NoWriteBack)
10263 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10264 }
10265
10266 /* 8 bit store scaled. */
10267 static void
10268 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10269 {
10270 unsigned st = INSTR (4, 0);
10271 unsigned rn = INSTR (9, 5);
10272
10273 aarch64_set_mem_u8 (cpu,
10274 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10275 aarch64_get_vec_u8 (cpu, st, 0));
10276 }
10277
10278 /* 8 bit store scaled or unscaled zero- or
10279 sign-extended 8-bit register offset. */
10280 static void
10281 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10282 {
10283 unsigned rm = INSTR (20, 16);
10284 unsigned rn = INSTR (9, 5);
10285 unsigned st = INSTR (4, 0);
10286
10287 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10288 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10289 extension);
10290 uint64_t displacement = scaling == Scaled ? extended : 0;
10291
10292 aarch64_set_mem_u8
10293 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10294 }
10295
10296 /* 16 bit store scaled. */
10297 static void
10298 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10299 {
10300 unsigned st = INSTR (4, 0);
10301 unsigned rn = INSTR (9, 5);
10302
10303 aarch64_set_mem_u16
10304 (cpu,
10305 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10306 aarch64_get_vec_u16 (cpu, st, 0));
10307 }
10308
10309 /* 16 bit store scaled or unscaled zero-
10310 or sign-extended 16-bit register offset. */
10311 static void
10312 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10313 {
10314 unsigned rm = INSTR (20, 16);
10315 unsigned rn = INSTR (9, 5);
10316 unsigned st = INSTR (4, 0);
10317
10318 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10319 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10320 extension);
10321 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10322
10323 aarch64_set_mem_u16
10324 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10325 }
10326
10327 /* 32 bit store scaled unsigned 12 bit. */
10328 static void
10329 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10330 {
10331 unsigned st = INSTR (4, 0);
10332 unsigned rn = INSTR (9, 5);
10333
10334 aarch64_set_mem_u32
10335 (cpu,
10336 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10337 aarch64_get_vec_u32 (cpu, st, 0));
10338 }
10339
10340 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10341 static void
10342 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10343 {
10344 unsigned rn = INSTR (9, 5);
10345 unsigned st = INSTR (4, 0);
10346
10347 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10348
10349 if (wb != Post)
10350 address += offset;
10351
10352 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10353
10354 if (wb == Post)
10355 address += offset;
10356
10357 if (wb != NoWriteBack)
10358 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10359 }
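
/* The writeback pattern shared by the _wb helpers above and below: a
   Pre-indexed access adds the offset before the transfer, Post-indexed
   adds it afterwards, and both write the final address back to the
   base register.  Hedged sketch over plain integers: */
#if 0
static uint64_t
writeback_address (uint64_t base, int32_t offset, WriteBack wb,
                   uint64_t *transfer_addr)
{
  uint64_t address = base;

  if (wb != Post)
    address += offset;       /* Pre (and NoWriteBack): offset first.  */
  *transfer_addr = address;  /* Address used by the memory access.  */
  if (wb == Post)
    address += offset;       /* Post: offset after the transfer.  */
  return address;            /* Written back unless NoWriteBack.  */
}
#endif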
10360
10361 /* 32 bit store scaled or unscaled zero-
10362 or sign-extended 32-bit register offset. */
10363 static void
10364 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10365 {
10366 unsigned rm = INSTR (20, 16);
10367 unsigned rn = INSTR (9, 5);
10368 unsigned st = INSTR (4, 0);
10369
10370 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10371 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10372 extension);
10373 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10374
10375 aarch64_set_mem_u32
10376 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10377 }
10378
10379 /* 64 bit store scaled unsigned 12 bit. */
10380 static void
10381 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10382 {
10383 unsigned st = INSTR (4, 0);
10384 unsigned rn = INSTR (9, 5);
10385
10386 aarch64_set_mem_u64
10387 (cpu,
10388 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10389 aarch64_get_vec_u64 (cpu, st, 0));
10390 }
10391
10392 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10393 static void
10394 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10395 {
10396 unsigned rn = INSTR (9, 5);
10397 unsigned st = INSTR (4, 0);
10398
10399 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10400
10401 if (wb != Post)
10402 address += offset;
10403
10404 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10405
10406 if (wb == Post)
10407 address += offset;
10408
10409 if (wb != NoWriteBack)
10410 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10411 }
10412
10413 /* 64 bit store scaled or unscaled zero-
10414 or sign-extended 32-bit register offset. */
10415 static void
10416 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10417 {
10418 unsigned rm = INSTR (20, 16);
10419 unsigned rn = INSTR (9, 5);
10420 unsigned st = INSTR (4, 0);
10421
10422 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10423 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10424 extension);
10425 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10426
10427 aarch64_set_mem_u64
10428 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10429 }
10430
10431 /* 128 bit store scaled unsigned 12 bit. */
10432 static void
10433 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10434 {
10435 FRegister a;
10436 unsigned st = INSTR (4, 0);
10437 unsigned rn = INSTR (9, 5);
10438 uint64_t addr;
10439
10440 aarch64_get_FP_long_double (cpu, st, & a);
10441
10442 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10443 aarch64_set_mem_long_double (cpu, addr, a);
10444 }
10445
10446 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10447 static void
10448 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10449 {
10450 FRegister a;
10451 unsigned rn = INSTR (9, 5);
10452 unsigned st = INSTR (4, 0);
10453 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10454
10455 if (wb != Post)
10456 address += offset;
10457
10458 aarch64_get_FP_long_double (cpu, st, & a);
10459 aarch64_set_mem_long_double (cpu, address, a);
10460
10461 if (wb == Post)
10462 address += offset;
10463
10464 if (wb != NoWriteBack)
10465 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10466 }
10467
10468 /* 128 bit store scaled or unscaled zero-
10469 or sign-extended 32-bit register offset. */
10470 static void
10471 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10472 {
10473 unsigned rm = INSTR (20, 16);
10474 unsigned rn = INSTR (9, 5);
10475 unsigned st = INSTR (4, 0);
10476
10477 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10478 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10479 extension);
10480 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10481
10482 FRegister a;
10483
10484 aarch64_get_FP_long_double (cpu, st, & a);
10485 aarch64_set_mem_long_double (cpu, address + displacement, a);
10486 }
10487
10488 static void
10489 dexLoadImmediatePrePost (sim_cpu *cpu)
10490 {
10491 /* instr[31,30] = size
10492 instr[29,27] = 111
10493 instr[26] = V
10494 instr[25,24] = 00
10495 instr[23,22] = opc
10496 instr[21] = 0
10497 instr[20,12] = simm9
10498 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10499 instr[10] = 0
10500 instr[9,5] = Rn may be SP.
10501 instr[4,0] = Rt */
10502
10503 uint32_t V = INSTR (26, 26);
10504 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10505 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10506 WriteBack wb = INSTR (11, 11);
10507
10508 if (!V)
10509 {
10510 /* GReg operations. */
10511 switch (dispatch)
10512 {
10513 case 0: strb_wb (cpu, imm, wb); return;
10514 case 1: ldrb32_wb (cpu, imm, wb); return;
10515 case 2: ldrsb_wb (cpu, imm, wb); return;
10516 case 3: ldrsb32_wb (cpu, imm, wb); return;
10517 case 4: strh_wb (cpu, imm, wb); return;
10518 case 5: ldrh32_wb (cpu, imm, wb); return;
10519 case 6: ldrsh64_wb (cpu, imm, wb); return;
10520 case 7: ldrsh32_wb (cpu, imm, wb); return;
10521 case 8: str32_wb (cpu, imm, wb); return;
10522 case 9: ldr32_wb (cpu, imm, wb); return;
10523 case 10: ldrsw_wb (cpu, imm, wb); return;
10524 case 12: str_wb (cpu, imm, wb); return;
10525 case 13: ldr_wb (cpu, imm, wb); return;
10526
10527 default:
10528 case 11:
10529 case 14:
10530 case 15:
10531 HALT_UNALLOC;
10532 }
10533 }
10534
10535 /* FReg operations. */
10536 switch (dispatch)
10537 {
10538 case 2: fstrq_wb (cpu, imm, wb); return;
10539 case 3: fldrq_wb (cpu, imm, wb); return;
10540 case 8: fstrs_wb (cpu, imm, wb); return;
10541 case 9: fldrs_wb (cpu, imm, wb); return;
10542 case 12: fstrd_wb (cpu, imm, wb); return;
10543 case 13: fldrd_wb (cpu, imm, wb); return;
10544
10545 case 0: /* STUR 8 bit FP. */
10546 case 1: /* LDUR 8 bit FP. */
10547 case 4: /* STUR 16 bit FP. */
10548 case 5: /* LDUR 16 bit FP. */
10549 HALT_NYI;
10550
10551 default:
10552 case 6:
10553 case 7:
10554 case 10:
10555 case 11:
10556 case 14:
10557 case 15:
10558 HALT_UNALLOC;
10559 }
10560 }
10561
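/* Worked example (illustrative): "LDR W1, [X2, #4]!" has size = 10,
   V = 0, opc = 01, so dispatch = (2 << 2) | 1 = 9 above and the
   access is handled by ldr32_wb with wb == Pre.  */
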
10562 static void
10563 dexLoadRegisterOffset (sim_cpu *cpu)
10564 {
10565 /* instr[31,30] = size
10566 instr[29,27] = 111
10567 instr[26] = V
10568 instr[25,24] = 00
10569 instr[23,22] = opc
10570 instr[21] = 1
10571 instr[20,16] = rm
10572 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10573 110 ==> SXTW, 111 ==> SXTX,
10574 ow ==> RESERVED
10575 instr[12] = scaled
10576 instr[11,10] = 10
10577 instr[9,5] = rn
10578 instr[4,0] = rt. */
10579
10580 uint32_t V = INSTR (26, 26);
10581 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10582 Scaling scale = INSTR (12, 12);
10583 Extension extensionType = INSTR (15, 13);
10584
10585 /* Check for illegal extension types. */
10586 if (uimm (extensionType, 1, 1) == 0)
10587 HALT_UNALLOC;
10588
10589 if (extensionType == UXTX || extensionType == SXTX)
10590 extensionType = NoExtension;
10591
10592 if (!V)
10593 {
10594 /* GReg operations. */
10595 switch (dispatch)
10596 {
10597 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10598 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10599 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10600 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10601 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10602 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10603 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10604 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10605 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10606 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10607 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10608 case 12: str_scale_ext (cpu, scale, extensionType); return;
10609 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10610 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10611
10612 default:
10613 case 11:
10614 case 15:
10615 HALT_UNALLOC;
10616 }
10617 }
10618
10619 /* FReg operations. */
10620 switch (dispatch)
10621 {
10622 case 1: /* LDUR 8 bit FP. */
10623 HALT_NYI;
10624 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10625 case 5: /* LDUR 16 bit FP. */
10626 HALT_NYI;
10627 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10628 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10629
10630 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10631 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10632 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10633 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10634 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10635
10636 default:
10637 case 6:
10638 case 7:
10639 case 10:
10640 case 11:
10641 case 14:
10642 case 15:
10643 HALT_UNALLOC;
10644 }
10645 }
10646
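/* Illustrative note: the option field selects how the 32-bit offset
   register is widened, e.g. "LDR X0, [X1, W2, SXTW]" sign-extends W2
   while UXTW zero-extends it; UXTX/SXTX use the full X register, which
   is why they are mapped to NoExtension above.  */
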
10647 static void
10648 dexLoadUnsignedImmediate (sim_cpu *cpu)
10649 {
10650 /* instr[29,24] == 111_01
10651 instr[31,30] = size
10652 instr[26] = V
10653 instr[23,22] = opc
10654 instr[21,10] = uimm12 : unsigned immediate offset
10655 instr[9,5] = rn may be SP.
10656 instr[4,0] = rt. */
10657
10658 uint32_t V = INSTR (26,26);
10659 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10660 uint32_t imm = INSTR (21, 10);
10661
10662 if (!V)
10663 {
10664 /* GReg operations. */
10665 switch (dispatch)
10666 {
10667 case 0: strb_abs (cpu, imm); return;
10668 case 1: ldrb32_abs (cpu, imm); return;
10669 case 2: ldrsb_abs (cpu, imm); return;
10670 case 3: ldrsb32_abs (cpu, imm); return;
10671 case 4: strh_abs (cpu, imm); return;
10672 case 5: ldrh32_abs (cpu, imm); return;
10673 case 6: ldrsh_abs (cpu, imm); return;
10674 case 7: ldrsh32_abs (cpu, imm); return;
10675 case 8: str32_abs (cpu, imm); return;
10676 case 9: ldr32_abs (cpu, imm); return;
10677 case 10: ldrsw_abs (cpu, imm); return;
10678 case 12: str_abs (cpu, imm); return;
10679 case 13: ldr_abs (cpu, imm); return;
10680 case 14: prfm_abs (cpu, imm); return;
10681
10682 default:
10683 case 11:
10684 case 15:
10685 HALT_UNALLOC;
10686 }
10687 }
10688
10689 /* FReg operations. */
10690 switch (dispatch)
10691 {
10692 case 0: fstrb_abs (cpu, imm); return;
10693 case 4: fstrh_abs (cpu, imm); return;
10694 case 8: fstrs_abs (cpu, imm); return;
10695 case 12: fstrd_abs (cpu, imm); return;
10696 case 2: fstrq_abs (cpu, imm); return;
10697
10698 case 1: fldrb_abs (cpu, imm); return;
10699 case 5: fldrh_abs (cpu, imm); return;
10700 case 9: fldrs_abs (cpu, imm); return;
10701 case 13: fldrd_abs (cpu, imm); return;
10702 case 3: fldrq_abs (cpu, imm); return;
10703
10704 default:
10705 case 6:
10706 case 7:
10707 case 10:
10708 case 11:
10709 case 14:
10710 case 15:
10711 HALT_UNALLOC;
10712 }
10713 }
10714
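/* Worked example (illustrative): "LDR X0, [SP, #16]" encodes
   size = 11, V = 0, opc = 01 and uimm12 = 2, so dispatch = 13 and
   ldr_abs forms the address as SP + SCALE (2, 64) = SP + 16.  */
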
10715 static void
10716 dexLoadExclusive (sim_cpu *cpu)
10717 {
10718 /* assert instr[29:24] = 001000;
10719 instr[31,30] = size
10720 instr[23] = 0 if exclusive
10721 instr[22] = L : 1 if load, 0 if store
10722 instr[21] = 1 if pair
10723 instr[20,16] = Rs
10724 instr[15] = o0 : 1 if ordered
10725 instr[14,10] = Rt2
10726 instr[9,5] = Rn
10727 instr[4,0] = Rt. */
10728
10729 switch (INSTR (22, 21))
10730 {
10731 case 2: ldxr (cpu); return;
10732 case 0: stxr (cpu); return;
10733 default: HALT_NYI;
10734 }
10735 }
10736
10737 static void
10738 dexLoadOther (sim_cpu *cpu)
10739 {
10740 uint32_t dispatch;
10741
10742 /* instr[29,25] = 111_0
10743 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10744 instr[21] and instr[11,10] form the secondary dispatch. */
10745 if (INSTR (24, 24))
10746 {
10747 dexLoadUnsignedImmediate (cpu);
10748 return;
10749 }
10750
10751 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10752 switch (dispatch)
10753 {
10754 case 0: dexLoadUnscaledImmediate (cpu); return;
10755 case 1: dexLoadImmediatePrePost (cpu); return;
10756 case 3: dexLoadImmediatePrePost (cpu); return;
10757 case 6: dexLoadRegisterOffset (cpu); return;
10758
10759 default:
10760 case 2:
10761 case 4:
10762 case 5:
10763 case 7:
10764 HALT_NYI;
10765 }
10766 }
10767
10768 static void
10769 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10770 {
10771 unsigned rn = INSTR (14, 10);
10772 unsigned rd = INSTR (9, 5);
10773 unsigned rm = INSTR (4, 0);
10774 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10775
10776 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10777 HALT_UNALLOC; /* Writeback when the base register is also a source is UNPREDICTABLE. */
10778
10779 offset <<= 2;
10780
10781 if (wb != Post)
10782 address += offset;
10783
10784 aarch64_set_mem_u32 (cpu, address,
10785 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10786 aarch64_set_mem_u32 (cpu, address + 4,
10787 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10788
10789 if (wb == Post)
10790 address += offset;
10791
10792 if (wb != NoWriteBack)
10793 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10794 }
10795
10796 static void
10797 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10798 {
10799 unsigned rn = INSTR (14, 10);
10800 unsigned rd = INSTR (9, 5);
10801 unsigned rm = INSTR (4, 0);
10802 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10803
10804 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10805 HALT_UNALLOC; /* Writeback when the base register is also a source is UNPREDICTABLE. */
10806
10807 offset <<= 3;
10808
10809 if (wb != Post)
10810 address += offset;
10811
10812 aarch64_set_mem_u64 (cpu, address,
10813 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10814 aarch64_set_mem_u64 (cpu, address + 8,
10815 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10816
10817 if (wb == Post)
10818 address += offset;
10819
10820 if (wb != NoWriteBack)
10821 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10822 }
10823
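/* Illustrative note: the pair offset is a signed 7-bit immediate
   scaled by the access size, hence "offset <<= 2" for 32-bit pairs
   and "offset <<= 3" for 64-bit pairs.  "STP X0, X1, [SP, #-16]!"
   reaches store_pair_u64 with offset = -2, i.e. -16 after scaling.  */
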
10824 static void
10825 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10826 {
10827 unsigned rn = INSTR (14, 10);
10828 unsigned rd = INSTR (9, 5);
10829 unsigned rm = INSTR (4, 0);
10830 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10831
10832 /* Rt == Rt2 in a load pair is UNPREDICTABLE; treat it as UNALLOC so we halt. */
10833 if (rn == rm)
10834 HALT_UNALLOC;
10835
10836 offset <<= 2;
10837
10838 if (wb != Post)
10839 address += offset;
10840
10841 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10842 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10843
10844 if (wb == Post)
10845 address += offset;
10846
10847 if (wb != NoWriteBack)
10848 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10849 }
10850
10851 static void
10852 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10853 {
10854 unsigned rn = INSTR (14, 10);
10855 unsigned rd = INSTR (9, 5);
10856 unsigned rm = INSTR (4, 0);
10857 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10858
10859 /* Rt == Rt2 in a load pair is UNPREDICTABLE; treat it as UNALLOC so we halt. */
10860 if (rn == rm)
10861 HALT_UNALLOC;
10862
10863 offset <<= 2;
10864
10865 if (wb != Post)
10866 address += offset;
10867
10868 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10869 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10870
10871 if (wb == Post)
10872 address += offset;
10873
10874 if (wb != NoWriteBack)
10875 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10876 }
10877
10878 static void
10879 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10880 {
10881 unsigned rn = INSTR (14, 10);
10882 unsigned rd = INSTR (9, 5);
10883 unsigned rm = INSTR (4, 0);
10884 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10885
10886 /* Rt == Rt2 in a load pair is UNPREDICTABLE; treat it as UNALLOC so we halt. */
10887 if (rn == rm)
10888 HALT_UNALLOC;
10889
10890 offset <<= 3;
10891
10892 if (wb != Post)
10893 address += offset;
10894
10895 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10896 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10897
10898 if (wb == Post)
10899 address += offset;
10900
10901 if (wb != NoWriteBack)
10902 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10903 }
10904
10905 static void
10906 dex_load_store_pair_gr (sim_cpu *cpu)
10907 {
10908 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10909 instr[29,25] = instruction encoding: 101_0
10910 instr[26] = V : 1 ==> FP, 0 ==> GP
10911 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10912 instr[22] = load/store (1=> load)
10913 instr[21,15] = signed, scaled, offset
10914 instr[14,10] = Rn
10915 instr[ 9, 5] = Rd
10916 instr[ 4, 0] = Rm. */
10917
10918 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10919 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10920
10921 switch (dispatch)
10922 {
10923 case 2: store_pair_u32 (cpu, offset, Post); return;
10924 case 3: load_pair_u32 (cpu, offset, Post); return;
10925 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10926 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10927 case 6: store_pair_u32 (cpu, offset, Pre); return;
10928 case 7: load_pair_u32 (cpu, offset, Pre); return;
10929
10930 case 11: load_pair_s32 (cpu, offset, Post); return;
10931 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
10932 case 15: load_pair_s32 (cpu, offset, Pre); return;
10933
10934 case 18: store_pair_u64 (cpu, offset, Post); return;
10935 case 19: load_pair_u64 (cpu, offset, Post); return;
10936 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
10937 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
10938 case 22: store_pair_u64 (cpu, offset, Pre); return;
10939 case 23: load_pair_u64 (cpu, offset, Pre); return;
10940
10941 default:
10942 HALT_UNALLOC;
10943 }
10944 }
10945
10946 static void
10947 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
10948 {
10949 unsigned rn = INSTR (14, 10);
10950 unsigned rd = INSTR (9, 5);
10951 unsigned rm = INSTR (4, 0);
10952 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10953
10954 offset <<= 2;
10955
10956 if (wb != Post)
10957 address += offset;
10958
10959 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
10960 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
10961
10962 if (wb == Post)
10963 address += offset;
10964
10965 if (wb != NoWriteBack)
10966 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10967 }
10968
10969 static void
10970 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10971 {
10972 unsigned rn = INSTR (14, 10);
10973 unsigned rd = INSTR (9, 5);
10974 unsigned rm = INSTR (4, 0);
10975 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10976
10977 offset <<= 3;
10978
10979 if (wb != Post)
10980 address += offset;
10981
10982 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
10983 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
10984
10985 if (wb == Post)
10986 address += offset;
10987
10988 if (wb != NoWriteBack)
10989 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10990 }
10991
10992 static void
10993 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
10994 {
10995 FRegister a;
10996 unsigned rn = INSTR (14, 10);
10997 unsigned rd = INSTR (9, 5);
10998 unsigned rm = INSTR (4, 0);
10999 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11000
11001 offset <<= 4;
11002
11003 if (wb != Post)
11004 address += offset;
11005
11006 aarch64_get_FP_long_double (cpu, rm, & a);
11007 aarch64_set_mem_long_double (cpu, address, a);
11008 aarch64_get_FP_long_double (cpu, rn, & a);
11009 aarch64_set_mem_long_double (cpu, address + 16, a);
11010
11011 if (wb == Post)
11012 address += offset;
11013
11014 if (wb != NoWriteBack)
11015 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11016 }
11017
11018 static void
11019 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11020 {
11021 unsigned rn = INSTR (14, 10);
11022 unsigned rd = INSTR (9, 5);
11023 unsigned rm = INSTR (4, 0);
11024 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11025
11026 if (rm == rn)
11027 HALT_UNALLOC;
11028
11029 offset <<= 2;
11030
11031 if (wb != Post)
11032 address += offset;
11033
11034 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11035 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11036
11037 if (wb == Post)
11038 address += offset;
11039
11040 if (wb != NoWriteBack)
11041 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11042 }
11043
11044 static void
11045 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11046 {
11047 unsigned rn = INSTR (14, 10);
11048 unsigned rd = INSTR (9, 5);
11049 unsigned rm = INSTR (4, 0);
11050 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11051
11052 if (rm == rn)
11053 HALT_UNALLOC;
11054
11055 offset <<= 3;
11056
11057 if (wb != Post)
11058 address += offset;
11059
11060 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11061 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11062
11063 if (wb == Post)
11064 address += offset;
11065
11066 if (wb != NoWriteBack)
11067 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11068 }
11069
11070 static void
11071 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11072 {
11073 FRegister a;
11074 unsigned rn = INSTR (14, 10);
11075 unsigned rd = INSTR (9, 5);
11076 unsigned rm = INSTR (4, 0);
11077 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11078
11079 if (rm == rn)
11080 HALT_UNALLOC;
11081
11082 offset <<= 4;
11083
11084 if (wb != Post)
11085 address += offset;
11086
11087 aarch64_get_mem_long_double (cpu, address, & a);
11088 aarch64_set_FP_long_double (cpu, rm, a);
11089 aarch64_get_mem_long_double (cpu, address + 16, & a);
11090 aarch64_set_FP_long_double (cpu, rn, a);
11091
11092 if (wb == Post)
11093 address += offset;
11094
11095 if (wb != NoWriteBack)
11096 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11097 }
11098
11099 static void
11100 dex_load_store_pair_fp (sim_cpu *cpu)
11101 {
11102 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11103 instr[29,25] = instruction encoding
11104 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11105 instr[22] = load/store (1=> load)
11106 instr[21,15] = signed, scaled, offset
11107 instr[14,10] = Rn
11108 instr[ 9, 5] = Rd
11109 instr[ 4, 0] = Rm */
11110
11111 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11112 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11113
11114 switch (dispatch)
11115 {
11116 case 2: store_pair_float (cpu, offset, Post); return;
11117 case 3: load_pair_float (cpu, offset, Post); return;
11118 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11119 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11120 case 6: store_pair_float (cpu, offset, Pre); return;
11121 case 7: load_pair_float (cpu, offset, Pre); return;
11122
11123 case 10: store_pair_double (cpu, offset, Post); return;
11124 case 11: load_pair_double (cpu, offset, Post); return;
11125 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11126 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11127 case 14: store_pair_double (cpu, offset, Pre); return;
11128 case 15: load_pair_double (cpu, offset, Pre); return;
11129
11130 case 18: store_pair_long_double (cpu, offset, Post); return;
11131 case 19: load_pair_long_double (cpu, offset, Post); return;
11132 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11133 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11134 case 22: store_pair_long_double (cpu, offset, Pre); return;
11135 case 23: load_pair_long_double (cpu, offset, Pre); return;
11136
11137 default:
11138 HALT_UNALLOC;
11139 }
11140 }
11141
11142 static inline unsigned
11143 vec_reg (unsigned v, unsigned o)
11144 {
11145 return (v + o) & 0x1F; /* The register list wraps modulo the 32 vector registers. */
11146 }
11147
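/* Illustrative note: LDn/STn register lists wrap around the 32 vector
   registers, so LD4 starting at V30 targets V30, V31, V0, V1 - hence
   the modulo-32 mask in vec_reg.  */
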
11148 /* Load multiple N-element structures to N consecutive registers. */
11149 static void
11150 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11151 {
11152 int all = INSTR (30, 30);
11153 unsigned size = INSTR (11, 10);
11154 unsigned vd = INSTR (4, 0);
11155 unsigned i;
11156
11157 switch (size)
11158 {
11159 case 0: /* 8-bit operations. */
11160 if (all)
11161 for (i = 0; i < (16 * N); i++)
11162 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11163 aarch64_get_mem_u8 (cpu, address + i));
11164 else
11165 for (i = 0; i < (8 * N); i++)
11166 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11167 aarch64_get_mem_u8 (cpu, address + i));
11168 return;
11169
11170 case 1: /* 16-bit operations. */
11171 if (all)
11172 for (i = 0; i < (8 * N); i++)
11173 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11174 aarch64_get_mem_u16 (cpu, address + i * 2));
11175 else
11176 for (i = 0; i < (4 * N); i++)
11177 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11178 aarch64_get_mem_u16 (cpu, address + i * 2));
11179 return;
11180
11181 case 2: /* 32-bit operations. */
11182 if (all)
11183 for (i = 0; i < (4 * N); i++)
11184 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11185 aarch64_get_mem_u32 (cpu, address + i * 4));
11186 else
11187 for (i = 0; i < (2 * N); i++)
11188 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11189 aarch64_get_mem_u32 (cpu, address + i * 4));
11190 return;
11191
11192 case 3: /* 64-bit operations. */
11193 if (all)
11194 for (i = 0; i < (2 * N); i++)
11195 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11196 aarch64_get_mem_u64 (cpu, address + i * 8));
11197 else
11198 for (i = 0; i < N; i++)
11199 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11200 aarch64_get_mem_u64 (cpu, address + i * 8));
11201 return;
11202 }
11203 }
11204
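/* N.B. vec_load fills consecutive registers from consecutive memory.
   Architecturally LD2/LD3/LD4 de-interleave structure elements across
   their registers, so this shared helper is exact for LD1 with
   multiple registers but appears to be only an approximation of the
   LDn forms - see the FIXMEs on LD1_2/LD1_3/LD1_4 below.  */
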
11205 /* LD4: load multiple 4-element to four consecutive registers. */
11206 static void
11207 LD4 (sim_cpu *cpu, uint64_t address)
11208 {
11209 vec_load (cpu, address, 4);
11210 }
11211
11212 /* LD3: load multiple 3-element structures to three consecutive registers. */
11213 static void
11214 LD3 (sim_cpu *cpu, uint64_t address)
11215 {
11216 vec_load (cpu, address, 3);
11217 }
11218
11219 /* LD2: load multiple 2-element structures to two consecutive registers. */
11220 static void
11221 LD2 (sim_cpu *cpu, uint64_t address)
11222 {
11223 vec_load (cpu, address, 2);
11224 }
11225
11226 /* Load multiple 1-element structures into one register. */
11227 static void
11228 LD1_1 (sim_cpu *cpu, uint64_t address)
11229 {
11230 int all = INSTR (30, 30);
11231 unsigned size = INSTR (11, 10);
11232 unsigned vd = INSTR (4, 0);
11233 unsigned i;
11234
11235 switch (size)
11236 {
11237 case 0:
11238 /* LD1 {Vd.16b}, addr, #16 */
11239 /* LD1 {Vd.8b}, addr, #8 */
11240 for (i = 0; i < (all ? 16 : 8); i++)
11241 aarch64_set_vec_u8 (cpu, vd, i,
11242 aarch64_get_mem_u8 (cpu, address + i));
11243 return;
11244
11245 case 1:
11246 /* LD1 {Vd.8h}, addr, #16 */
11247 /* LD1 {Vd.4h}, addr, #8 */
11248 for (i = 0; i < (all ? 8 : 4); i++)
11249 aarch64_set_vec_u16 (cpu, vd, i,
11250 aarch64_get_mem_u16 (cpu, address + i * 2));
11251 return;
11252
11253 case 2:
11254 /* LD1 {Vd.4s}, addr, #16 */
11255 /* LD1 {Vd.2s}, addr, #8 */
11256 for (i = 0; i < (all ? 4 : 2); i++)
11257 aarch64_set_vec_u32 (cpu, vd, i,
11258 aarch64_get_mem_u32 (cpu, address + i * 4));
11259 return;
11260
11261 case 3:
11262 /* LD1 {Vd.2d}, addr, #16 */
11263 /* LD1 {Vd.1d}, addr, #8 */
11264 for (i = 0; i < (all ? 2 : 1); i++)
11265 aarch64_set_vec_u64 (cpu, vd, i,
11266 aarch64_get_mem_u64 (cpu, address + i * 8));
11267 return;
11268 }
11269 }
11270
11271 /* Load multiple 1-element structures into two registers. */
11272 static void
11273 LD1_2 (sim_cpu *cpu, uint64_t address)
11274 {
11275 /* FIXME: This algorithm is *exactly* the same as the LD2 version.
11276 So why have two different instructions? There must be something
11277 wrong somewhere. */
11278 vec_load (cpu, address, 2);
11279 }
11280
11281 /* Load multiple 1-element structures into three registers. */
11282 static void
11283 LD1_3 (sim_cpu *cpu, uint64_t address)
11284 {
11285 /* FIXME: This algorithm is *exactly* the same as the LD3 version.
11286 So why have two different instructions? There must be something
11287 wrong somewhere. */
11288 vec_load (cpu, address, 3);
11289 }
11290
11291 /* Load multiple 1-element structures into four registers. */
11292 static void
11293 LD1_4 (sim_cpu *cpu, uint64_t address)
11294 {
11295 /* FIXME: This algorithm is *exactly* the same as the LD4 version.
11296 So why have two different instructions? There must be something
11297 wrong somewhere. */
11298 vec_load (cpu, address, 4);
11299 }
11300
11301 /* Store multiple N-element structures to N consecutive registers. */
11302 static void
11303 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11304 {
11305 int all = INSTR (30, 30);
11306 unsigned size = INSTR (11, 10);
11307 unsigned vd = INSTR (4, 0);
11308 unsigned i;
11309
11310 switch (size)
11311 {
11312 case 0: /* 8-bit operations. */
11313 if (all)
11314 for (i = 0; i < (16 * N); i++)
11315 aarch64_set_mem_u8
11316 (cpu, address + i,
11317 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11318 else
11319 for (i = 0; i < (8 * N); i++)
11320 aarch64_set_mem_u8
11321 (cpu, address + i,
11322 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11323 return;
11324
11325 case 1: /* 16-bit operations. */
11326 if (all)
11327 for (i = 0; i < (8 * N); i++)
11328 aarch64_set_mem_u16
11329 (cpu, address + i * 2,
11330 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11331 else
11332 for (i = 0; i < (4 * N); i++)
11333 aarch64_set_mem_u16
11334 (cpu, address + i * 2,
11335 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11336 return;
11337
11338 case 2: /* 32-bit operations. */
11339 if (all)
11340 for (i = 0; i < (4 * N); i++)
11341 aarch64_set_mem_u32
11342 (cpu, address + i * 4,
11343 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11344 else
11345 for (i = 0; i < (2 * N); i++)
11346 aarch64_set_mem_u32
11347 (cpu, address + i * 4,
11348 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11349 return;
11350
11351 case 3: /* 64-bit operations. */
11352 if (all)
11353 for (i = 0; i < (2 * N); i++)
11354 aarch64_set_mem_u64
11355 (cpu, address + i * 8,
11356 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11357 else
11358 for (i = 0; i < N; i++)
11359 aarch64_set_mem_u64
11360 (cpu, address + i * 8,
11361 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11362 return;
11363 }
11364 }
11365
11366 /* Store multiple 4-element structure to four consecutive registers. */
11367 static void
11368 ST4 (sim_cpu *cpu, uint64_t address)
11369 {
11370 vec_store (cpu, address, 4);
11371 }
11372
11373 /* Store multiple 3-element structures to three consecutive registers. */
11374 static void
11375 ST3 (sim_cpu *cpu, uint64_t address)
11376 {
11377 vec_store (cpu, address, 3);
11378 }
11379
11380 /* Store multiple 2-element structures to two consecutive registers. */
11381 static void
11382 ST2 (sim_cpu *cpu, uint64_t address)
11383 {
11384 vec_store (cpu, address, 2);
11385 }
11386
11387 /* Store multiple 1-element structures into one register. */
11388 static void
11389 ST1_1 (sim_cpu *cpu, uint64_t address)
11390 {
11391 int all = INSTR (30, 30);
11392 unsigned size = INSTR (11, 10);
11393 unsigned vd = INSTR (4, 0);
11394 unsigned i;
11395
11396 switch (size)
11397 {
11398 case 0:
11399 for (i = 0; i < (all ? 16 : 8); i++)
11400 aarch64_set_mem_u8 (cpu, address + i,
11401 aarch64_get_vec_u8 (cpu, vd, i));
11402 return;
11403
11404 case 1:
11405 for (i = 0; i < (all ? 8 : 4); i++)
11406 aarch64_set_mem_u16 (cpu, address + i * 2,
11407 aarch64_get_vec_u16 (cpu, vd, i));
11408 return;
11409
11410 case 2:
11411 for (i = 0; i < (all ? 4 : 2); i++)
11412 aarch64_set_mem_u32 (cpu, address + i * 4,
11413 aarch64_get_vec_u32 (cpu, vd, i));
11414 return;
11415
11416 case 3:
11417 for (i = 0; i < (all ? 2 : 1); i++)
11418 aarch64_set_mem_u64 (cpu, address + i * 8,
11419 aarch64_get_vec_u64 (cpu, vd, i));
11420 return;
11421 }
11422 }
11423
11424 /* Store multiple 1-element structures into two registers. */
11425 static void
11426 ST1_2 (sim_cpu *cpu, uint64_t address)
11427 {
11428 /* FIXME: This algorithm is *exactly* the same as the ST2 version.
11429 So why have two different instructions? There must be
11430 something wrong somewhere. */
11431 vec_store (cpu, address, 2);
11432 }
11433
11434 /* Store multiple 1-element structures into three registers. */
11435 static void
11436 ST1_3 (sim_cpu *cpu, uint64_t address)
11437 {
11438 /* FIXME: This algorithm is *exactly* the same as the ST3 version.
11439 So why have two different instructions? There must be
11440 something wrong somewhere. */
11441 vec_store (cpu, address, 3);
11442 }
11443
11444 /* Store multiple 1-element structures into four registers. */
11445 static void
11446 ST1_4 (sim_cpu *cpu, uint64_t address)
11447 {
11448 /* FIXME: This algorithm is *exactly* the same as the ST4 version.
11449 So why have two different instructions? There must be
11450 something wrong somewhere. */
11451 vec_store (cpu, address, 4);
11452 }
11453
11454 static void
11455 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11456 {
11457 /* instr[31] = 0
11458 instr[30] = element selector 0=>half, 1=>all elements
11459 instr[29,24] = 00 1101
11460 instr[23] = 0=>simple, 1=>post
11461 instr[22] = 1
11462 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11463 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11464 11111 (immediate post inc)
11465 instr[15,14] = 11
11466 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11467 instr[12] = 0
11468 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11469 10=> word(s), 11=> double(d)
11470 instr[9,5] = address
11471 instr[4,0] = Vd */
11472
11473 unsigned full = INSTR (30, 30);
11474 unsigned vd = INSTR (4, 0);
11475 unsigned size = INSTR (11, 10);
11476 int i;
11477
11478 NYI_assert (29, 24, 0x0D);
11479 NYI_assert (22, 22, 1);
11480 NYI_assert (15, 14, 3);
11481 NYI_assert (12, 12, 0);
11482
11483 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11484 {
11485 case 0: /* LD1R. */
11486 switch (size)
11487 {
11488 case 0:
11489 {
11490 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11491 for (i = 0; i < (full ? 16 : 8); i++)
11492 aarch64_set_vec_u8 (cpu, vd, i, val);
11493 break;
11494 }
11495
11496 case 1:
11497 {
11498 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11499 for (i = 0; i < (full ? 8 : 4); i++)
11500 aarch64_set_vec_u16 (cpu, vd, i, val);
11501 break;
11502 }
11503
11504 case 2:
11505 {
11506 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11507 for (i = 0; i < (full ? 4 : 2); i++)
11508 aarch64_set_vec_u32 (cpu, vd, i, val);
11509 break;
11510 }
11511
11512 case 3:
11513 {
11514 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11515 for (i = 0; i < (full ? 2 : 1); i++)
11516 aarch64_set_vec_u64 (cpu, vd, i, val);
11517 break;
11518 }
11519
11520 default:
11521 HALT_UNALLOC;
11522 }
11523 break;
11524
11525 case 1: /* LD2R. */
11526 switch (size)
11527 {
11528 case 0:
11529 {
11530 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11531 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11532
11533 for (i = 0; i < (full ? 16 : 8); i++)
11534 {
11535 aarch64_set_vec_u8 (cpu, vd, i, val1);
11536 aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11537 }
11538 break;
11539 }
11540
11541 case 1:
11542 {
11543 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11544 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11545
11546 for (i = 0; i < (full ? 8 : 4); i++)
11547 {
11548 aarch64_set_vec_u16 (cpu, vd, i, val1);
11549 aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11550 }
11551 break;
11552 }
11553
11554 case 2:
11555 {
11556 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11557 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11558
11559 for (i = 0; i < (full ? 4 : 2); i++)
11560 {
11561 aarch64_set_vec_u32 (cpu, vd, i, val1);
11562 aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11563 }
11564 break;
11565 }
11566
11567 case 3:
11568 {
11569 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11570 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11571
11572 for (i = 0; i < (full ? 2 : 1); i++)
11573 {
11574 aarch64_set_vec_u64 (cpu, vd, i, val1);
11575 aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11576 }
11577 break;
11578 }
11579
11580 default:
11581 HALT_UNALLOC;
11582 }
11583 break;
11584
11585 case 2: /* LD3R. */
11586 switch (size)
11587 {
11588 case 0:
11589 {
11590 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11591 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11592 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11593
11594 for (i = 0; i < (full ? 16 : 8); i++)
11595 {
11596 aarch64_set_vec_u8 (cpu, vd, i, val1);
11597 aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11598 aarch64_set_vec_u8 (cpu, vd + 2, i, val3);
11599 }
11600 }
11601 break;
11602
11603 case 1:
11604 {
11605 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11606 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11607 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11608
11609 for (i = 0; i < (full ? 8 : 4); i++)
11610 {
11611 aarch64_set_vec_u16 (cpu, vd, i, val1);
11612 aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11613 aarch64_set_vec_u16 (cpu, vd + 2, i, val3);
11614 }
11615 }
11616 break;
11617
11618 case 2:
11619 {
11620 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11621 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11622 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11623
11624 for (i = 0; i < (full ? 4 : 2); i++)
11625 {
11626 aarch64_set_vec_u32 (cpu, vd, i, val1);
11627 aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11628 aarch64_set_vec_u32 (cpu, vd + 2, i, val3);
11629 }
11630 }
11631 break;
11632
11633 case 3:
11634 {
11635 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11636 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11637 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11638
11639 for (i = 0; i < (full ? 2 : 1); i++)
11640 {
11641 aarch64_set_vec_u64 (cpu, vd, i, val1);
11642 aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11643 aarch64_set_vec_u64 (cpu, vd + 2, i, val3);
11644 }
11645 }
11646 break;
11647
11648 default:
11649 HALT_UNALLOC;
11650 }
11651 break;
11652
11653 case 3: /* LD4R. */
11654 switch (size)
11655 {
11656 case 0:
11657 {
11658 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11659 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11660 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11661 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11662
11663 for (i = 0; i < (full ? 16 : 8); i++)
11664 {
11665 aarch64_set_vec_u8 (cpu, vd, i, val1);
11666 aarch64_set_vec_u8 (cpu, vd + 1, i, val2);
11667 aarch64_set_vec_u8 (cpu, vd + 2, i, val3);
11668 aarch64_set_vec_u8 (cpu, vd + 3, i, val4);
11669 }
11670 }
11671 break;
11672
11673 case 1:
11674 {
11675 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11676 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11677 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11678 uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11679
11680 for (i = 0; i < (full ? 8 : 4); i++)
11681 {
11682 aarch64_set_vec_u16 (cpu, vd, i, val1);
11683 aarch64_set_vec_u16 (cpu, vd + 1, i, val2);
11684 aarch64_set_vec_u16 (cpu, vd + 2, i, val3);
11685 aarch64_set_vec_u16 (cpu, vd + 3, i, val4);
11686 }
11687 }
11688 break;
11689
11690 case 2:
11691 {
11692 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11693 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11694 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11695 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11696
11697 for (i = 0; i < (full ? 4 : 2); i++)
11698 {
11699 aarch64_set_vec_u32 (cpu, vd, i, val1);
11700 aarch64_set_vec_u32 (cpu, vd + 1, i, val2);
11701 aarch64_set_vec_u32 (cpu, vd + 2, i, val3);
11702 aarch64_set_vec_u32 (cpu, vd + 3, i, val4);
11703 }
11704 }
11705 break;
11706
11707 case 3:
11708 {
11709 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11710 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11711 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11712 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11713
11714 for (i = 0; i < (full ? 2 : 1); i++)
11715 {
11716 aarch64_set_vec_u64 (cpu, vd, i, val1);
11717 aarch64_set_vec_u64 (cpu, vd + 1, i, val2);
11718 aarch64_set_vec_u64 (cpu, vd + 2, i, val3);
11719 aarch64_set_vec_u64 (cpu, vd + 3, i, val4);
11720 }
11721 }
11722 break;
11723
11724 default:
11725 HALT_UNALLOC;
11726 }
11727 break;
11728
11729 default:
11730 HALT_UNALLOC;
11731 }
11732 }
11733
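/* Worked example (illustrative): "LD2R {V0.8B, V1.8B}, [X3]" reads the
   bytes at [X3] and [X3 + 1] and replicates the first into every
   element of V0 and the second into every element of V1.  */
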
11734 static void
11735 do_vec_load_store (sim_cpu *cpu)
11736 {
11737 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11738
11739 instr[31] = 0
11740 instr[30] = element selector 0=>half, 1=>all elements
11741 instr[29,25] = 00110
11742 instr[24] = ?
11743 instr[23] = 0=>simple, 1=>post
11744 instr[22] = 0=>store, 1=>load
11745 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11746 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11747 11111 (immediate post inc)
11748 instr[15,12] = elements and destinations. eg for load:
11749 0000=>LD4 => load multiple 4-element to
11750 four consecutive registers
11751 0100=>LD3 => load multiple 3-element to
11752 three consecutive registers
11753 1000=>LD2 => load multiple 2-element to
11754 two consecutive registers
11755 0010=>LD1 => load multiple 1-element to
11756 four consecutive registers
11757 0110=>LD1 => load multiple 1-element to
11758 three consecutive registers
11759 1010=>LD1 => load multiple 1-element to
11760 two consecutive registers
11761 0111=>LD1 => load multiple 1-element to
11762 one register
11763 1100=>LD1R,LD2R
11764 1110=>LD3R,LD4R
11765 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11766 10=> word(s), 11=> double(d)
11767 instr[9,5] = Vn, can be SP
11768 instr[4,0] = Vd */
11769
11770 int post;
11771 int load;
11772 unsigned vn;
11773 uint64_t address;
11774 int type;
11775
11776 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11777 HALT_NYI;
11778
11779 type = INSTR (15, 12);
11780 if (type != 0xC && type != 0xE && INSTR (21, 21) != 0)
11781 HALT_NYI;
11782
11783 post = INSTR (23, 23);
11784 load = INSTR (22, 22);
11785 vn = INSTR (9, 5);
11786 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11787
11788 if (post)
11789 {
11790 unsigned vm = INSTR (20, 16);
11791
11792 if (vm == R31)
11793 {
11794 unsigned sizeof_operation;
11795
11796 switch (type)
11797 {
11798 case 0: sizeof_operation = 32; break;
11799 case 4: sizeof_operation = 24; break;
11800 case 8: sizeof_operation = 16; break;
11801
11802 case 0xC:
11803 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11804 sizeof_operation <<= INSTR (11, 10);
11805 break;
11806
11807 case 0xE:
11808 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11809 sizeof_operation <<= INSTR (11, 10);
11810 break;
11811
11812 case 7:
11813 /* One register, immediate offset variant. */
11814 sizeof_operation = 8;
11815 break;
11816
11817 case 10:
11818 /* Two registers, immediate offset variant. */
11819 sizeof_operation = 16;
11820 break;
11821
11822 case 6:
11823 /* Three registers, immediate offset variant. */
11824 sizeof_operation = 24;
11825 break;
11826
11827 case 2:
11828 /* Four registers, immediate offset variant. */
11829 sizeof_operation = 32;
11830 break;
11831
11832 default:
11833 HALT_UNALLOC;
11834 }
11835
11836 if (INSTR (30, 30))
11837 sizeof_operation *= 2;
11838
11839 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11840 }
11841 else
11842 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11843 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11844 }
11845 else
11846 {
11847 NYI_assert (20, 16, 0);
11848 }
11849
11850 if (load)
11851 {
11852 switch (type)
11853 {
11854 case 0: LD4 (cpu, address); return;
11855 case 4: LD3 (cpu, address); return;
11856 case 8: LD2 (cpu, address); return;
11857 case 2: LD1_4 (cpu, address); return;
11858 case 6: LD1_3 (cpu, address); return;
11859 case 10: LD1_2 (cpu, address); return;
11860 case 7: LD1_1 (cpu, address); return;
11861
11862 case 0xE:
11863 case 0xC: do_vec_LDnR (cpu, address); return;
11864
11865 default:
11866 HALT_NYI;
11867 }
11868 }
11869
11870 /* Stores. */
11871 switch (type)
11872 {
11873 case 0: ST4 (cpu, address); return;
11874 case 4: ST3 (cpu, address); return;
11875 case 8: ST2 (cpu, address); return;
11876 case 2: ST1_4 (cpu, address); return;
11877 case 6: ST1_3 (cpu, address); return;
11878 case 10: ST1_2 (cpu, address); return;
11879 case 7: ST1_1 (cpu, address); return;
11880 default:
11881 HALT_NYI;
11882 }
11883 }
11884
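/* Illustrative note: for the immediate post-increment form (Rm == 31)
   the base register advances by the number of bytes transferred, e.g.
   "LD4 {V0.16B-V3.16B}, [X1], #64" has type 0 (32 bytes for the
   half-width case), doubled to 64 because instr[30] selects all
   elements.  */
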
11885 static void
11886 dexLdSt (sim_cpu *cpu)
11887 {
11888 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11889 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11890 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11891 bits [29,28:26] of a LS are the secondary dispatch vector. */
11892 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11893
11894 switch (group2)
11895 {
11896 case LS_EXCL_000:
11897 dexLoadExclusive (cpu); return;
11898
11899 case LS_LIT_010:
11900 case LS_LIT_011:
11901 dexLoadLiteral (cpu); return;
11902
11903 case LS_OTHER_110:
11904 case LS_OTHER_111:
11905 dexLoadOther (cpu); return;
11906
11907 case LS_ADVSIMD_001:
11908 do_vec_load_store (cpu); return;
11909
11910 case LS_PAIR_100:
11911 dex_load_store_pair_gr (cpu); return;
11912
11913 case LS_PAIR_101:
11914 dex_load_store_pair_fp (cpu); return;
11915
11916 default:
11917 /* Should never reach here. */
11918 HALT_NYI;
11919 }
11920 }
11921
11922 /* Specific decode and execute for group Data Processing Register. */
11923
11924 static void
11925 dexLogicalShiftedRegister (sim_cpu *cpu)
11926 {
11927 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11928 instr[30,29] = op
11929 instr[28,24] = 01010
11930 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11931 instr[21] = N
11932 instr[20,16] = Rm
11933 instr[15,10] = count : must be 0xxxxx for 32 bit
11934 instr[9,5] = Rn
11935 instr[4,0] = Rd */
11936
11937 uint32_t size = INSTR (31, 31);
11938 Shift shiftType = INSTR (23, 22);
11939 uint32_t count = INSTR (15, 10);
11940
11941 /* 32 bit operations must have count[5] = 0,
11942 otherwise we have an UNALLOC. */
11943 if (size == 0 && uimm (count, 5, 5))
11944 HALT_UNALLOC;
11945
11946 /* Dispatch on size:op:N. */
11947 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
11948 {
11949 case 0: and32_shift (cpu, shiftType, count); return;
11950 case 1: bic32_shift (cpu, shiftType, count); return;
11951 case 2: orr32_shift (cpu, shiftType, count); return;
11952 case 3: orn32_shift (cpu, shiftType, count); return;
11953 case 4: eor32_shift (cpu, shiftType, count); return;
11954 case 5: eon32_shift (cpu, shiftType, count); return;
11955 case 6: ands32_shift (cpu, shiftType, count); return;
11956 case 7: bics32_shift (cpu, shiftType, count); return;
11957 case 8: and64_shift (cpu, shiftType, count); return;
11958 case 9: bic64_shift (cpu, shiftType, count); return;
11959 case 10: orr64_shift (cpu, shiftType, count); return;
11960 case 11: orn64_shift (cpu, shiftType, count); return;
11961 case 12: eor64_shift (cpu, shiftType, count); return;
11962 case 13: eon64_shift (cpu, shiftType, count); return;
11963 case 14: ands64_shift (cpu, shiftType, count); return;
11964 case 15: bics64_shift (cpu, shiftType, count); return;
11965 }
11966 }
11967
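/* Worked example (illustrative): "ORR X0, X1, X2, LSL #3" has
   size:op = 101 and N = 0, so the dispatch value is (5 << 1) | 0 = 10
   and orr64_shift runs with shiftType == LSL and count == 3.  */
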
11968 /* 32 bit conditional select. */
11969 static void
11970 csel32 (sim_cpu *cpu, CondCode cc)
11971 {
11972 unsigned rm = INSTR (20, 16);
11973 unsigned rn = INSTR (9, 5);
11974 unsigned rd = INSTR (4, 0);
11975
11976 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11977 testConditionCode (cpu, cc)
11978 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
11979 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
11980 }
11981
11982 /* 64 bit conditional select. */
11983 static void
11984 csel64 (sim_cpu *cpu, CondCode cc)
11985 {
11986 unsigned rm = INSTR (20, 16);
11987 unsigned rn = INSTR (9, 5);
11988 unsigned rd = INSTR (4, 0);
11989
11990 aarch64_set_reg_u64 (cpu, rd, NO_SP,
11991 testConditionCode (cpu, cc)
11992 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
11993 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
11994 }
11995
11996 /* 32 bit conditional increment. */
11997 static void
11998 csinc32 (sim_cpu *cpu, CondCode cc)
11999 {
12000 unsigned rm = INSTR (20, 16);
12001 unsigned rn = INSTR (9, 5);
12002 unsigned rd = INSTR (4, 0);
12003
12004 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12005 testConditionCode (cpu, cc)
12006 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12007 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12008 }
12009
12010 /* 64 bit conditional increment. */
12011 static void
12012 csinc64 (sim_cpu *cpu, CondCode cc)
12013 {
12014 unsigned rm = INSTR (20, 16);
12015 unsigned rn = INSTR (9, 5);
12016 unsigned rd = INSTR (4, 0);
12017
12018 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12019 testConditionCode (cpu, cc)
12020 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12021 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12022 }
12023
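/* Illustrative note: several aliases resolve to these helpers, e.g.
   "CINC Wd, Wn, cond" is CSINC with both sources equal to Wn and the
   condition inverted, and "CSET Wd, cond" is CSINC with WZR for both
   sources.  */
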
12024 /* 32 bit conditional invert. */
12025 static void
12026 csinv32 (sim_cpu *cpu, CondCode cc)
12027 {
12028 unsigned rm = INSTR (20, 16);
12029 unsigned rn = INSTR (9, 5);
12030 unsigned rd = INSTR (4, 0);
12031
12032 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12033 testConditionCode (cpu, cc)
12034 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12035 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12036 }
12037
12038 /* 64 bit conditional invert. */
12039 static void
12040 csinv64 (sim_cpu *cpu, CondCode cc)
12041 {
12042 unsigned rm = INSTR (20, 16);
12043 unsigned rn = INSTR (9, 5);
12044 unsigned rd = INSTR (4, 0);
12045
12046 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12047 testConditionCode (cpu, cc)
12048 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12049 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12050 }
12051
12052 /* 32 bit conditional negate. */
12053 static void
12054 csneg32 (sim_cpu *cpu, CondCode cc)
12055 {
12056 unsigned rm = INSTR (20, 16);
12057 unsigned rn = INSTR (9, 5);
12058 unsigned rd = INSTR (4, 0);
12059
12060 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12061 testConditionCode (cpu, cc)
12062 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12063 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12064 }
12065
12066 /* 64 bit conditional negate. */
12067 static void
12068 csneg64 (sim_cpu *cpu, CondCode cc)
12069 {
12070 unsigned rm = INSTR (20, 16);
12071 unsigned rn = INSTR (9, 5);
12072 unsigned rd = INSTR (4, 0);
12073
12074 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12075 testConditionCode (cpu, cc)
12076 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12077 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12078 }
12079
12080 static void
12081 dexCondSelect (sim_cpu *cpu)
12082 {
12083 /* instr[28,21] = 11011011
12084 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12085 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12086 100 ==> CSINV, 101 ==> CSNEG,
12087 _1_ ==> UNALLOC
12088 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12089 instr[15,12] = cond */
12091
12092 CondCode cc = INSTR (15, 12);
12093 uint32_t S = INSTR (29, 29);
12094 uint32_t op2 = INSTR (11, 10);
12095
12096 if (S == 1)
12097 HALT_UNALLOC;
12098
12099 if (op2 & 0x2)
12100 HALT_UNALLOC;
12101
12102 switch ((INSTR (31, 30) << 1) | op2)
12103 {
12104 case 0: csel32 (cpu, cc); return;
12105 case 1: csinc32 (cpu, cc); return;
12106 case 2: csinv32 (cpu, cc); return;
12107 case 3: csneg32 (cpu, cc); return;
12108 case 4: csel64 (cpu, cc); return;
12109 case 5: csinc64 (cpu, cc); return;
12110 case 6: csinv64 (cpu, cc); return;
12111 case 7: csneg64 (cpu, cc); return;
12112 }
12113 }
12114
12115 /* Some helpers for counting leading 1 or 0 bits. */
12116
12117 /* Counts the number of leading bits which are the same
12118 in a 32 bit value in the range 1 to 32. */
12119 static uint32_t
12120 leading32 (uint32_t value)
12121 {
12122 int32_t mask = 0xffff0000;
12123 uint32_t count = 16; /* Counts number of bits set in mask. */
12124 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12125 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12126
12127 while (lo + 1 < hi)
12128 {
12129 int32_t test = (value & mask);
12130
12131 if (test == 0 || test == mask)
12132 {
12133 lo = count;
12134 count = (lo + hi) / 2;
12135 mask >>= (count - lo);
12136 }
12137 else
12138 {
12139 hi = count;
12140 count = (lo + hi) / 2;
12141 mask <<= hi - count;
12142 }
12143 }
12144
12145 if (lo != hi)
12146 {
12147 int32_t test;
12148
12149 mask >>= 1;
12150 test = (value & mask);
12151
12152 if (test == 0 || test == mask)
12153 count = hi;
12154 else
12155 count = lo;
12156 }
12157
12158 return count;
12159 }
12160
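/* Illustrative example: leading32 (0xfffffffe) is 31, since bits 31
   down to 1 all match the sign bit, so cls32 below reports 30.  Where
   GCC builtins are available this is equivalent to
   __builtin_clrsb ((int32_t) value) + 1.  */
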
12161 /* Counts the number of leading bits which are the same
12162 in a 64 bit value in the range 1 to 64. */
12163 static uint64_t
12164 leading64 (uint64_t value)
12165 {
12166 int64_t mask = 0xffffffff00000000LL;
12167 uint64_t count = 32; /* Counts number of bits set in mask. */
12168 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12169 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12170
12171 while (lo + 1 < hi)
12172 {
12173 int64_t test = (value & mask);
12174
12175 if (test == 0 || test == mask)
12176 {
12177 lo = count;
12178 count = (lo + hi) / 2;
12179 mask >>= (count - lo);
12180 }
12181 else
12182 {
12183 hi = count;
12184 count = (lo + hi) / 2;
12185 mask <<= hi - count;
12186 }
12187 }
12188
12189 if (lo != hi)
12190 {
12191 int64_t test;
12192
12193 mask >>= 1;
12194 test = (value & mask);
12195
12196 if (test == 0 || test == mask)
12197 count = hi;
12198 else
12199 count = lo;
12200 }
12201
12202 return count;
12203 }
12204
12205 /* Bit operations. */
12206 /* N.B register args may not be SP. */
12207
12208 /* 32 bit count leading sign bits. */
12209 static void
12210 cls32 (sim_cpu *cpu)
12211 {
12212 unsigned rn = INSTR (9, 5);
12213 unsigned rd = INSTR (4, 0);
12214
12215 /* N.B. the result needs to exclude the leading bit. */
12216 aarch64_set_reg_u64
12217 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12218 }
12219
12220 /* 64 bit count leading sign bits. */
12221 static void
12222 cls64 (sim_cpu *cpu)
12223 {
12224 unsigned rn = INSTR (9, 5);
12225 unsigned rd = INSTR (4, 0);
12226
12227 /* N.B. the result needs to exclude the leading bit. */
12228 aarch64_set_reg_u64
12229 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12230 }
12231
12232 /* 32 bit count leading zero bits. */
12233 static void
12234 clz32 (sim_cpu *cpu)
12235 {
12236 unsigned rn = INSTR (9, 5);
12237 unsigned rd = INSTR (4, 0);
12238 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12239
12240 /* If the sign (top) bit is set then the count is 0. */
12241 if (pick32 (value, 31, 31))
12242 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12243 else
12244 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12245 }
12246
12247 /* 64 bit count leading zero bits. */
12248 static void
12249 clz64 (sim_cpu *cpu)
12250 {
12251 unsigned rn = INSTR (9, 5);
12252 unsigned rd = INSTR (4, 0);
12253 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12254
12255 /* If the sign (top) bit is set then the count is 0. */
12256 if (pick64 (value, 63, 63))
12257 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12258 else
12259 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12260 }
12261
12262 /* 32 bit reverse bits. */
12263 static void
12264 rbit32 (sim_cpu *cpu)
12265 {
12266 unsigned rn = INSTR (9, 5);
12267 unsigned rd = INSTR (4, 0);
12268 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12269 uint32_t result = 0;
12270 int i;
12271
12272 for (i = 0; i < 32; i++)
12273 {
12274 result <<= 1;
12275 result |= (value & 1);
12276 value >>= 1;
12277 }
12278 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12279 }
12280
12281 /* 64 bit reverse bits. */
12282 static void
12283 rbit64 (sim_cpu *cpu)
12284 {
12285 unsigned rn = INSTR (9, 5);
12286 unsigned rd = INSTR (4, 0);
12287 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12288 uint64_t result = 0;
12289 int i;
12290
12291 for (i = 0; i < 64; i++)
12292 {
12293 result <<= 1;
12294 result |= (value & 1UL);
12295 value >>= 1;
12296 }
12297 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12298 }
12299
12300 /* 32 bit reverse bytes. */
12301 static void
12302 rev32 (sim_cpu *cpu)
12303 {
12304 unsigned rn = INSTR (9, 5);
12305 unsigned rd = INSTR (4, 0);
12306 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12307 uint32_t result = 0;
12308 int i;
12309
12310 for (i = 0; i < 4; i++)
12311 {
12312 result <<= 8;
12313 result |= (value & 0xff);
12314 value >>= 8;
12315 }
12316 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12317 }
12318
12319 /* 64 bit reverse bytes. */
12320 static void
12321 rev64 (sim_cpu *cpu)
12322 {
12323 unsigned rn = INSTR (9, 5);
12324 unsigned rd = INSTR (4, 0);
12325 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12326 uint64_t result = 0;
12327 int i;
12328
12329 for (i = 0; i < 8; i++)
12330 {
12331 result <<= 8;
12332 result |= (value & 0xffULL);
12333 value >>= 8;
12334 }
12335 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12336 }
12337
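/* 64 bit reverse bytes within each 32-bit word (REV32).  Minimal
   sketch: rev32 above only reverses the low word of the register,
   so the 64-bit REV32 form needs the reversal applied to both
   32-bit halves.  */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  /* Pick up byte 0 of each 32-bit half on every pass, shifting the
     accumulated result left so the bytes end up reversed per word.  */
  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}
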
12338 /* 32 bit reverse shorts. */
12339 /* N.B. this reverses the order of the bytes in each half word. */
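/* Illustrative: revh32 maps 0xaabbccdd to 0xbbaaddcc.  */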
12340 static void
12341 revh32 (sim_cpu *cpu)
12342 {
12343 unsigned rn = INSTR (9, 5);
12344 unsigned rd = INSTR (4, 0);
12345 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12346 uint32_t result = 0;
12347 int i;
12348
12349 for (i = 0; i < 2; i++)
12350 {
12351 result <<= 8;
12352 result |= (value & 0x00ff00ff);
12353 value >>= 8;
12354 }
12355 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12356 }
12357
12358 /* 64 bit reverse shorts. */
12359 /* N.B. this reverses the order of the bytes in each half word. */
12360 static void
12361 revh64 (sim_cpu *cpu)
12362 {
12363 unsigned rn = INSTR (9, 5);
12364 unsigned rd = INSTR (4, 0);
12365 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12366 uint64_t result = 0;
12367 int i;
12368
12369 for (i = 0; i < 2; i++)
12370 {
12371 result <<= 8;
12372 result |= (value & 0x00ff00ff00ff00ffULL);
12373 value >>= 8;
12374 }
12375 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12376 }
12377
12378 static void
12379 dexDataProc1Source (sim_cpu *cpu)
12380 {
12381 /* instr[30] = 1
12382 instr[28,21] = 111010110
12383 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12384 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12385 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12386 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12387 000010 ==> REV, 000011 ==> UNALLOC (REV64 if size == 1)
12388 000100 ==> CLZ, 000101 ==> CLS
12389 ow ==> UNALLOC
12390 instr[9,5] = rn : may not be SP
12391 instr[4,0] = rd : may not be SP. */
12392
12393 uint32_t S = INSTR (29, 29);
12394 uint32_t opcode2 = INSTR (20, 16);
12395 uint32_t opcode = INSTR (15, 10);
12396 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12397
12398 if (S == 1)
12399 HALT_UNALLOC;
12400
12401 if (opcode2 != 0)
12402 HALT_UNALLOC;
12403
12404 if (opcode & 0x38)
12405 HALT_UNALLOC;
12406
12407 switch (dispatch)
12408 {
12409 case 0: rbit32 (cpu); return;
12410 case 1: revh32 (cpu); return;
12411 case 2: rev32 (cpu); return;
12412 case 4: clz32 (cpu); return;
12413 case 5: cls32 (cpu); return;
12414 case 8: rbit64 (cpu); return;
12415 case 9: revh64 (cpu); return;
12416 case 10: rev32 (cpu); return;
12417 case 11: rev64 (cpu); return;
12418 case 12: clz64 (cpu); return;
12419 case 13: cls64 (cpu); return;
12420 default: HALT_UNALLOC;
12421 }
12422 }
12423
12424 /* Variable shift.
12425 Shifts by count supplied in register.
12426 N.B register args may not be SP.
12427 These all use the shifted auxiliary function for
12428 simplicity and clarity. Writing the actual shift
12429 inline would avoid a branch and so be faster but
12430 would also necessitate getting signs right. */
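#if 0
/* A sketch of what the inline version mentioned above would have to
   get right for ASR: mask the count to the datasize and force a
   signed operand (relying, as the simulator does elsewhere, on the
   compiler implementing >> of a negative value as an arithmetic
   shift).  */
static uint32_t
asrv32_inline_sketch (uint32_t value, uint32_t count)
{
  return (uint32_t) ((int32_t) value >> (count & 0x1f));
}
#endif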
12431
12432 /* 32 bit arithmetic shift right. */
12433 static void
12434 asrv32 (sim_cpu *cpu)
12435 {
12436 unsigned rm = INSTR (20, 16);
12437 unsigned rn = INSTR (9, 5);
12438 unsigned rd = INSTR (4, 0);
12439
12440 aarch64_set_reg_u64
12441 (cpu, rd, NO_SP,
12442 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12443 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12444 }
12445
12446 /* 64 bit arithmetic shift right. */
12447 static void
12448 asrv64 (sim_cpu *cpu)
12449 {
12450 unsigned rm = INSTR (20, 16);
12451 unsigned rn = INSTR (9, 5);
12452 unsigned rd = INSTR (4, 0);
12453
12454 aarch64_set_reg_u64
12455 (cpu, rd, NO_SP,
12456 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12457 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12458 }
12459
12460 /* 32 bit logical shift left. */
12461 static void
12462 lslv32 (sim_cpu *cpu)
12463 {
12464 unsigned rm = INSTR (20, 16);
12465 unsigned rn = INSTR (9, 5);
12466 unsigned rd = INSTR (4, 0);
12467
12468 aarch64_set_reg_u64
12469 (cpu, rd, NO_SP,
12470 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12471 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12472 }
12473
12474 /* 64 bit logical shift left. */
12475 static void
12476 lslv64 (sim_cpu *cpu)
12477 {
12478 unsigned rm = INSTR (20, 16);
12479 unsigned rn = INSTR (9, 5);
12480 unsigned rd = INSTR (4, 0);
12481
12482 aarch64_set_reg_u64
12483 (cpu, rd, NO_SP,
12484 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12485 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12486 }
12487
12488 /* 32 bit logical shift right. */
12489 static void
12490 lsrv32 (sim_cpu *cpu)
12491 {
12492 unsigned rm = INSTR (20, 16);
12493 unsigned rn = INSTR (9, 5);
12494 unsigned rd = INSTR (4, 0);
12495
12496 aarch64_set_reg_u64
12497 (cpu, rd, NO_SP,
12498 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12499 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12500 }
12501
12502 /* 64 bit logical shift right. */
12503 static void
12504 lsrv64 (sim_cpu *cpu)
12505 {
12506 unsigned rm = INSTR (20, 16);
12507 unsigned rn = INSTR (9, 5);
12508 unsigned rd = INSTR (4, 0);
12509
12510 aarch64_set_reg_u64
12511 (cpu, rd, NO_SP,
12512 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12513 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12514 }
12515
12516 /* 32 bit rotate right. */
12517 static void
12518 rorv32 (sim_cpu *cpu)
12519 {
12520 unsigned rm = INSTR (20, 16);
12521 unsigned rn = INSTR (9, 5);
12522 unsigned rd = INSTR (4, 0);
12523
12524 aarch64_set_reg_u64
12525 (cpu, rd, NO_SP,
12526 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12527 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12528 }
12529
12530 /* 64 bit rotate right. */
12531 static void
12532 rorv64 (sim_cpu *cpu)
12533 {
12534 unsigned rm = INSTR (20, 16);
12535 unsigned rn = INSTR (9, 5);
12536 unsigned rd = INSTR (4, 0);
12537
12538 aarch64_set_reg_u64
12539 (cpu, rd, NO_SP,
12540 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12541 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12542 }
12543
12544
12545 /* divide. */
12546
12547 /* 32 bit signed divide. */
12548 static void
12549 sdiv32 (sim_cpu *cpu)
12550 {
12551 unsigned rm = INSTR (20, 16);
12552 unsigned rn = INSTR (9, 5);
12553 unsigned rd = INSTR (4, 0);
12554 /* N.B. the pseudo-code does the divide using 64 bit data. */
12555 /* N.B. C99 and later guarantee that integer division rounds towards zero, as required. */
12556 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12557 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12558
12559 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12560 divisor ? ((int32_t) (dividend / divisor)) : 0);
12561 }
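#if 0
/* Why the 64 bit intermediate matters (illustrative sketch): in 32 bit
   arithmetic INT32_MIN / -1 overflows, which is undefined behaviour in
   C, whereas AArch64 SDIV defines the result to be INT32_MIN.  The 64
   bit divide yields 0x80000000, which truncates back to INT32_MIN.  */
static int32_t
sdiv32_edge_case_sketch (void)
{
  int64_t dividend = INT32_MIN;
  int64_t divisor = -1;
  return (int32_t) (dividend / divisor);  /* 0x80000000 ==> INT32_MIN.  */
}
#endif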
12562
12563 /* 64 bit signed divide. */
12564 static void
12565 sdiv64 (sim_cpu *cpu)
12566 {
12567 unsigned rm = INSTR (20, 16);
12568 unsigned rn = INSTR (9, 5);
12569 unsigned rd = INSTR (4, 0);
12570
12571 /* N.B. C99 and later guarantee that integer division rounds towards zero, as required. */
12572 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12573
12574 aarch64_set_reg_s64
12575 (cpu, rd, NO_SP,
12576 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12577 }
12578
12579 /* 32 bit unsigned divide. */
12580 static void
12581 udiv32 (sim_cpu *cpu)
12582 {
12583 unsigned rm = INSTR (20, 16);
12584 unsigned rn = INSTR (9, 5);
12585 unsigned rd = INSTR (4, 0);
12586
12587 /* N.B. the pseudo-code does the divide using 64 bit data. */
12588 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12589 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12590
12591 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12592 divisor ? (uint32_t) (dividend / divisor) : 0);
12593 }
12594
12595 /* 64 bit unsigned divide. */
12596 static void
12597 udiv64 (sim_cpu *cpu)
12598 {
12599 unsigned rm = INSTR (20, 16);
12600 unsigned rn = INSTR (9, 5);
12601 unsigned rd = INSTR (4, 0);
12602
12603 /* N.B. C99 and later guarantee that integer division rounds towards zero, as required. */
12604 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12605
12606 aarch64_set_reg_u64
12607 (cpu, rd, NO_SP,
12608 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12609 }
12610
12611 static void
12612 dexDataProc2Source (sim_cpu *cpu)
12613 {
12614 /* assert instr[30] == 0
12615 instr[28,21] == 11010110
12616 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12617 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12618 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12619 001000 ==> LSLV, 001001 ==> LSRV
12620 001010 ==> ASRV, 001011 ==> RORV
12621 ow ==> UNALLOC. */
12622
12623 uint32_t dispatch;
12624 uint32_t S = INSTR (29, 29);
12625 uint32_t opcode = INSTR (15, 10);
12626
12627 if (S == 1)
12628 HALT_UNALLOC;
12629
12630 if (opcode & 0x34)
12631 HALT_UNALLOC;
12632
12633 dispatch = ( (INSTR (31, 31) << 3)
12634 | (uimm (opcode, 3, 3) << 2)
12635 | uimm (opcode, 1, 0));
12636 switch (dispatch)
12637 {
12638 case 2: udiv32 (cpu); return;
12639 case 3: sdiv32 (cpu); return;
12640 case 4: lslv32 (cpu); return;
12641 case 5: lsrv32 (cpu); return;
12642 case 6: asrv32 (cpu); return;
12643 case 7: rorv32 (cpu); return;
12644 case 10: udiv64 (cpu); return;
12645 case 11: sdiv64 (cpu); return;
12646 case 12: lslv64 (cpu); return;
12647 case 13: lsrv64 (cpu); return;
12648 case 14: asrv64 (cpu); return;
12649 case 15: rorv64 (cpu); return;
12650 default: HALT_UNALLOC;
12651 }
12652 }
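#if 0
/* Worked example of the dispatch packing above (sketch only): 64 bit
   RORV has size == 1 and opcode == 001011, giving
   (1 << 3) | (1 << 2) | 3 == 15, the last case in the switch.  */
static uint32_t
pack_2src_dispatch_sketch (uint32_t size, uint32_t opcode)
{
  return (size << 3) | (((opcode >> 3) & 1) << 2) | (opcode & 3);
}
#endif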
12653
12654
12655 /* Multiply. */
12656
12657 /* 32 bit multiply and add. */
12658 static void
12659 madd32 (sim_cpu *cpu)
12660 {
12661 unsigned rm = INSTR (20, 16);
12662 unsigned ra = INSTR (14, 10);
12663 unsigned rn = INSTR (9, 5);
12664 unsigned rd = INSTR (4, 0);
12665
12666 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12667 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12668 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12669 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12670 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12671 }
12672
12673 /* 64 bit multiply and add. */
12674 static void
12675 madd64 (sim_cpu *cpu)
12676 {
12677 unsigned rm = INSTR (20, 16);
12678 unsigned ra = INSTR (14, 10);
12679 unsigned rn = INSTR (9, 5);
12680 unsigned rd = INSTR (4, 0);
12681
12682 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12683 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12684 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12685 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12686 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12687 }
12688
12689 /* 32 bit multiply and sub. */
12690 static void
12691 msub32 (sim_cpu *cpu)
12692 {
12693 unsigned rm = INSTR (20, 16);
12694 unsigned ra = INSTR (14, 10);
12695 unsigned rn = INSTR (9, 5);
12696 unsigned rd = INSTR (4, 0);
12697
12698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12699 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12700 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12701 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12702 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12703 }
12704
12705 /* 64 bit multiply and sub. */
12706 static void
12707 msub64 (sim_cpu *cpu)
12708 {
12709 unsigned rm = INSTR (20, 16);
12710 unsigned ra = INSTR (14, 10);
12711 unsigned rn = INSTR (9, 5);
12712 unsigned rd = INSTR (4, 0);
12713
12714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12715 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12716 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12717 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12718 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12719 }
12720
12721 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12722 static void
12723 smaddl (sim_cpu *cpu)
12724 {
12725 unsigned rm = INSTR (20, 16);
12726 unsigned ra = INSTR (14, 10);
12727 unsigned rn = INSTR (9, 5);
12728 unsigned rd = INSTR (4, 0);
12729
12730 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12731 obtain a 64 bit product. */
12732 aarch64_set_reg_s64
12733 (cpu, rd, NO_SP,
12734 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12735 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12736 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12737 }
12738
12739 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12740 static void
12741 smsubl (sim_cpu *cpu)
12742 {
12743 unsigned rm = INSTR (20, 16);
12744 unsigned ra = INSTR (14, 10);
12745 unsigned rn = INSTR (9, 5);
12746 unsigned rd = INSTR (4, 0);
12747
12748 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12749 obtain a 64 bit product. */
12750 aarch64_set_reg_s64
12751 (cpu, rd, NO_SP,
12752 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12753 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12754 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12755 }
12756
12757 /* Integer Multiply/Divide. */
12758
12759 /* First some macros and a helper function. */
12760 /* Macros to test or access elements of 64 bit words. */
12761
12762 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12763 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12764 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12765 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12766 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12767 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12768
12769 /* Offset of sign bit in 64 bit signed integer. */
12770 #define SIGN_SHIFT_U64 63
12771 /* The sign bit itself -- also identifies the minimum negative int value. */
12772 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12773 /* Return true if a 64 bit signed int presented as an unsigned int is the
12774 most negative value. */
12775 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12776 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
12777 int has its sign bit set. */
12778 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12779 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12780 an unsigned int has its sign bit set or not. */
12781 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12782 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12783 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
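#if 0
/* Sanity sketch for the word-access macros above: any 64 bit value is
   recovered exactly from its two 32 bit halves.  */
static int
check_word_macros (uint64_t x)
{
  return x == ((highWordToU64 (x) << 32) | lowWordToU64 (x));
}
#endif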
12784
12785 /* Multiply two 64 bit ints and return
12786 the hi 64 bits of the 128 bit product. */
12787
12788 static uint64_t
12789 mul64hi (uint64_t value1, uint64_t value2)
12790 {
12791 uint64_t resultmid1;
12792 uint64_t result;
12793 uint64_t value1_lo = lowWordToU64 (value1);
12794 uint64_t value1_hi = highWordToU64 (value1);
12795 uint64_t value2_lo = lowWordToU64 (value2);
12796 uint64_t value2_hi = highWordToU64 (value2);
12797
12798 /* Cross-multiply and collect results. */
12799 uint64_t xproductlo = value1_lo * value2_lo;
12800 uint64_t xproductmid1 = value1_lo * value2_hi;
12801 uint64_t xproductmid2 = value1_hi * value2_lo;
12802 uint64_t xproducthi = value1_hi * value2_hi;
12803 uint64_t carry = 0;
12804 /* Start accumulating 64 bit results. */
12805 /* Drop bottom half of lowest cross-product. */
12806 uint64_t resultmid = xproductlo >> 32;
12807 /* Add in middle products. */
12808 resultmid = resultmid + xproductmid1;
12809
12810 /* Check for overflow. */
12811 if (resultmid < xproductmid1)
12812 /* Carry over 1 into top cross-product. */
12813 carry++;
12814
12815 resultmid1 = resultmid + xproductmid2;
12816
12817 /* Check for overflow. */
12818 if (resultmid1 < xproductmid2)
12819 /* Carry over 1 into top cross-product. */
12820 carry++;
12821
12822 /* Drop lowest 32 bits of middle cross-product. */
12823 result = resultmid1 >> 32;
12824
12825 /* Add in the top cross-product, plus the middle sum's carry at bit 32. */
12826 result += xproducthi + (carry << 32);
12827
12828 return result;
12829 }
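#if 0
/* A compile-guarded cross-check of the decomposition above, assuming
   the non-standard unsigned __int128 type that GCC and Clang provide
   on 64 bit hosts (which is exactly why mul64hi does not use it).  */
static int
check_mul64hi (uint64_t a, uint64_t b)
{
  unsigned __int128 p = (unsigned __int128) a * b;
  return mul64hi (a, b) == (uint64_t) (p >> 64);
}
#endif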
12830
12831 /* Signed multiply high, source, source2 :
12832 64 bit, dest <-- high 64-bit of result. */
12833 static void
12834 smulh (sim_cpu *cpu)
12835 {
12836 uint64_t uresult;
12837 int64_t result;
12838 unsigned rm = INSTR (20, 16);
12839 unsigned rn = INSTR (9, 5);
12840 unsigned rd = INSTR (4, 0);
12841 GReg ra = INSTR (14, 10);
12842 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12843 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12844 uint64_t uvalue1;
12845 uint64_t uvalue2;
12846 int64_t signum = 1;
12847
12848 if (ra != R31)
12849 HALT_UNALLOC;
12850
12851 /* Convert to unsigned and use the unsigned mul64hi routine
12852 then fix the sign up afterwards. */
12853 if (value1 < 0)
12854 {
12855 signum *= -1L;
12856 uvalue1 = - (uint64_t) value1; /* Avoids signed overflow for INT64_MIN. */
12857 }
12858 else
12859 {
12860 uvalue1 = value1;
12861 }
12862
12863 if (value2 < 0)
12864 {
12865 signum *= -1L;
12866 uvalue2 = - (uint64_t) value2;
12867 }
12868 else
12869 {
12870 uvalue2 = value2;
12871 }
12872
12873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12874 uresult = mul64hi (uvalue1, uvalue2);
12875 /* The high half of the negated product is ~hi, plus one only if lo == 0. */
12876 result = signum < 0 ? ~uresult + (uvalue1 * uvalue2 == 0) : uresult;
12877
12878 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12879 }
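#if 0
/* The sign fix-up in smulh relies on this rule: for a double-width
   product hi:lo, the high half of its negation is ~hi, plus one only
   when lo is zero.  A sketch check using a 32 bit product split into
   16 bit halves.  */
static int
check_negation_rule (uint16_t hi, uint16_t lo)
{
  uint32_t p = ((uint32_t) hi << 16) | lo;
  uint16_t neg_hi = (uint16_t) ((0u - p) >> 16);
  return neg_hi == (uint16_t) (~hi + (lo == 0));
}
#endif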
12880
12881 /* Unsigned multiply add long -- source, source2 :
12882 32 bit, source3 : 64 bit. */
12883 static void
12884 umaddl (sim_cpu *cpu)
12885 {
12886 unsigned rm = INSTR (20, 16);
12887 unsigned ra = INSTR (14, 10);
12888 unsigned rn = INSTR (9, 5);
12889 unsigned rd = INSTR (4, 0);
12890
12891 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12892 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12893 obtain a 64 bit product. */
12894 aarch64_set_reg_u64
12895 (cpu, rd, NO_SP,
12896 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12897 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12898 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12899 }
12900
12901 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12902 static void
12903 umsubl (sim_cpu *cpu)
12904 {
12905 unsigned rm = INSTR (20, 16);
12906 unsigned ra = INSTR (14, 10);
12907 unsigned rn = INSTR (9, 5);
12908 unsigned rd = INSTR (4, 0);
12909
12910 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12911 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12912 obtain a 64 bit product. */
12913 aarch64_set_reg_u64
12914 (cpu, rd, NO_SP,
12915 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12916 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12917 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12918 }
12919
12920 /* Unsigned multiply high, source, source2 :
12921 64 bit, dest <-- high 64-bit of result. */
12922 static void
12923 umulh (sim_cpu *cpu)
12924 {
12925 unsigned rm = INSTR (20, 16);
12926 unsigned rn = INSTR (9, 5);
12927 unsigned rd = INSTR (4, 0);
12928 GReg ra = INSTR (14, 10);
12929
12930 if (ra != R31)
12931 HALT_UNALLOC;
12932
12933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12934 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12935 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
12936 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12937 }
12938
12939 static void
12940 dexDataProc3Source (sim_cpu *cpu)
12941 {
12942 /* assert instr[28,24] == 11011. */
12943 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
12944 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
12945 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
12946 instr[15] = o0 : 0/1 ==> ok
12947 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
12948 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
12949 0100 ==> SMULH, (64 bit only)
12950 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
12951 1100 ==> UMULH (64 bit only)
12952 ow ==> UNALLOC. */
12953
12954 uint32_t dispatch;
12955 uint32_t size = INSTR (31, 31);
12956 uint32_t op54 = INSTR (30, 29);
12957 uint32_t op31 = INSTR (23, 21);
12958 uint32_t o0 = INSTR (15, 15);
12959
12960 if (op54 != 0)
12961 HALT_UNALLOC;
12962
12963 if (size == 0)
12964 {
12965 if (op31 != 0)
12966 HALT_UNALLOC;
12967
12968 if (o0 == 0)
12969 madd32 (cpu);
12970 else
12971 msub32 (cpu);
12972 return;
12973 }
12974
12975 dispatch = (op31 << 1) | o0;
12976
12977 switch (dispatch)
12978 {
12979 case 0: madd64 (cpu); return;
12980 case 1: msub64 (cpu); return;
12981 case 2: smaddl (cpu); return;
12982 case 3: smsubl (cpu); return;
12983 case 4: smulh (cpu); return;
12984 case 10: umaddl (cpu); return;
12985 case 11: umsubl (cpu); return;
12986 case 12: umulh (cpu); return;
12987 default: HALT_UNALLOC;
12988 }
12989 }
12990
12991 static void
12992 dexDPReg (sim_cpu *cpu)
12993 {
12994 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12995 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
12996 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
12997 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
12998
12999 switch (group2)
13000 {
13001 case DPREG_LOG_000:
13002 case DPREG_LOG_001:
13003 dexLogicalShiftedRegister (cpu); return;
13004
13005 case DPREG_ADDSHF_010:
13006 dexAddSubtractShiftedRegister (cpu); return;
13007
13008 case DPREG_ADDEXT_011:
13009 dexAddSubtractExtendedRegister (cpu); return;
13010
13011 case DPREG_ADDCOND_100:
13012 {
13013 /* This set bundles a variety of different operations. */
13014 /* Check for: */
13015 /* 1) add/sub w carry. */
13016 uint32_t mask1 = 0x1FE00000U;
13017 uint32_t val1 = 0x1A000000U;
13018 /* 2) cond compare register/immediate. */
13019 uint32_t mask2 = 0x1FE00000U;
13020 uint32_t val2 = 0x1A400000U;
13021 /* 3) cond select. */
13022 uint32_t mask3 = 0x1FE00000U;
13023 uint32_t val3 = 0x1A800000U;
13024 /* 4) data proc 1/2 source. */
13025 uint32_t mask4 = 0x1FE00000U;
13026 uint32_t val4 = 0x1AC00000U;
13027
13028 if ((aarch64_get_instr (cpu) & mask1) == val1)
13029 dexAddSubtractWithCarry (cpu);
13030
13031 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13032 CondCompare (cpu);
13033
13034 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13035 dexCondSelect (cpu);
13036
13037 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13038 {
13039 /* Bit 30 is clear for data proc 2 source
13040 and set for data proc 1 source. */
13041 if (aarch64_get_instr (cpu) & (1U << 30))
13042 dexDataProc1Source (cpu);
13043 else
13044 dexDataProc2Source (cpu);
13045 }
13046
13047 else
13048 /* Should not reach here. */
13049 HALT_NYI;
13050
13051 return;
13052 }
13053
13054 case DPREG_3SRC_110:
13055 dexDataProc3Source (cpu); return;
13056
13057 case DPREG_UNALLOC_101:
13058 HALT_UNALLOC;
13059
13060 case DPREG_3SRC_111:
13061 dexDataProc3Source (cpu); return;
13062
13063 default:
13064 /* Should never reach here. */
13065 HALT_NYI;
13066 }
13067 }
13068
13069 /* Unconditional Branch immediate.
13070 Offset is a PC-relative byte offset in the range +/- 128MiB.
13071 The offset is assumed to be raw from the decode, i.e. the
13072 simulator is expected to scale it from a word offset to a byte offset. */
13073
13074 /* Unconditional branch. */
13075 static void
13076 buc (sim_cpu *cpu, int32_t offset)
13077 {
13078 aarch64_set_next_PC_by_offset (cpu, offset);
13079 }
13080
13081 static unsigned stack_depth = 0;
13082
13083 /* Unconditional branch and link -- writes return PC to LR. */
13084 static void
13085 bl (sim_cpu *cpu, int32_t offset)
13086 {
13087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13088 aarch64_save_LR (cpu);
13089 aarch64_set_next_PC_by_offset (cpu, offset);
13090
13091 if (TRACE_BRANCH_P (cpu))
13092 {
13093 ++ stack_depth;
13094 TRACE_BRANCH (cpu,
13095 " %*scall %" PRIx64 " [%s]"
13096 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13097 stack_depth, " ", aarch64_get_next_PC (cpu),
13098 aarch64_get_func (aarch64_get_next_PC (cpu)),
13099 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13100 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13101 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13102 );
13103 }
13104 }
13105
13106 /* Unconditional Branch register.
13107 Branch/return address is in source register. */
13108
13109 /* Unconditional branch. */
13110 static void
13111 br (sim_cpu *cpu)
13112 {
13113 unsigned rn = INSTR (9, 5);
13114 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13115 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13116 }
13117
13118 /* Unconditional branch and link -- writes return PC to LR. */
13119 static void
13120 blr (sim_cpu *cpu)
13121 {
13122 unsigned rn = INSTR (9, 5);
13123
13124 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13125 /* The pseudo code in the spec says we update LR before fetching
13126 the value from rn. */
13127 aarch64_save_LR (cpu);
13128 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13129
13130 if (TRACE_BRANCH_P (cpu))
13131 {
13132 ++ stack_depth;
13133 TRACE_BRANCH (cpu,
13134 " %*scall %" PRIx64 " [%s]"
13135 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13136 stack_depth, " ", aarch64_get_next_PC (cpu),
13137 aarch64_get_func (aarch64_get_next_PC (cpu)),
13138 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13139 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13140 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13141 );
13142 }
13143 }
13144
13145 /* Return -- the assembler defaults the source register to LR. This is
13146 functionally equivalent to br but, presumably, unlike br it
13147 side-effects the branch predictor. */
13148 static void
13149 ret (sim_cpu *cpu)
13150 {
13151 unsigned rn = INSTR (9, 5);
13152 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13153
13154 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13155 if (TRACE_BRANCH_P (cpu))
13156 {
13157 TRACE_BRANCH (cpu,
13158 " %*sreturn [result: %" PRIx64 "]",
13159 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13160 -- stack_depth;
13161 }
13162 }
13163
13164 /* NOP -- we implement this and call it from the decode in case we
13165 want to intercept it later. */
13166
13167 static void
13168 nop (sim_cpu *cpu)
13169 {
13170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13171 }
13172
13173 /* Data synchronization barrier. */
13174
13175 static void
13176 dsb (sim_cpu *cpu)
13177 {
13178 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13179 }
13180
13181 /* Data memory barrier. */
13182
13183 static void
13184 dmb (sim_cpu *cpu)
13185 {
13186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13187 }
13188
13189 /* Instruction synchronization barrier. */
13190
13191 static void
13192 isb (sim_cpu *cpu)
13193 {
13194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13195 }
13196
13197 static void
13198 dexBranchImmediate (sim_cpu *cpu)
13199 {
13200 /* assert instr[30,26] == 00101
13201 instr[31] ==> 0 == B, 1 == BL
13202 instr[25,0] == imm26 branch offset counted in words. */
13203
13204 uint32_t top = INSTR (31, 31);
13205 /* We have a 26 bit signed word offset which we need to pass to the
13206 execute routine as a signed byte offset. */
13207 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13208
13209 if (top)
13210 bl (cpu, offset);
13211 else
13212 buc (cpu, offset);
13213 }
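#if 0
/* Sketch of the imm26 decode above: sign-extend the 26 bit word
   offset, then scale to bytes.  The most negative field value
   0x2000000 becomes -0x2000000 words == -0x8000000 bytes == -128MiB.
   (Mirrors the behaviour simm32 is assumed to provide.)  */
static int32_t
branch_offset_sketch (uint32_t imm26)
{
  int32_t words = (int32_t) (imm26 << 6) >> 6;  /* Sign-extend 26 bits.  */
  return words * 4;                             /* Words to bytes.  */
}
#endif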
13214
13215 /* Control Flow. */
13216
13217 /* Conditional branch
13218
13219 Offset is a PC-relative byte offset in the range +/- 1MiB. pos is
13220 a bit position in the range 0 .. 63.
13221
13222 cc is a CondCode enum value as pulled out of the decode
13223
13224 N.B. any offset register (source) can only be Xn or Wn. */
13225
13226 static void
13227 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13228 {
13229 /* The test returns TRUE if CC is met. */
13230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13231 if (testConditionCode (cpu, cc))
13232 aarch64_set_next_PC_by_offset (cpu, offset);
13233 }
13234
13235 /* 32 bit branch on register non-zero. */
13236 static void
13237 cbnz32 (sim_cpu *cpu, int32_t offset)
13238 {
13239 unsigned rt = INSTR (4, 0);
13240
13241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13242 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13243 aarch64_set_next_PC_by_offset (cpu, offset);
13244 }
13245
13246 /* 64 bit branch on register non-zero. */
13247 static void
13248 cbnz (sim_cpu *cpu, int32_t offset)
13249 {
13250 unsigned rt = INSTR (4, 0);
13251
13252 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13253 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13254 aarch64_set_next_PC_by_offset (cpu, offset);
13255 }
13256
13257 /* 32 bit branch on register zero. */
13258 static void
13259 cbz32 (sim_cpu *cpu, int32_t offset)
13260 {
13261 unsigned rt = INSTR (4, 0);
13262
13263 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13264 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13265 aarch64_set_next_PC_by_offset (cpu, offset);
13266 }
13267
13268 /* 64 bit branch on register zero. */
13269 static void
13270 cbz (sim_cpu *cpu, int32_t offset)
13271 {
13272 unsigned rt = INSTR (4, 0);
13273
13274 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13275 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13276 aarch64_set_next_PC_by_offset (cpu, offset);
13277 }
13278
13279 /* Branch on register bit test non-zero -- one size fits all. */
13280 static void
13281 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13282 {
13283 unsigned rt = INSTR (4, 0);
13284
13285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13286 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13287 aarch64_set_next_PC_by_offset (cpu, offset);
13288 }
13289
13290 /* Branch on register bit test zero -- one size fits all. */
13291 static void
13292 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13293 {
13294 unsigned rt = INSTR (4, 0);
13295
13296 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13297 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13298 aarch64_set_next_PC_by_offset (cpu, offset);
13299 }
13300
13301 static void
13302 dexCompareBranchImmediate (sim_cpu *cpu)
13303 {
13304 /* instr[30,25] = 01 1010
13305 instr[31] = size : 0 ==> 32, 1 ==> 64
13306 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13307 instr[23,5] = simm19 branch offset counted in words
13308 instr[4,0] = rt */
13309
13310 uint32_t size = INSTR (31, 31);
13311 uint32_t op = INSTR (24, 24);
13312 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13313
13314 if (size == 0)
13315 {
13316 if (op == 0)
13317 cbz32 (cpu, offset);
13318 else
13319 cbnz32 (cpu, offset);
13320 }
13321 else
13322 {
13323 if (op == 0)
13324 cbz (cpu, offset);
13325 else
13326 cbnz (cpu, offset);
13327 }
13328 }
13329
13330 static void
13331 dexTestBranchImmediate (sim_cpu *cpu)
13332 {
13333 /* instr[31] = b5 : bit 5 of test bit idx
13334 instr[30,25] = 01 1011
13335 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13336 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13337 instr[18,5] = simm14 : signed offset counted in words
13338 instr[4,0] = uimm5 */
13339
13340 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13341 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13342
13343 NYI_assert (30, 25, 0x1b);
13344
13345 if (INSTR (24, 24) == 0)
13346 tbz (cpu, pos, offset);
13347 else
13348 tbnz (cpu, pos, offset);
13349 }
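#if 0
/* The 6 bit test-bit index is b5:b40, so b5 must be shifted above all
   five b40 bits -- e.g. b5 == 1 and b40 == 01000 select bit 40
   (a sketch of the packing used above).  */
static uint32_t
test_bit_pos_sketch (uint32_t b5, uint32_t b40)
{
  return (b5 << 5) | b40;  /* (1, 8) ==> 40.  */
}
#endif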
13350
13351 static void
13352 dexCondBranchImmediate (sim_cpu *cpu)
13353 {
13354 /* instr[31,25] = 010 1010
13355 instr[24] = op1 : op1:op0 == 00 ==> B.cond, ow ==> UNALLOC
13356 instr[23,5] = simm19 : signed offset counted in words
13357 instr[4] = op0
13358 instr[3,0] = cond */
13359
13360 int32_t offset;
13361 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13362
13363 NYI_assert (31, 25, 0x2a);
13364
13365 if (op != 0)
13366 HALT_UNALLOC;
13367
13368 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13369
13370 bcc (cpu, offset, INSTR (3, 0));
13371 }
13372
13373 static void
13374 dexBranchRegister (sim_cpu *cpu)
13375 {
13376 /* instr[31,25] = 110 1011
13377 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13378 instr[20,16] = op2 : must be 11111
13379 instr[15,10] = op3 : must be 000000
13380 instr[4,0] = op4 : must be 00000. */
13381
13382 uint32_t op = INSTR (24, 21);
13383 uint32_t op2 = INSTR (20, 16);
13384 uint32_t op3 = INSTR (15, 10);
13385 uint32_t op4 = INSTR (4, 0);
13386
13387 NYI_assert (31, 25, 0x6b);
13388
13389 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13390 HALT_UNALLOC;
13391
13392 if (op == 0)
13393 br (cpu);
13394
13395 else if (op == 1)
13396 blr (cpu);
13397
13398 else if (op == 2)
13399 ret (cpu);
13400
13401 else
13402 {
13403 /* ERET and DRPS require 0b11111 for Rn = instr [9,5]; */
13404 /* anything else is unallocated. */
13405 uint32_t rn = INSTR (9, 5);
13406
13407 if (rn != 0x1f)
13408 HALT_UNALLOC;
13409
13410 if (op == 4 || op == 5)
13411 HALT_NYI;
13412
13413 HALT_UNALLOC;
13414 }
13415 }
13416
13417 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13418 but this may not be available. So instead we define the values we need
13419 here. */
13420 #define AngelSVC_Reason_Open 0x01
13421 #define AngelSVC_Reason_Close 0x02
13422 #define AngelSVC_Reason_Write 0x05
13423 #define AngelSVC_Reason_Read 0x06
13424 #define AngelSVC_Reason_IsTTY 0x09
13425 #define AngelSVC_Reason_Seek 0x0A
13426 #define AngelSVC_Reason_FLen 0x0C
13427 #define AngelSVC_Reason_Remove 0x0E
13428 #define AngelSVC_Reason_Rename 0x0F
13429 #define AngelSVC_Reason_Clock 0x10
13430 #define AngelSVC_Reason_Time 0x11
13431 #define AngelSVC_Reason_System 0x12
13432 #define AngelSVC_Reason_Errno 0x13
13433 #define AngelSVC_Reason_GetCmdLine 0x15
13434 #define AngelSVC_Reason_HeapInfo 0x16
13435 #define AngelSVC_Reason_ReportException 0x18
13436 #define AngelSVC_Reason_Elapsed 0x30
13437
13438
13439 static void
13440 handle_halt (sim_cpu *cpu, uint32_t val)
13441 {
13442 uint64_t result = 0;
13443
13444 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13445 if (val != 0xf000)
13446 {
13447 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13448 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13449 sim_stopped, SIM_SIGTRAP);
13450 }
13451
13452 /* We have encountered an Angel SVC call. See if we can process it. */
13453 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13454 {
13455 case AngelSVC_Reason_HeapInfo:
13456 {
13457 /* Get the values. */
13458 uint64_t stack_top = aarch64_get_stack_start (cpu);
13459 uint64_t heap_base = aarch64_get_heap_start (cpu);
13460
13461 /* Get the pointer */
13462 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13463 ptr = aarch64_get_mem_u64 (cpu, ptr);
13464
13465 /* Fill in the memory block. */
13466 /* Start addr of heap. */
13467 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13468 /* End addr of heap. */
13469 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13470 /* Lowest stack addr. */
13471 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13472 /* Initial stack addr. */
13473 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13474
13475 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13476 }
13477 break;
13478
13479 case AngelSVC_Reason_Open:
13480 {
13481 /* Get the pointer */
13482 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13483 /* FIXME: For now we just assume that we will only be asked
13484 to open the standard file descriptors. */
13485 static int fd = 0;
13486 result = fd ++;
13487
13488 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13489 }
13490 break;
13491
13492 case AngelSVC_Reason_Close:
13493 {
13494 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13495 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13496 result = 0;
13497 }
13498 break;
13499
13500 case AngelSVC_Reason_Errno:
13501 result = 0;
13502 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13503 break;
13504
13505 case AngelSVC_Reason_Clock:
13506 result =
13507 #ifdef CLOCKS_PER_SEC
13508 (CLOCKS_PER_SEC >= 100)
13509 ? (clock () / (CLOCKS_PER_SEC / 100))
13510 : ((clock () * 100) / CLOCKS_PER_SEC)
13511 #else
13512 /* Presume unix... clock() returns microseconds. */
13513 (clock () / 10000)
13514 #endif
13515 ;
13516 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13517 break;
13518
13519 case AngelSVC_Reason_GetCmdLine:
13520 {
13521 /* Get the pointer */
13522 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13523 ptr = aarch64_get_mem_u64 (cpu, ptr);
13524
13525 /* FIXME: No command line for now. */
13526 aarch64_set_mem_u64 (cpu, ptr, 0);
13527 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13528 }
13529 break;
13530
13531 case AngelSVC_Reason_IsTTY:
13532 result = 1;
13533 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13534 break;
13535
13536 case AngelSVC_Reason_Write:
13537 {
13538 /* Get the pointer */
13539 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13540 /* Get the write control block. */
13541 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13542 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13543 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13544
13545 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13546 PRIx64 " on descriptor %" PRIx64,
13547 len, buf, fd);
13548
13549 if (len > 1280)
13550 {
13551 TRACE_SYSCALL (cpu,
13552 " AngelSVC: Write: Suspiciously long write: %ld",
13553 (long) len);
13554 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13555 sim_stopped, SIM_SIGBUS);
13556 }
13557 else if (fd == 1)
13558 {
13559 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13560 }
13561 else if (fd == 2)
13562 {
13563 TRACE (cpu, 0, "\n");
13564 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13565 (int) len, aarch64_get_mem_ptr (cpu, buf));
13566 TRACE (cpu, 0, "\n");
13567 }
13568 else
13569 {
13570 TRACE_SYSCALL (cpu,
13571 " AngelSVC: Write: Unexpected file handle: %d",
13572 (int) fd);
13573 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13574 sim_stopped, SIM_SIGABRT);
13575 }
13576 }
13577 break;
13578
13579 case AngelSVC_Reason_ReportException:
13580 {
13581 /* Get the pointer */
13582 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13583 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13584 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13585 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13586
13587 TRACE_SYSCALL (cpu,
13588 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13589 type, state);
13590
13591 if (type == 0x20026)
13592 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13593 sim_exited, state);
13594 else
13595 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13596 sim_stopped, SIM_SIGINT);
13597 }
13598 break;
13599
13600 case AngelSVC_Reason_Read:
13601 case AngelSVC_Reason_FLen:
13602 case AngelSVC_Reason_Seek:
13603 case AngelSVC_Reason_Remove:
13604 case AngelSVC_Reason_Time:
13605 case AngelSVC_Reason_System:
13606 case AngelSVC_Reason_Rename:
13607 case AngelSVC_Reason_Elapsed:
13608 default:
13609 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13610 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13611 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13612 sim_stopped, SIM_SIGTRAP);
13613 }
13614
13615 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13616 }
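#if 0
/* The parameter block filled in for AngelSVC_Reason_HeapInfo above,
   laid out as the semihosting convention expects (field names here
   are illustrative, not taken from any header).  */
struct angel_heapinfo_sketch
{
  uint64_t heap_base;    /* +0: start address of the heap.  */
  uint64_t heap_limit;   /* +8: end address of the heap.  */
  uint64_t stack_base;   /* +16: lowest stack address.  */
  uint64_t stack_limit;  /* +24: initial stack address.  */
};
#endif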
13617
13618 static void
13619 dexExcpnGen (sim_cpu *cpu)
13620 {
13621 /* instr[31:24] = 11010100
13622 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13623 010 ==> HLT, 101 ==> DBG GEN EXCPN
13624 instr[20,5] = imm16
13625 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13626 instr[1,0] = LL : discriminates opc */
13627
13628 uint32_t opc = INSTR (23, 21);
13629 uint32_t imm16 = INSTR (20, 5);
13630 uint32_t opc2 = INSTR (4, 2);
13631 uint32_t LL;
13632
13633 NYI_assert (31, 24, 0xd4);
13634
13635 if (opc2 != 0)
13636 HALT_UNALLOC;
13637
13638 LL = INSTR (1, 0);
13639
13640 /* We only implement HLT and BRK for now. */
13641 if (opc == 1 && LL == 0)
13642 {
13643 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13644 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13645 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13646 }
13647
13648 if (opc == 2 && LL == 0)
13649 handle_halt (cpu, imm16);
13650
13651 else if (opc == 0 || opc == 5)
13652 HALT_NYI;
13653
13654 else
13655 HALT_UNALLOC;
13656 }
13657
13658 /* Stub for accessing system registers. */
13659
13660 static uint64_t
13661 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13662 unsigned crm, unsigned op2)
13663 {
13664 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13665 /* DCZID_EL0 - the Data Cache Zero ID register.
13666 We do not support DC ZVA at the moment, so
13667 we return a value with the disable bit set.
13668 We implement support for the DCZID register since
13669 it is used by the C library's memset function. */
13670 return ((uint64_t) 1) << 4;
13671
13672 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13673 /* Cache Type Register. */
13674 return 0x80008000UL;
13675
13676 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13677 /* TPIDR_EL0 - thread pointer id. */
13678 return aarch64_get_thread_id (cpu);
13679
13680 if (op1 == 3 && crm == 4 && op2 == 0)
13681 return aarch64_get_FPCR (cpu);
13682
13683 if (op1 == 3 && crm == 4 && op2 == 1)
13684 return aarch64_get_FPSR (cpu);
13685
13686 else if (op1 == 3 && crm == 2 && op2 == 0)
13687 return aarch64_get_CPSR (cpu);
13688
13689 HALT_NYI;
13690 }
13691
13692 static void
13693 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13694 unsigned crm, unsigned op2, uint64_t val)
13695 {
13696 if (op1 == 3 && crm == 4 && op2 == 0)
13697 aarch64_set_FPCR (cpu, val);
13698
13699 else if (op1 == 3 && crm == 4 && op2 == 1)
13700 aarch64_set_FPSR (cpu, val);
13701
13702 else if (op1 == 3 && crm == 2 && op2 == 0)
13703 aarch64_set_CPSR (cpu, val);
13704
13705 else
13706 HALT_NYI;
13707 }
13708
13709 static void
13710 do_mrs (sim_cpu *cpu)
13711 {
13712 /* instr[31:20] = 1101 0101 0011
13713 instr[19] = op0
13714 instr[18,16] = op1
13715 instr[15,12] = CRn
13716 instr[11,8] = CRm
13717 instr[7,5] = op2
13718 instr[4,0] = Rt */
13719 unsigned sys_op0 = INSTR (19, 19) + 2;
13720 unsigned sys_op1 = INSTR (18, 16);
13721 unsigned sys_crn = INSTR (15, 12);
13722 unsigned sys_crm = INSTR (11, 8);
13723 unsigned sys_op2 = INSTR (7, 5);
13724 unsigned rt = INSTR (4, 0);
13725
13726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13727 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13728 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13729 }
13730
13731 static void
13732 do_MSR_immediate (sim_cpu *cpu)
13733 {
13734 /* instr[31:19] = 1101 0101 0000 0
13735 instr[18,16] = op1
13736 instr[15,12] = 0100
13737 instr[11,8] = CRm
13738 instr[7,5] = op2
13739 instr[4,0] = 1 1111 */
13740
13741 unsigned op1 = INSTR (18, 16);
13742 /*unsigned crm = INSTR (11, 8);*/
13743 unsigned op2 = INSTR (7, 5);
13744
13745 NYI_assert (31, 19, 0x1AA0);
13746 NYI_assert (15, 12, 0x4);
13747 NYI_assert (4, 0, 0x1F);
13748
13749 if (op1 == 0)
13750 {
13751 if (op2 == 5)
13752 HALT_NYI; /* set SPSel. */
13753 else
13754 HALT_UNALLOC;
13755 }
13756 else if (op1 == 3)
13757 {
13758 if (op2 == 6)
13759 HALT_NYI; /* set DAIFset. */
13760 else if (op2 == 7)
13761 HALT_NYI; /* set DAIFclr. */
13762 else
13763 HALT_UNALLOC;
13764 }
13765 else
13766 HALT_UNALLOC;
13767 }
13768
13769 static void
13770 do_MSR_reg (sim_cpu *cpu)
13771 {
13772 /* instr[31:20] = 1101 0101 0001
13773 instr[19] = op0
13774 instr[18,16] = op1
13775 instr[15,12] = CRn
13776 instr[11,8] = CRm
13777 instr[7,5] = op2
13778 instr[4,0] = Rt */
13779
13780 unsigned sys_op0 = INSTR (19, 19) + 2;
13781 unsigned sys_op1 = INSTR (18, 16);
13782 unsigned sys_crn = INSTR (15, 12);
13783 unsigned sys_crm = INSTR (11, 8);
13784 unsigned sys_op2 = INSTR (7, 5);
13785 unsigned rt = INSTR (4, 0);
13786
13787 NYI_assert (31, 20, 0xD51);
13788
13789 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13790 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13791 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13792 }
13793
13794 static void
13795 do_SYS (sim_cpu *cpu)
13796 {
13797 /* instr[31,19] = 1101 0101 0000 1
13798 instr[18,16] = op1
13799 instr[15,12] = CRn
13800 instr[11,8] = CRm
13801 instr[7,5] = op2
13802 instr[4,0] = Rt */
13803 NYI_assert (31, 19, 0x1AA1);
13804
13805 /* FIXME: For now we just silently accept system ops. */
13806 }
13807
13808 static void
13809 dexSystem (sim_cpu *cpu)
13810 {
13811 /* instr[31:22] = 1101 0101 00
13812 instr[21] = L
13813 instr[20,19] = op0
13814 instr[18,16] = op1
13815 instr[15,12] = CRn
13816 instr[11,8] = CRm
13817 instr[7,5] = op2
13818 instr[4,0] = uimm5 */
13819
13820 /* We are interested in HINT, DSB, DMB and ISB
13821
13822 Hint #0 encodes NOOP (this is the only hint we care about)
13823 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
13824 CRm != 0000, OR (CRm == 0000 AND (op2 == 000 OR op2 > 101))
13825
13826 DSB, DMB and ISB are the data synchronization barrier, data memory
13827 barrier and instruction synchronization barrier, respectively, where
13828
13829 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13830 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13831 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13832 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
13833 10 ==> InnerShareable, 11 ==> FullSystem
13834 types : 01 ==> Reads, 10 ==> Writes,
13835 11 ==> All, 00 ==> All (domain == FullSystem). */
13836
13837 unsigned rt = INSTR (4, 0);
13838
13839 NYI_assert (31, 22, 0x354);
13840
13841 switch (INSTR (21, 12))
13842 {
13843 case 0x032:
13844 if (rt == 0x1F)
13845 {
13846 /* NOP has CRm != 0000 OR */
13847 /* (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
13848 uint32_t crm = INSTR (11, 8);
13849 uint32_t op2 = INSTR (7, 5);
13850
13851 if (crm != 0 || (op2 == 0 || op2 > 5))
13852 {
13853 /* Actually call nop method so we can reimplement it later. */
13854 nop (cpu);
13855 return;
13856 }
13857 }
13858 HALT_NYI;
13859
13860 case 0x033:
13861 {
13862 uint32_t op2 = INSTR (7, 5);
13863
13864 switch (op2)
13865 {
13866 case 2: HALT_NYI;
13867 case 4: dsb (cpu); return;
13868 case 5: dmb (cpu); return;
13869 case 6: isb (cpu); return;
13870 default: HALT_UNALLOC;
13871 }
13872 }
13873
13874 case 0x3B0:
13875 case 0x3B4:
13876 case 0x3BD:
13877 do_mrs (cpu);
13878 return;
13879
13880 case 0x0B7:
13881 do_SYS (cpu); /* DC is an alias of SYS. */
13882 return;
13883
13884 default:
13885 if (INSTR (21, 20) == 0x1)
13886 do_MSR_reg (cpu);
13887 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13888 do_MSR_immediate (cpu);
13889 else
13890 HALT_NYI;
13891 return;
13892 }
13893 }
13894
13895 static void
13896 dexBr (sim_cpu *cpu)
13897 {
13898 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13899 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13900 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13901 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13902
13903 switch (group2)
13904 {
13905 case BR_IMM_000:
13906 return dexBranchImmediate (cpu);
13907
13908 case BR_IMMCMP_001:
13909 /* Compare has bit 25 clear while test has it set. */
13910 if (!INSTR (25, 25))
13911 dexCompareBranchImmediate (cpu);
13912 else
13913 dexTestBranchImmediate (cpu);
13914 return;
13915
13916 case BR_IMMCOND_010:
13917 /* This is a conditional branch if bit 25 is clear otherwise
13918 unallocated. */
13919 if (!INSTR (25, 25))
13920 dexCondBranchImmediate (cpu);
13921 else
13922 HALT_UNALLOC;
13923 return;
13924
13925 case BR_UNALLOC_011:
13926 HALT_UNALLOC;
13927
13928 case BR_IMM_100:
13929 dexBranchImmediate (cpu);
13930 return;
13931
13932 case BR_IMMCMP_101:
13933 /* Compare has bit 25 clear while test has it set. */
13934 if (!INSTR (25, 25))
13935 dexCompareBranchImmediate (cpu);
13936 else
13937 dexTestBranchImmediate (cpu);
13938 return;
13939
13940 case BR_REG_110:
13941 /* Unconditional branch reg has bit 25 set. */
13942 if (INSTR (25, 25))
13943 dexBranchRegister (cpu);
13944
13945 /* This includes Excpn Gen, System and unalloc operations.
13946 We need to decode the Excpn Gen operation BRK so we can plant
13947 debugger entry points.
13948 Excpn Gen operations have instr [24] = 0.
13949 We need to decode at least one of the System operations, NOP,
13950 which is an alias for HINT #0.
13951 System operations have instr [24,22] = 100. */
13952 else if (INSTR (24, 24) == 0)
13953 dexExcpnGen (cpu);
13954
13955 else if (INSTR (24, 22) == 4)
13956 dexSystem (cpu);
13957
13958 else
13959 HALT_UNALLOC;
13960
13961 return;
13962
13963 case BR_UNALLOC_111:
13964 HALT_UNALLOC;
13965
13966 default:
13967 /* Should never reach here. */
13968 HALT_NYI;
13969 }
13970 }
13971
13972 static void
13973 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
13974 {
13975 /* We need to check if gdb wants in here. */
13976 /* checkBreak (cpu); */
13977
13978 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
13979
13980 switch (group)
13981 {
13982 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
13983 case GROUP_LDST_0100: dexLdSt (cpu); break;
13984 case GROUP_DPREG_0101: dexDPReg (cpu); break;
13985 case GROUP_LDST_0110: dexLdSt (cpu); break;
13986 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
13987 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
13988 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
13989 case GROUP_BREXSYS_1010: dexBr (cpu); break;
13990 case GROUP_BREXSYS_1011: dexBr (cpu); break;
13991 case GROUP_LDST_1100: dexLdSt (cpu); break;
13992 case GROUP_DPREG_1101: dexDPReg (cpu); break;
13993 case GROUP_LDST_1110: dexLdSt (cpu); break;
13994 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
13995
13996 case GROUP_UNALLOC_0001:
13997 case GROUP_UNALLOC_0010:
13998 case GROUP_UNALLOC_0011:
13999 HALT_UNALLOC;
14000
14001 default:
14002 /* Should never reach here. */
14003 HALT_NYI;
14004 }
14005 }
14006
14007 static bfd_boolean
14008 aarch64_step (sim_cpu *cpu)
14009 {
14010 uint64_t pc = aarch64_get_PC (cpu);
14011
14012 if (pc == TOP_LEVEL_RETURN_PC)
14013 return FALSE;
14014
14015 aarch64_set_next_PC (cpu, pc + 4);
14016 aarch64_get_instr (cpu) = aarch64_get_mem_u32 (cpu, pc);
14017
14018 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14019 aarch64_get_instr (cpu));
14020 TRACE_DISASM (cpu, pc);
14021
14022 aarch64_decode_and_execute (cpu, pc);
14023
14024 return TRUE;
14025 }
14026
14027 void
14028 aarch64_run (SIM_DESC sd)
14029 {
14030 sim_cpu *cpu = STATE_CPU (sd, 0);
14031
14032 while (aarch64_step (cpu))
14033 aarch64_update_PC (cpu);
14034
14035 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
14036 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
14037 }
14038
14039 void
14040 aarch64_init (sim_cpu *cpu, uint64_t pc)
14041 {
14042 uint64_t sp = aarch64_get_stack_start (cpu);
14043
14044 /* Install SP, FP and PC and set LR to -20
14045 so we can detect a top-level return. */
14046 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14047 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14048 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14049 aarch64_set_next_PC (cpu, pc);
14050 aarch64_update_PC (cpu);
14051 aarch64_init_LIT_table ();
14052 }