/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* This must come before any other includes. */
#include "defs.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro. */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unallocated instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGILL); \
    } \
  while (0)

#define HALT_NYI \
  do \
    { \
      TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
      TRACE_INSN (cpu, \
                  "Unimplemented instruction detected at sim line %d," \
                  " exe addr %" PRIx64, \
                  __LINE__, aarch64_get_PC (cpu)); \
      if (! TRACE_ANY_P (cpu)) \
        sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
                        aarch64_get_instr (cpu)); \
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu), \
                       sim_stopped, SIM_SIGABRT); \
    } \
  while (0)

#define NYI_assert(HI, LO, EXPECTED) \
  do \
    { \
      if (INSTR ((HI), (LO)) != (EXPECTED)) \
        HALT_NYI; \
    } \
  while (0)

/* Helper functions used by expand_logical_immediate. */

/* For i = 1, ..., N set result<i-1> to 1; all other bits are zero. */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1UL : ((1ULL << N) - 1));
}

/* Return result<0> = val<N>, i.e. bit N of VAL as a 0-or-1 value. */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
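
/* Editor's note: illustrative values for the two helpers above
   (a sketch, not part of the simulator):

     ones (4)           => 0x000000000000000f
     ones (64)          => 0xffffffffffffffff
     pickbit (0xa4, 2)  => 1   (bit 2 of 0b10100100)
     pickbit (0xa4, 3)  => 0  */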

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is S+1 bits set to 1, left rotated by SIMDsize - R
     (in other words, right rotated by R), then replicated. */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
        {
        case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
        case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
        case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
        case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
        case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
        default: return 0;
        }
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED. */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1. */
  /* NOTE: S can't be 63 due to detection above. */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R. */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size. */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm;
    case  4: imm = (imm <<  4) | imm;
    case  8: imm = (imm <<  8) | imm;
    case 16: imm = (imm << 16) | imm;
    case 32: imm = (imm << 32) | imm;
    case 64: break;
    default: return 0;
    }

  return imm;
}
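
/* Editor's worked examples (a sketch; the values follow directly from
   the decode above):

     expand_logical_immediate (0x03, 0, 1) => 0x000000000000000f
       (N=1: 64-bit element, S+1 = 4 ones, no rotate)
     expand_logical_immediate (0x03, 0, 0) => 0x0000000f0000000f
       (N=0, S = 0xxxxx: 32-bit element, replicated twice)
     expand_logical_immediate (0x21, 2, 0) => 0xc000c000c000c000
       (N=0, S = 10xxxx: 16-bit element 0b11 rotated right by 2, replicated)
     expand_logical_immediate (0x1f, 0, 0) => 0
       (an all-ones element is rejected)  */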

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of table entries. */
#define LI_TABLE_SIZE (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
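
#if 0
/* Editor's sketch (hypothetical helper, not part of the simulator):
   a decoder would consult the table by concatenating N:immr:imms from
   instr[22,10] and treat a zero entry as an invalid encoding, since
   expand_logical_immediate returns 0 exactly for rejected patterns.  */
static uint64_t
lookup_logical_immediate (sim_cpu *cpu)
{
  uint32_t index = INSTR (22, 10);	/* N:immr:imms, 13 bits.  */
  uint64_t imm = LITable[index];

  if (imm == 0)
    HALT_UNALLOC;
  return imm;
}
#endif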

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode. */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    case 2:
      /* aarch64_notifyMethodExit (); */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0)); */
      break;
    }
}

/* Secondary decode within top level groups. */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code. A register
     argument holds the address of the x86 routine. Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned. */

  uint32_t PSEUDO_HALT = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment. */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP. */

/* 32 bit load 32 bit unscaled signed 9 bit. */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit. */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit. */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit. */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit. */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset. */

/* 32 bit store 32 bit unscaled signed 9 bit. */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words
   rt may not be SP. */

/* 32 bit pc-relative load */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load. */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is either
   16, 32, 64 or 128. */

#define SCALE(_offset, _elementSize) \
  ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is an element size as for SCALE.
   The third argument is either Scaled or Unscaled.
   N.B. when _Scaling is Scaled the element shift is applied;
   when it is Unscaled the shift count is 0. */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
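
/* Editor's sketch of the expansions (assuming ScaleShift32 == 2 and
   ScaleShift64 == 3, i.e. log2 of the element byte size):

     SCALE (offset, 32)             => offset << 2   (offset * 4)
     OPT_SCALE (disp, 64, Scaled)   => disp << 3     (disp * 8)
     OPT_SCALE (disp, 64, Unscaled) => disp          (shift count 0)  */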

/* This can be used to zero or sign extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t. */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t n;
  } x;

  /* A branchless variant of this ought to be possible. */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
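
/* Editor's note, illustrative results (a sketch):

     extend (0x80000000, UXTW) => 0x0000000080000000  (zero-extend)
     extend (0x80000000, SXTW) => 0xffffffff80000000  (sign-extend)  */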

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register. */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
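
/* Editor's note: the *_wb loads and stores in this file all follow the
   writeback pattern shown in fldrs_wb above (a sketch):

     NoWriteBack: access [rn + offset]; rn is left unchanged.
     Pre:         access [rn + offset]; then rn += offset.
     Post:        access [rn];          then rn += offset.  */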

/* Load 8 bit with unsigned 12 bit offset. */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit. */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit. */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit. */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit. */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset. */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   Scaled or unscaled 64-bit register offset.
   Scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to arrive raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  With register or extended register offset
   versions the same applies, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode. */
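
/* Editor's sketch of the four modes, using the 64-bit LDR handlers
   below as an example (offsets shown as they arrive from decode;
   assembly forms are illustrative only):

     ldr_abs (cpu, imm12)              LDR x0, [x1, #imm12 * 8]
     ldr_wb (cpu, simm9, Pre)          LDR x0, [x1, #simm9]!
     ldr_wb (cpu, simm9, Post)         LDR x0, [x1], #simm9
     ldr_scale_ext (cpu, Scaled, SXTW) LDR x0, [x1, w2, SXTW #3]  */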

/* 32 bit load 32 bit scaled unsigned 12 bit */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}

/* 32 bit load zero-extended byte scaled unsigned 12 bit. */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit. */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit. */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit. */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit. */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be. */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback. */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset. */

/* 32 bit store scaled unsigned 12 bit. */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset. */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit. */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit. */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load. */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load. */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit. */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be. */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset. */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit. */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
                        + SCALE (offset, 64). */

  /* TODO : implement prefetch of address. */
}

/* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement. */

  /* TODO : implement prefetch of address */
}

/* 64 bit pc-relative prefetch. */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset. */

  /* TODO : implement this */
}

/* Load-store exclusive. */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15); */
  /* int exclusive = ! INSTR (23, 23); */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Always exclusive... */
}
static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30]:instr[26] = opc:V : 000 ==> LDRW,  001 ==> FLDRS
                                      010 ==> LDRX,  011 ==> FLDRD
                                      100 ==> LDRSW, 101 ==> FLDRQ
                                      110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19 */

  /* unsigned rt = INSTR (4, 0); */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
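
/* Editor's worked example (a sketch): for LDR x2, <label> the fields
   above give instr[31,30] = 01 and instr[26] = 0, so dispatch = 2 and
   ldr_pcrel is chosen, with imm = simm19 counted in words (the pc-rel
   handlers multiply it by 4).  */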

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest). */
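
/* Editor's sketch: ADD x0, x1, #1, LSL #12 would arrive here with
   aimm == 0x1000 -- the optional 12-bit shift has already been applied
   by the decode layer, so the handlers below simply add aimm.  */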

/* 32 bit add immediate. */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate. */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  if (uresult != (uint32_t)uresult)
    flags |= C;

  if (sresult != (int32_t)sresult)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
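
/* Editor's worked examples for set_flags_for_add32 (a sketch; the
   values follow from the checks above):

     0x7fffffff + 1 => result 0x80000000 : N and V set (signed overflow)
     0xffffffff + 1 => result 0x00000000 : Z and C set (unsigned carry)
     0x00000001 + 1 => result 0x00000002 : no flags set  */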

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && NEG (value2))
      || (NEG (value1) && POS (result))
      || (NEG (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && NEG (value2) && POS (result))
      || (POS (value1) && POS (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
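
/* Editor's note: for SUB/CMP the C flag means "no borrow", i.e. C is
   set when value1 >= value2 as unsigned.  Worked 64-bit examples
   (a sketch; they follow from set_flags_for_sub64 above):

     5 - 3 => C set, no other flags
     3 - 5 => N set, C clear (borrow)
     5 - 5 => Z and C set  */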
1741
1742 static void
1743 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1744 {
1745 uint32_t flags = 0;
1746
1747 if (result == 0)
1748 flags |= Z;
1749 else
1750 flags &= ~ Z;
1751
1752 if (result & (1 << 31))
1753 flags |= N;
1754 else
1755 flags &= ~ N;
1756
1757 aarch64_set_CPSR (cpu, flags);
1758 }
1759
1760 static void
1761 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1762 {
1763 uint32_t flags = 0;
1764
1765 if (result == 0)
1766 flags |= Z;
1767 else
1768 flags &= ~ Z;
1769
1770 if (result & (1ULL << 63))
1771 flags |= N;
1772 else
1773 flags &= ~ N;
1774
1775 aarch64_set_CPSR (cpu, flags);
1776 }
1777
1778 /* 32 bit add immediate set flags. */
1779 static void
1780 adds32 (sim_cpu *cpu, uint32_t aimm)
1781 {
1782 unsigned rn = INSTR (9, 5);
1783 unsigned rd = INSTR (4, 0);
1784 /* TODO : do we need to worry about signs here? */
1785 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1786
1787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1788 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1789 set_flags_for_add32 (cpu, value1, aimm);
1790 }
1791
1792 /* 64 bit add immediate set flags. */
1793 static void
1794 adds64 (sim_cpu *cpu, uint32_t aimm)
1795 {
1796 unsigned rn = INSTR (9, 5);
1797 unsigned rd = INSTR (4, 0);
1798 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1799 uint64_t value2 = aimm;
1800
1801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1802 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1803 set_flags_for_add64 (cpu, value1, value2);
1804 }
1805
1806 /* 32 bit sub immediate. */
1807 static void
1808 sub32 (sim_cpu *cpu, uint32_t aimm)
1809 {
1810 unsigned rn = INSTR (9, 5);
1811 unsigned rd = INSTR (4, 0);
1812
1813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1814 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1815 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1816 }
1817
1818 /* 64 bit sub immediate. */
1819 static void
1820 sub64 (sim_cpu *cpu, uint32_t aimm)
1821 {
1822 unsigned rn = INSTR (9, 5);
1823 unsigned rd = INSTR (4, 0);
1824
1825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1826 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1827 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1828 }
1829
1830 /* 32 bit sub immediate set flags. */
1831 static void
1832 subs32 (sim_cpu *cpu, uint32_t aimm)
1833 {
1834 unsigned rn = INSTR (9, 5);
1835 unsigned rd = INSTR (4, 0);
1836 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1837 uint32_t value2 = aimm;
1838
1839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1840 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1841 set_flags_for_sub32 (cpu, value1, value2);
1842 }
1843
1844 /* 64 bit sub immediate set flags. */
1845 static void
1846 subs64 (sim_cpu *cpu, uint32_t aimm)
1847 {
1848 unsigned rn = INSTR (9, 5);
1849 unsigned rd = INSTR (4, 0);
1850 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1851 uint64_t value2 = aimm;
1852
1853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1854 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1855 set_flags_for_sub64 (cpu, value1, value2);
1856 }
1857
1858 /* Data Processing Register. */
1859
1860 /* First two helpers to perform the shift operations. */
1861
1862 static inline uint32_t
1863 shifted32 (uint32_t value, Shift shift, uint32_t count)
1864 {
1865 switch (shift)
1866 {
1867 default:
1868 case LSL:
1869 return (value << count);
1870 case LSR:
1871 return (value >> count);
1872 case ASR:
1873 {
1874 int32_t svalue = value;
1875 return (svalue >> count);
1876 }
1877 case ROR:
1878 {
1879 uint32_t top = value >> count;
1880 uint32_t bottom = count ? value << (32 - count) : 0; /* A shift by 32 would be undefined. */
1881 return (bottom | top);
1882 }
1883 }
1884 }
1885
1886 static inline uint64_t
1887 shifted64 (uint64_t value, Shift shift, uint32_t count)
1888 {
1889 switch (shift)
1890 {
1891 default:
1892 case LSL:
1893 return (value << count);
1894 case LSR:
1895 return (value >> count);
1896 case ASR:
1897 {
1898 int64_t svalue = value;
1899 return (svalue >> count);
1900 }
1901 case ROR:
1902 {
1903 uint64_t top = value >> count;
1904 uint64_t bottom = count ? value << (64 - count) : 0; /* A shift by 64 would be undefined. */
1905 return (bottom | top);
1906 }
1907 }
1908 }
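
/* Worked examples for the shift helpers, kept in a disabled block.
   ROR feeds the bits shifted out on the right back in at the top:
   rotating 0x80000001 right by one gives 0xC0000000.  The ASR case
   assumes the usual arithmetic right shift of negative
   two's-complement integers.  */
#if 0
static int
shift_helper_examples (void)
{
  return shifted32 (0x80000001, ROR, 1) == 0xC0000000
    && shifted32 (0x80000000, ASR, 31) == 0xFFFFFFFF
    && shifted64 (1ULL, LSL, 63) == 0x8000000000000000ULL;
}
#endif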
1909
1910 /* Arithmetic shifted register.
1911 These allow an optional LSL, ASR or LSR to the second source
1912 register with a count up to the register bit count.
1913
1914 N.B. register args may not be SP. */
1915
1916 /* 32 bit ADD shifted register. */
1917 static void
1918 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1919 {
1920 unsigned rm = INSTR (20, 16);
1921 unsigned rn = INSTR (9, 5);
1922 unsigned rd = INSTR (4, 0);
1923
1924 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1925 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1926 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1927 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1928 shift, count));
1929 }
1930
1931 /* 64 bit ADD shifted register. */
1932 static void
1933 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1934 {
1935 unsigned rm = INSTR (20, 16);
1936 unsigned rn = INSTR (9, 5);
1937 unsigned rd = INSTR (4, 0);
1938
1939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1940 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1941 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1942 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1943 shift, count));
1944 }
1945
1946 /* 32 bit ADD shifted register setting flags. */
1947 static void
1948 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1949 {
1950 unsigned rm = INSTR (20, 16);
1951 unsigned rn = INSTR (9, 5);
1952 unsigned rd = INSTR (4, 0);
1953
1954 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1955 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1956 shift, count);
1957
1958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1959 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1960 set_flags_for_add32 (cpu, value1, value2);
1961 }
1962
1963 /* 64 bit ADD shifted register setting flags. */
1964 static void
1965 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1966 {
1967 unsigned rm = INSTR (20, 16);
1968 unsigned rn = INSTR (9, 5);
1969 unsigned rd = INSTR (4, 0);
1970
1971 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1972 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1973 shift, count);
1974
1975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1976 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1977 set_flags_for_add64 (cpu, value1, value2);
1978 }
1979
1980 /* 32 bit SUB shifted register. */
1981 static void
1982 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1983 {
1984 unsigned rm = INSTR (20, 16);
1985 unsigned rn = INSTR (9, 5);
1986 unsigned rd = INSTR (4, 0);
1987
1988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1989 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1990 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1991 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1992 shift, count));
1993 }
1994
1995 /* 64 bit SUB shifted register. */
1996 static void
1997 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1998 {
1999 unsigned rm = INSTR (20, 16);
2000 unsigned rn = INSTR (9, 5);
2001 unsigned rd = INSTR (4, 0);
2002
2003 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2004 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2005 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2006 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2007 shift, count));
2008 }
2009
2010 /* 32 bit SUB shifted register setting flags. */
2011 static void
2012 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2013 {
2014 unsigned rm = INSTR (20, 16);
2015 unsigned rn = INSTR (9, 5);
2016 unsigned rd = INSTR (4, 0);
2017
2018 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2019 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2020 shift, count);
2021
2022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2023 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2024 set_flags_for_sub32 (cpu, value1, value2);
2025 }
2026
2027 /* 64 bit SUB shifted register setting flags. */
2028 static void
2029 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2030 {
2031 unsigned rm = INSTR (20, 16);
2032 unsigned rn = INSTR (9, 5);
2033 unsigned rd = INSTR (4, 0);
2034
2035 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2036 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2037 shift, count);
2038
2039 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2040 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2041 set_flags_for_sub64 (cpu, value1, value2);
2042 }
2043
2044 /* First a couple more helpers to fetch the
2045 relevant source register element either
2046 sign or zero extended as required by the
2047 extension value. */
2048
2049 static uint32_t
2050 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2051 {
2052 switch (extension)
2053 {
2054 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2055 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2056 case UXTW: /* Fall through. */
2057 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2058 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2059 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2060 case SXTW: /* Fall through. */
2061 case SXTX: /* Fall through. */
2062 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2063 }
2064 }
2065
2066 static uint64_t
2067 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2068 {
2069 switch (extension)
2070 {
2071 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2072 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2073 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2074 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2075 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2076 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2077 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2078 case SXTX:
2079 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2080 }
2081 }
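
/* Worked example, kept in a disabled block: the same low byte 0x80
   either zero extends (UXTB) to 0x80 or sign extends (SXTB) to all
   ones above bit 7.  The register number used here is arbitrary,
   and the check assumes that register currently holds 0x80.  */
#if 0
static int
extreg_examples (sim_cpu *cpu)
{
  return extreg64 (cpu, 1, UXTB) == 0x80
    && extreg64 (cpu, 1, SXTB) == 0xFFFFFFFFFFFFFF80ULL;
}
#endif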
2082
2083 /* Arithmetic extending register.
2084 These allow a sign or zero extension of some portion of the
2085 second source register, followed by a left shift of between
2086 0 and 4 bits.
2087
2088 N.B. the output (dest) and first input arg (source) may normally
2089 be Xn or SP. However, for flag setting operations dest can only
2090 be Xn. The second input register is always Xn. */
2091
2092 /* 32 bit ADD extending register. */
2093 static void
2094 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2095 {
2096 unsigned rm = INSTR (20, 16);
2097 unsigned rn = INSTR (9, 5);
2098 unsigned rd = INSTR (4, 0);
2099
2100 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2101 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2102 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2103 + (extreg32 (cpu, rm, extension) << shift));
2104 }
2105
2106 /* 64 bit ADD extending register.
2107 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2108 static void
2109 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2110 {
2111 unsigned rm = INSTR (20, 16);
2112 unsigned rn = INSTR (9, 5);
2113 unsigned rd = INSTR (4, 0);
2114
2115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2116 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2117 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2118 + (extreg64 (cpu, rm, extension) << shift));
2119 }
2120
2121 /* 32 bit ADD extending register setting flags. */
2122 static void
2123 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2124 {
2125 unsigned rm = INSTR (20, 16);
2126 unsigned rn = INSTR (9, 5);
2127 unsigned rd = INSTR (4, 0);
2128
2129 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2130 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2131
2132 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2133 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2134 set_flags_for_add32 (cpu, value1, value2);
2135 }
2136
2137 /* 64 bit ADD extending register setting flags */
2138 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2139 static void
2140 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2141 {
2142 unsigned rm = INSTR (20, 16);
2143 unsigned rn = INSTR (9, 5);
2144 unsigned rd = INSTR (4, 0);
2145
2146 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2147 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2148
2149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2150 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2151 set_flags_for_add64 (cpu, value1, value2);
2152 }
2153
2154 /* 32 bit SUB extending register. */
2155 static void
2156 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2157 {
2158 unsigned rm = INSTR (20, 16);
2159 unsigned rn = INSTR (9, 5);
2160 unsigned rd = INSTR (4, 0);
2161
2162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2163 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2164 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2165 - (extreg32 (cpu, rm, extension) << shift));
2166 }
2167
2168 /* 64 bit SUB extending register. */
2169 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2170 static void
2171 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2172 {
2173 unsigned rm = INSTR (20, 16);
2174 unsigned rn = INSTR (9, 5);
2175 unsigned rd = INSTR (4, 0);
2176
2177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2178 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2179 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2180 - (extreg64 (cpu, rm, extension) << shift));
2181 }
2182
2183 /* 32 bit SUB extending register setting flags. */
2184 static void
2185 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2186 {
2187 unsigned rm = INSTR (20, 16);
2188 unsigned rn = INSTR (9, 5);
2189 unsigned rd = INSTR (4, 0);
2190
2191 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2192 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2193
2194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2195 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2196 set_flags_for_sub32 (cpu, value1, value2);
2197 }
2198
2199 /* 64 bit SUB extending register setting flags */
2200 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2201 static void
2202 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2203 {
2204 unsigned rm = INSTR (20, 16);
2205 unsigned rn = INSTR (9, 5);
2206 unsigned rd = INSTR (4, 0);
2207
2208 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2209 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2210
2211 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2212 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2213 set_flags_for_sub64 (cpu, value1, value2);
2214 }
2215
2216 static void
2217 dexAddSubtractImmediate (sim_cpu *cpu)
2218 {
2219 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2220 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2221 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2222 instr[28,24] = 10001
2223 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2224 instr[21,10] = uimm12
2225 instr[9,5] = Rn
2226 instr[4,0] = Rd */
2227
2228 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2229 uint32_t shift = INSTR (23, 22);
2230 uint32_t imm = INSTR (21, 10);
2231 uint32_t dispatch = INSTR (31, 29);
2232
2233 NYI_assert (28, 24, 0x11);
2234
2235 if (shift > 1)
2236 HALT_UNALLOC;
2237
2238 if (shift)
2239 imm <<= 12;
2240
2241 switch (dispatch)
2242 {
2243 case 0: add32 (cpu, imm); break;
2244 case 1: adds32 (cpu, imm); break;
2245 case 2: sub32 (cpu, imm); break;
2246 case 3: subs32 (cpu, imm); break;
2247 case 4: add64 (cpu, imm); break;
2248 case 5: adds64 (cpu, imm); break;
2249 case 6: sub64 (cpu, imm); break;
2250 case 7: subs64 (cpu, imm); break;
2251 }
2252 }
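
/* A worked encoding, kept in a disabled block: ADDS W1, W2, #4095
   has size:op:set = 001, bits [28,24] = 10001, shift = 00,
   uimm12 = 0xFFF, Rn = 2 and Rd = 1, giving the word 0x313FFC41.
   Its dispatch value above is 1, so it is handled by adds32.  */
#if 0
static uint32_t
adds_immediate_example (void)
{
  return (0u << 31)       /* size : 32 bit.  */
    | (0u << 30)          /* op   : ADD.  */
    | (1u << 29)          /* set  : flags.  */
    | (0x11u << 24)       /* fixed 10001.  */
    | (0u << 22)          /* shift: LSL #0.  */
    | (0xFFFu << 10)      /* uimm12.  */
    | (2u << 5)           /* Rn.  */
    | 1u;                 /* Rd.  */
}
#endif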
2253
2254 static void
2255 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2256 {
2257 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2258 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2259 instr[28,24] = 01011
2260 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2261 instr[21] = 0
2262 instr[20,16] = Rm
2263 instr[15,10] = count : must be 0xxxxx for 32 bit
2264 instr[9,5] = Rn
2265 instr[4,0] = Rd */
2266
2267 uint32_t size = INSTR (31, 31);
2268 uint32_t count = INSTR (15, 10);
2269 Shift shiftType = INSTR (23, 22);
2270
2271 NYI_assert (28, 24, 0x0B);
2272 NYI_assert (21, 21, 0);
2273
2274 /* Shift encoded as ROR is unallocated. */
2275 if (shiftType == ROR)
2276 HALT_UNALLOC;
2277
2278 /* 32 bit operations must have count[5] = 0
2279 or else we have an UNALLOC. */
2280 if (size == 0 && uimm (count, 5, 5))
2281 HALT_UNALLOC;
2282
2283 /* Dispatch on size:op i.e instr [31,29]. */
2284 switch (INSTR (31, 29))
2285 {
2286 case 0: add32_shift (cpu, shiftType, count); break;
2287 case 1: adds32_shift (cpu, shiftType, count); break;
2288 case 2: sub32_shift (cpu, shiftType, count); break;
2289 case 3: subs32_shift (cpu, shiftType, count); break;
2290 case 4: add64_shift (cpu, shiftType, count); break;
2291 case 5: adds64_shift (cpu, shiftType, count); break;
2292 case 6: sub64_shift (cpu, shiftType, count); break;
2293 case 7: subs64_shift (cpu, shiftType, count); break;
2294 }
2295 }
2296
2297 static void
2298 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2299 {
2300 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2301 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2302 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2303 instr[28,24] = 01011
2304 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2305 instr[21] = 1
2306 instr[20,16] = Rm
2307 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2308 010 ==> LSL|UXTW, 011 ==> UXTX,
2309 100 ==> SXTB, 101 ==> SXTH,
2310 110 ==> SXTW, 111 ==> SXTX
2311 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2312 instr[9,5] = Rn
2313 instr[4,0] = Rd */
2314
2315 Extension extensionType = INSTR (15, 13);
2316 uint32_t shift = INSTR (12, 10);
2317
2318 NYI_assert (28, 24, 0x0B);
2319 NYI_assert (21, 21, 1);
2320
2321 /* Shift may not exceed 4. */
2322 if (shift > 4)
2323 HALT_UNALLOC;
2324
2325 /* Dispatch on size:op:set?. */
2326 switch (INSTR (31, 29))
2327 {
2328 case 0: add32_ext (cpu, extensionType, shift); break;
2329 case 1: adds32_ext (cpu, extensionType, shift); break;
2330 case 2: sub32_ext (cpu, extensionType, shift); break;
2331 case 3: subs32_ext (cpu, extensionType, shift); break;
2332 case 4: add64_ext (cpu, extensionType, shift); break;
2333 case 5: adds64_ext (cpu, extensionType, shift); break;
2334 case 6: sub64_ext (cpu, extensionType, shift); break;
2335 case 7: subs64_ext (cpu, extensionType, shift); break;
2336 }
2337 }
2338
2339 /* Conditional data processing
2340 Condition register is implicit 3rd source. */
2341
2342 /* 32 bit add with carry. */
2343 /* N.B. register args may not be SP. */
2344
2345 static void
2346 adc32 (sim_cpu *cpu)
2347 {
2348 unsigned rm = INSTR (20, 16);
2349 unsigned rn = INSTR (9, 5);
2350 unsigned rd = INSTR (4, 0);
2351
2352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2353 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2354 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2355 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2356 + IS_SET (C));
2357 }
2358
2359 /* 64 bit add with carry */
2360 static void
2361 adc64 (sim_cpu *cpu)
2362 {
2363 unsigned rm = INSTR (20, 16);
2364 unsigned rn = INSTR (9, 5);
2365 unsigned rd = INSTR (4, 0);
2366
2367 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2369 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2370 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2371 + IS_SET (C));
2372 }
2373
2374 /* 32 bit add with carry setting flags. */
2375 static void
2376 adcs32 (sim_cpu *cpu)
2377 {
2378 unsigned rm = INSTR (20, 16);
2379 unsigned rn = INSTR (9, 5);
2380 unsigned rd = INSTR (4, 0);
2381
2382 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2383 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2384 uint32_t carry = IS_SET (C);
2385
2386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2387 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2388 set_flags_for_add32 (cpu, value1, value2 + carry);
2389 }
2390
2391 /* 64 bit add with carry setting flags. */
2392 static void
2393 adcs64 (sim_cpu *cpu)
2394 {
2395 unsigned rm = INSTR (20, 16);
2396 unsigned rn = INSTR (9, 5);
2397 unsigned rd = INSTR (4, 0);
2398
2399 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2400 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2401 uint64_t carry = IS_SET (C);
2402
2403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2404 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2405 set_flags_for_add64 (cpu, value1, value2 + carry);
2406 }
2407
2408 /* 32 bit sub with carry. */
2409 static void
2410 sbc32 (sim_cpu *cpu)
2411 {
2412 unsigned rm = INSTR (20, 16);
2413 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2414 unsigned rd = INSTR (4, 0);
2415
2416 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2417 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2418 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2419 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2420 - 1 + IS_SET (C));
2421 }
2422
2423 /* 64 bit sub with carry */
2424 static void
2425 sbc64 (sim_cpu *cpu)
2426 {
2427 unsigned rm = INSTR (20, 16);
2428 unsigned rn = INSTR (9, 5);
2429 unsigned rd = INSTR (4, 0);
2430
2431 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2432 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2433 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2434 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2435 - 1 + IS_SET (C));
2436 }
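
/* The identity behind the "- 1 + IS_SET (C)" term, kept in a
   disabled block: SBC is defined as Rn + NOT (Rm) + C, which in
   two's complement equals Rn - Rm - 1 + C.  With C set this is a
   plain subtract; with C clear it borrows one.  */
#if 0
static uint64_t
sbc_model (uint64_t rn, uint64_t rm, unsigned carry)
{
  return rn + ~rm + carry;   /* == rn - rm - 1 + carry (mod 2^64).  */
}
#endif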
2437
2438 /* 32 bit sub with carry setting flags */
2439 static void
2440 sbcs32 (sim_cpu *cpu)
2441 {
2442 unsigned rm = INSTR (20, 16);
2443 unsigned rn = INSTR (9, 5);
2444 unsigned rd = INSTR (4, 0);
2445
2446 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2447 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2448 uint32_t carry = IS_SET (C);
2449 uint32_t result = value1 - value2 - 1 + carry; /* SBC: Rn - Rm - 1 + C. */
2450
2451 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2452 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2453 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2454 }
2455
2456 /* 64 bit sub with carry setting flags */
2457 static void
2458 sbcs64 (sim_cpu *cpu)
2459 {
2460 unsigned rm = INSTR (20, 16);
2461 unsigned rn = INSTR (9, 5);
2462 unsigned rd = INSTR (4, 0);
2463
2464 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2465 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2466 uint64_t carry = IS_SET (C);
2467 uint64_t result = value1 - value2 - 1 + carry; /* SBC: Rn - Rm - 1 + C. */
2468
2469 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2470 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2471 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2472 }
2473
2474 static void
2475 dexAddSubtractWithCarry (sim_cpu *cpu)
2476 {
2477 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2478 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2479 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2480 instr[28,21] = 1 1010 000
2481 instr[20,16] = Rm
2482 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2483 instr[9,5] = Rn
2484 instr[4,0] = Rd */
2485
2486 uint32_t op2 = INSTR (15, 10);
2487
2488 NYI_assert (28, 21, 0xD0);
2489
2490 if (op2 != 0)
2491 HALT_UNALLOC;
2492
2493 /* Dispatch on size:op:set?. */
2494 switch (INSTR (31, 29))
2495 {
2496 case 0: adc32 (cpu); break;
2497 case 1: adcs32 (cpu); break;
2498 case 2: sbc32 (cpu); break;
2499 case 3: sbcs32 (cpu); break;
2500 case 4: adc64 (cpu); break;
2501 case 5: adcs64 (cpu); break;
2502 case 6: sbc64 (cpu); break;
2503 case 7: sbcs64 (cpu); break;
2504 }
2505 }
2506
2507 static uint32_t
2508 testConditionCode (sim_cpu *cpu, CondCode cc)
2509 {
2510 /* This should be reducible to branchless logic
2511 by some careful testing of bits in CC followed
2512 by the requisite masking and combining of bits
2513 from the flag register.
2514
2515 For now we do it with a switch. */
2516 int res;
2517
2518 switch (cc)
2519 {
2520 case EQ: res = IS_SET (Z); break;
2521 case NE: res = IS_CLEAR (Z); break;
2522 case CS: res = IS_SET (C); break;
2523 case CC: res = IS_CLEAR (C); break;
2524 case MI: res = IS_SET (N); break;
2525 case PL: res = IS_CLEAR (N); break;
2526 case VS: res = IS_SET (V); break;
2527 case VC: res = IS_CLEAR (V); break;
2528 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2529 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2530 case GE: res = IS_SET (N) == IS_SET (V); break;
2531 case LT: res = IS_SET (N) != IS_SET (V); break;
2532 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2533 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2534 case AL:
2535 case NV:
2536 default:
2537 res = 1;
2538 break;
2539 }
2540 return res;
2541 }
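
/* One branch-reduced factoring, kept in a disabled block.  It
   follows the ARM ARM ConditionHolds() pseudocode: decide on
   cc[3:1], then invert when cc[0] is set, except for 1111 (NV,
   which behaves as always).  It assumes the CondCode enumerators
   carry their architectural encodings (EQ = 0 ... NV = 15).  */
#if 0
static int
condition_holds (sim_cpu *cpu, CondCode cc)
{
  int res;

  switch ((cc >> 1) & 0x7)
    {
    case 0: res = IS_SET (Z); break;                     /* EQ/NE.  */
    case 1: res = IS_SET (C); break;                     /* CS/CC.  */
    case 2: res = IS_SET (N); break;                     /* MI/PL.  */
    case 3: res = IS_SET (V); break;                     /* VS/VC.  */
    case 4: res = IS_SET (C) && IS_CLEAR (Z); break;     /* HI/LS.  */
    case 5: res = IS_SET (N) == IS_SET (V); break;       /* GE/LT.  */
    case 6: res = IS_CLEAR (Z)
        && (IS_SET (N) == IS_SET (V)); break;            /* GT/LE.  */
    default: res = 1; break;                             /* AL/NV.  */
    }

  if ((cc & 1) && cc != NV)
    res = ! res;
  return res;
}
#endif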
2542
2543 static void
2544 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2545 {
2546 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2547 instr[30] = compare with positive (1) or negative value (0)
2548 instr[29,21] = 1 1101 0010
2549 instr[20,16] = Rm or const
2550 instr[15,12] = cond
2551 instr[11] = compare reg (0) or const (1)
2552 instr[10] = 0
2553 instr[9,5] = Rn
2554 instr[4] = 0
2555 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2556 signed int negate;
2557 unsigned rm;
2558 unsigned rn;
2559
2560 NYI_assert (29, 21, 0x1d2);
2561 NYI_assert (10, 10, 0);
2562 NYI_assert (4, 4, 0);
2563
2564 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2565 if (! testConditionCode (cpu, INSTR (15, 12)))
2566 {
2567 aarch64_set_CPSR (cpu, INSTR (3, 0));
2568 return;
2569 }
2570
2571 negate = INSTR (30, 30) ? 1 : -1;
2572 rm = INSTR (20, 16);
2573 rn = INSTR ( 9, 5);
2574
2575 if (INSTR (31, 31))
2576 {
2577 if (INSTR (11, 11))
2578 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2579 negate * (uint64_t) rm);
2580 else
2581 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2582 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2583 }
2584 else
2585 {
2586 if (INSTR (11, 11))
2587 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2588 negate * rm);
2589 else
2590 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2591 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2592 }
2593 }
2594
2595 static void
2596 do_vec_MOV_whole_vector (sim_cpu *cpu)
2597 {
2598 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2599
2600 instr[31] = 0
2601 instr[30] = half(0)/full(1)
2602 instr[29,21] = 001110101
2603 instr[20,16] = Vs
2604 instr[15,10] = 000111
2605 instr[9,5] = Vs
2606 instr[4,0] = Vd */
2607
2608 unsigned vs = INSTR (9, 5);
2609 unsigned vd = INSTR (4, 0);
2610
2611 NYI_assert (29, 21, 0x075);
2612 NYI_assert (15, 10, 0x07);
2613
2614 if (INSTR (20, 16) != vs)
2615 HALT_NYI;
2616
2617 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2618 if (INSTR (30, 30))
2619 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2620
2621 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2622 }
2623
2624 static void
2625 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2626 {
2627 /* instr[31] = 0
2628 instr[30] = word(0)/long(1)
2629 instr[29,21] = 00 1110 000
2630 instr[20,16] = element size and index
2631 instr[15,10] = 00 0010 11
2632 instr[9,5] = V source
2633 instr[4,0] = R dest */
2634
2635 unsigned vs = INSTR (9, 5);
2636 unsigned rd = INSTR (4, 0);
2637 unsigned imm5 = INSTR (20, 16);
2638 unsigned full = INSTR (30, 30);
2639 int size, index;
2640
2641 NYI_assert (29, 21, 0x070);
2642 NYI_assert (15, 10, 0x0B);
2643
2644 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2645
2646 if (imm5 & 0x1)
2647 {
2648 size = 0;
2649 index = (imm5 >> 1) & 0xF;
2650 }
2651 else if (imm5 & 0x2)
2652 {
2653 size = 1;
2654 index = (imm5 >> 2) & 0x7;
2655 }
2656 else if (full && (imm5 & 0x4))
2657 {
2658 size = 2;
2659 index = (imm5 >> 3) & 0x3;
2660 }
2661 else
2662 HALT_UNALLOC;
2663
2664 switch (size)
2665 {
2666 case 0:
2667 if (full)
2668 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2669 aarch64_get_vec_s8 (cpu, vs, index));
2670 else
2671 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2672 aarch64_get_vec_s8 (cpu, vs, index));
2673 break;
2674
2675 case 1:
2676 if (full)
2677 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2678 aarch64_get_vec_s16 (cpu, vs, index));
2679 else
2680 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2681 aarch64_get_vec_s16 (cpu, vs, index));
2682 break;
2683
2684 case 2:
2685 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2686 aarch64_get_vec_s32 (cpu, vs, index));
2687 break;
2688
2689 default:
2690 HALT_UNALLOC;
2691 }
2692 }
2693
2694 static void
2695 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2696 {
2697 /* instr[31] = 0
2698 instr[30] = word(0)/long(1)
2699 instr[29,21] = 00 1110 000
2700 instr[20,16] = element size and index
2701 instr[15,10] = 00 0011 11
2702 instr[9,5] = V source
2703 instr[4,0] = R dest */
2704
2705 unsigned vs = INSTR (9, 5);
2706 unsigned rd = INSTR (4, 0);
2707 unsigned imm5 = INSTR (20, 16);
2708 unsigned full = INSTR (30, 30);
2709 int size, index;
2710
2711 NYI_assert (29, 21, 0x070);
2712 NYI_assert (15, 10, 0x0F);
2713
2714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2715
2716 if (!full)
2717 {
2718 if (imm5 & 0x1)
2719 {
2720 size = 0;
2721 index = (imm5 >> 1) & 0xF;
2722 }
2723 else if (imm5 & 0x2)
2724 {
2725 size = 1;
2726 index = (imm5 >> 2) & 0x7;
2727 }
2728 else if (imm5 & 0x4)
2729 {
2730 size = 2;
2731 index = (imm5 >> 3) & 0x3;
2732 }
2733 else
2734 HALT_UNALLOC;
2735 }
2736 else if (imm5 & 0x8)
2737 {
2738 size = 3;
2739 index = (imm5 >> 4) & 0x1;
2740 }
2741 else
2742 HALT_UNALLOC;
2743
2744 switch (size)
2745 {
2746 case 0:
2747 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2748 aarch64_get_vec_u8 (cpu, vs, index));
2749 break;
2750
2751 case 1:
2752 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2753 aarch64_get_vec_u16 (cpu, vs, index));
2754 break;
2755
2756 case 2:
2757 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2758 aarch64_get_vec_u32 (cpu, vs, index));
2759 break;
2760
2761 case 3:
2762 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2763 aarch64_get_vec_u64 (cpu, vs, index));
2764 break;
2765
2766 default:
2767 HALT_UNALLOC;
2768 }
2769 }
2770
2771 static void
2772 do_vec_INS (sim_cpu *cpu)
2773 {
2774 /* instr[31,21] = 01001110000
2775 instr[20,16] = element size and index
2776 instr[15,10] = 000111
2777 instr[9,5] = W source
2778 instr[4,0] = V dest */
2779
2780 int index;
2781 unsigned rs = INSTR (9, 5);
2782 unsigned vd = INSTR (4, 0);
2783
2784 NYI_assert (31, 21, 0x270);
2785 NYI_assert (15, 10, 0x07);
2786
2787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2788 if (INSTR (16, 16))
2789 {
2790 index = INSTR (20, 17);
2791 aarch64_set_vec_u8 (cpu, vd, index,
2792 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2793 }
2794 else if (INSTR (17, 17))
2795 {
2796 index = INSTR (20, 18);
2797 aarch64_set_vec_u16 (cpu, vd, index,
2798 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2799 }
2800 else if (INSTR (18, 18))
2801 {
2802 index = INSTR (20, 19);
2803 aarch64_set_vec_u32 (cpu, vd, index,
2804 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2805 }
2806 else if (INSTR (19, 19))
2807 {
2808 index = INSTR (20, 20);
2809 aarch64_set_vec_u64 (cpu, vd, index,
2810 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2811 }
2812 else
2813 HALT_NYI;
2814 }
2815
2816 static void
2817 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2818 {
2819 /* instr[31] = 0
2820 instr[30] = half(0)/full(1)
2821 instr[29,21] = 00 1110 000
2822 instr[20,16] = element size and index
2823 instr[15,10] = 0000 01
2824 instr[9,5] = V source
2825 instr[4,0] = V dest. */
2826
2827 unsigned full = INSTR (30, 30);
2828 unsigned vs = INSTR (9, 5);
2829 unsigned vd = INSTR (4, 0);
2830 int i, index;
2831
2832 NYI_assert (29, 21, 0x070);
2833 NYI_assert (15, 10, 0x01);
2834
2835 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2836 if (INSTR (16, 16))
2837 {
2838 index = INSTR (20, 17);
2839
2840 for (i = 0; i < (full ? 16 : 8); i++)
2841 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2842 }
2843 else if (INSTR (17, 17))
2844 {
2845 index = INSTR (20, 18);
2846
2847 for (i = 0; i < (full ? 8 : 4); i++)
2848 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2849 }
2850 else if (INSTR (18, 18))
2851 {
2852 index = INSTR (20, 19);
2853
2854 for (i = 0; i < (full ? 4 : 2); i++)
2855 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2856 }
2857 else
2858 {
2859 if (INSTR (19, 19) == 0)
2860 HALT_UNALLOC;
2861
2862 if (! full)
2863 HALT_UNALLOC;
2864
2865 index = INSTR (20, 20);
2866
2867 for (i = 0; i < 2; i++)
2868 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2869 }
2870 }
2871
2872 static void
2873 do_vec_TBL (sim_cpu *cpu)
2874 {
2875 /* instr[31] = 0
2876 instr[30] = half(0)/full(1)
2877 instr[29,21] = 00 1110 000
2878 instr[20,16] = Vm
2879 instr[15] = 0
2880 instr[14,13] = vec length
2881 instr[12,10] = 000
2882 instr[9,5] = V start
2883 instr[4,0] = V dest */
2884
2885 int full = INSTR (30, 30);
2886 int len = INSTR (14, 13) + 1;
2887 unsigned vm = INSTR (20, 16);
2888 unsigned vn = INSTR (9, 5);
2889 unsigned vd = INSTR (4, 0);
2890 unsigned i;
2891
2892 NYI_assert (29, 21, 0x070);
2893 NYI_assert (12, 10, 0);
2894
2895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2896 for (i = 0; i < (full ? 16 : 8); i++)
2897 {
2898 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2899 uint8_t val;
2900
2901 if (selector < 16)
2902 val = aarch64_get_vec_u8 (cpu, vn, selector);
2903 else if (selector < 32)
2904 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2905 else if (selector < 48)
2906 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2907 else if (selector < 64)
2908 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2909 else
2910 val = 0;
2911
2912 aarch64_set_vec_u8 (cpu, vd, i, val);
2913 }
2914 }
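
/* A per-byte reference model of the lookup above, kept in a
   disabled block: each selector indexes the concatenation of LEN
   consecutive source registers, and anything past the end yields
   zero.  Like the loop above, it does not wrap register numbers at
   31.  */
#if 0
static uint8_t
tbl_lane_model (sim_cpu *cpu, unsigned vn, int len, uint8_t selector)
{
  if (selector >= len * 16)
    return 0;   /* Out of range selects zero.  */
  return aarch64_get_vec_u8 (cpu, vn + selector / 16, selector % 16);
}
#endif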
2915
2916 static void
2917 do_vec_TRN (sim_cpu *cpu)
2918 {
2919 /* instr[31] = 0
2920 instr[30] = half(0)/full(1)
2921 instr[29,24] = 00 1110
2922 instr[23,22] = size
2923 instr[21] = 0
2924 instr[20,16] = Vm
2925 instr[15] = 0
2926 instr[14] = TRN1 (0) / TRN2 (1)
2927 instr[13,10] = 1010
2928 instr[9,5] = V source
2929 instr[4,0] = V dest. */
2930
2931 int full = INSTR (30, 30);
2932 int second = INSTR (14, 14);
2933 unsigned vm = INSTR (20, 16);
2934 unsigned vn = INSTR (9, 5);
2935 unsigned vd = INSTR (4, 0);
2936 unsigned i;
2937
2938 NYI_assert (29, 24, 0x0E);
2939 NYI_assert (13, 10, 0xA);
2940
2941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2942 switch (INSTR (23, 22))
2943 {
2944 case 0:
2945 for (i = 0; i < (full ? 8 : 4); i++)
2946 {
2947 uint8_t a = aarch64_get_vec_u8 (cpu, vn, i * 2 + second);
2948 uint8_t b = aarch64_get_vec_u8 (cpu, vm, i * 2 + second);
2949 aarch64_set_vec_u8 (cpu, vd, i * 2, a);
2950 aarch64_set_vec_u8 (cpu, vd, i * 2 + 1, b);
2951 }
2952 break;
2953
2954 case 1:
2955 for (i = 0; i < (full ? 4 : 2); i++)
2956 {
2957 uint16_t a = aarch64_get_vec_u16 (cpu, vn, i * 2 + second);
2958 uint16_t b = aarch64_get_vec_u16 (cpu, vm, i * 2 + second);
2959 aarch64_set_vec_u16 (cpu, vd, i * 2, a);
2960 aarch64_set_vec_u16 (cpu, vd, i * 2 + 1, b);
2961 }
2962 break;
2963
2964 case 2:
2965 for (i = 0; i < (full ? 2 : 1); i++)
2966 {
2967 uint32_t a = aarch64_get_vec_u32 (cpu, vn, i * 2 + second);
2968 uint32_t b = aarch64_get_vec_u32 (cpu, vm, i * 2 + second);
2969 aarch64_set_vec_u32 (cpu, vd, i * 2, a);
2970 aarch64_set_vec_u32 (cpu, vd, i * 2 + 1, b);
2971 }
2972 break;
2973
2974 case 3:
2975 if (! full)
2976 HALT_UNALLOC;
2977 else
2978 {
2979 uint64_t a = aarch64_get_vec_u64 (cpu, vn, second);
2980 uint64_t b = aarch64_get_vec_u64 (cpu, vm, second);
2981 aarch64_set_vec_u64 (cpu, vd, 0, a);
2982 aarch64_set_vec_u64 (cpu, vd, 1, b);
2983 }
2984 break;
2988 }
2989 }
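
/* Reference semantics for the transposes above, kept in a disabled
   block: TRN1 gathers the even-indexed elements of Vn and Vm and
   TRN2 the odd-indexed ones, interleaving them pairwise, e.g. for
   bytes TRN1 {a0,a1,a2,...}, {b0,b1,b2,...} = {a0, b0, a2, b2, ...}.
   Buffering the reads keeps the model safe when Vd aliases a
   source.  */
#if 0
static void
trn_bytes_model (sim_cpu *cpu, unsigned vd, unsigned vn, unsigned vm,
                 int second, int pairs)
{
  uint8_t out[16];
  int p;

  for (p = 0; p < pairs; p++)
    {
      out[p * 2]     = aarch64_get_vec_u8 (cpu, vn, p * 2 + second);
      out[p * 2 + 1] = aarch64_get_vec_u8 (cpu, vm, p * 2 + second);
    }
  for (p = 0; p < pairs * 2; p++)
    aarch64_set_vec_u8 (cpu, vd, p, out[p]);
}
#endif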
2990
2991 static void
2992 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2993 {
2994 /* instr[31] = 0
2995 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2996 [must be 1 for 64-bit xfer]
2997 instr[29,20] = 00 1110 0000
2998 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2999 0100=> 32-bits, 1000=> 64-bits
3000 instr[15,10] = 0000 11
3001 instr[9,5] = W source
3002 instr[4,0] = V dest. */
3003
3004 unsigned i;
3005 unsigned Vd = INSTR (4, 0);
3006 unsigned Rs = INSTR (9, 5);
3007 int both = INSTR (30, 30);
3008
3009 NYI_assert (29, 20, 0x0E0);
3010 NYI_assert (15, 10, 0x03);
3011
3012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3013 switch (INSTR (19, 16))
3014 {
3015 case 1:
3016 for (i = 0; i < (both ? 16 : 8); i++)
3017 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3018 break;
3019
3020 case 2:
3021 for (i = 0; i < (both ? 8 : 4); i++)
3022 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3023 break;
3024
3025 case 4:
3026 for (i = 0; i < (both ? 4 : 2); i++)
3027 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3028 break;
3029
3030 case 8:
3031 if (!both)
3032 HALT_NYI;
3033 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3034 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3035 break;
3036
3037 default:
3038 HALT_NYI;
3039 }
3040 }
3041
3042 static void
3043 do_vec_UZP (sim_cpu *cpu)
3044 {
3045 /* instr[31] = 0
3046 instr[30] = half(0)/full(1)
3047 instr[29,24] = 00 1110
3048 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3049 instr[21] = 0
3050 instr[20,16] = Vm
3051 instr[15] = 0
3052 instr[14] = lower (0) / upper (1)
3053 instr[13,10] = 0110
3054 instr[9,5] = Vn
3055 instr[4,0] = Vd. */
3056
3057 int full = INSTR (30, 30);
3058 int upper = INSTR (14, 14);
3059
3060 unsigned vm = INSTR (20, 16);
3061 unsigned vn = INSTR (9, 5);
3062 unsigned vd = INSTR (4, 0);
3063
3064 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3065 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3066 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3067 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3068
3069 uint64_t val1;
3070 uint64_t val2;
3071
3072 uint64_t input2 = full ? val_n2 : val_m1;
3073
3074 NYI_assert (29, 24, 0x0E);
3075 NYI_assert (21, 21, 0);
3076 NYI_assert (15, 15, 0);
3077 NYI_assert (13, 10, 6);
3078
3079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3080 switch (INSTR (23, 22))
3081 {
3082 case 0:
3083 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3084 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3085 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3086 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3087
3088 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3089 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3090 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3091 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3092
3093 if (full)
3094 {
3095 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3096 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3097 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3098 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3099
3100 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3101 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3102 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3103 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3104 }
3105 break;
3106
3107 case 1:
3108 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3109 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3110
3111 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3112 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3113
3114 if (full)
3115 {
3116 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3117 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3118
3119 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3120 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3121 }
3122 break;
3123
3124 case 2:
3125 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3126 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3127
3128 if (full)
3129 {
3130 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3131 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3132 }
3133 break;
3134
3135 case 3:
3136 if (! full)
3137 HALT_UNALLOC;
3138
3139 val1 = upper ? val_n2 : val_n1;
3140 val2 = upper ? val_m2 : val_m1;
3141 break;
3142 }
3143
3144 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3145 if (full)
3146 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3147 }
3148
3149 static void
3150 do_vec_ZIP (sim_cpu *cpu)
3151 {
3152 /* instr[31] = 0
3153 instr[30] = half(0)/full(1)
3154 instr[29,24] = 00 1110
3155 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3156 instr[21] = 0
3157 instr[20,16] = Vm
3158 instr[15] = 0
3159 instr[14] = lower (0) / upper (1)
3160 instr[13,10] = 1110
3161 instr[9,5] = Vn
3162 instr[4,0] = Vd. */
3163
3164 int full = INSTR (30, 30);
3165 int upper = INSTR (14, 14);
3166
3167 unsigned vm = INSTR (20, 16);
3168 unsigned vn = INSTR (9, 5);
3169 unsigned vd = INSTR (4, 0);
3170
3171 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3172 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3173 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3174 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3175
3176 uint64_t val1 = 0;
3177 uint64_t val2 = 0;
3178
3179 uint64_t input1 = upper ? val_n2 : val_n1;
3180 uint64_t input2 = upper ? val_m2 : val_m1;
3181
3182 NYI_assert (29, 24, 0x0E);
3183 NYI_assert (21, 21, 0);
3184 NYI_assert (15, 15, 0);
3185 NYI_assert (13, 10, 0xE);
3186
3187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3188 switch (INSTR (23, 22))
3189 {
3190 case 0:
3191 val1 =
3192 ((input1 << 0) & (0xFF << 0))
3193 | ((input2 << 8) & (0xFF << 8))
3194 | ((input1 << 8) & (0xFF << 16))
3195 | ((input2 << 16) & (0xFFULL << 24))
3196 | ((input1 << 16) & (0xFFULL << 32))
3197 | ((input2 << 24) & (0xFFULL << 40))
3198 | ((input1 << 24) & (0xFFULL << 48))
3199 | ((input2 << 32) & (0xFFULL << 56));
3200
3201 val2 =
3202 ((input1 >> 32) & (0xFF << 0))
3203 | ((input2 >> 24) & (0xFF << 8))
3204 | ((input1 >> 24) & (0xFF << 16))
3205 | ((input2 >> 16) & (0xFFULL << 24))
3206 | ((input1 >> 16) & (0xFFULL << 32))
3207 | ((input2 >> 8) & (0xFFULL << 40))
3208 | ((input1 >> 8) & (0xFFULL << 48))
3209 | ((input2 >> 0) & (0xFFULL << 56));
3210 break;
3211
3212 case 1:
3213 val1 =
3214 ((input1 << 0) & (0xFFFF << 0))
3215 | ((input2 << 16) & (0xFFFFULL << 16))
3216 | ((input1 << 16) & (0xFFFFULL << 32))
3217 | ((input2 << 32) & (0xFFFFULL << 48));
3218
3219 val2 =
3220 ((input1 >> 32) & (0xFFFF << 0))
3221 | ((input2 >> 16) & (0xFFFFULL << 16))
3222 | ((input1 >> 16) & (0xFFFFULL << 32))
3223 | ((input2 >> 0) & (0xFFFFULL << 48));
3224 break;
3225
3226 case 2:
3227 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3228 val2 = (input1 >> 32) | (input2 & 0xFFFFFFFF00000000ULL);
3229 break;
3230
3231 case 3:
3232 val1 = input1;
3233 val2 = input2;
3234 break;
3235 }
3236
3237 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3238 if (full)
3239 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3240 }
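
/* A per-byte reference model of the interleaves above, kept in a
   disabled block: ZIP1 (upper == 0) interleaves the low halves of
   Vn and Vm, ZIP2 the high halves, so for bytes
   ZIP1 {a0,a1,...}, {b0,b1,...} = {a0, b0, a1, b1, ...}.  */
#if 0
static void
zip_bytes_model (sim_cpu *cpu, unsigned vd, unsigned vn, unsigned vm,
                 int upper, int pairs)
{
  uint8_t out[16];
  int base = upper ? pairs : 0;
  int p;

  for (p = 0; p < pairs; p++)
    {
      out[p * 2]     = aarch64_get_vec_u8 (cpu, vn, base + p);
      out[p * 2 + 1] = aarch64_get_vec_u8 (cpu, vm, base + p);
    }
  for (p = 0; p < pairs * 2; p++)
    aarch64_set_vec_u8 (cpu, vd, p, out[p]);
}
#endif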
3241
3242 /* Floating point immediates are encoded in 8 bits.
3243 fpimm[7] = sign bit.
3244 fpimm[6:4] = signed exponent.
3245 fpimm[3:0] = fraction (assuming leading 1).
3246 i.e. F = s * 1.f * 2^(e - b). */
3247
3248 static float
3249 fp_immediate_for_encoding_32 (uint32_t imm8)
3250 {
3251 float u;
3252 uint32_t s, e, f, i;
3253
3254 s = (imm8 >> 7) & 0x1;
3255 e = (imm8 >> 4) & 0x7;
3256 f = imm8 & 0xf;
3257
3258 /* The fp value is s * n/16 * 2^e where n is 16+f. */
3259 u = (16.0 + f) / 16.0;
3260
3261 /* N.B. exponent is signed. */
3262 if (e < 4)
3263 {
3264 int epos = e;
3265
3266 for (i = 0; i <= epos; i++)
3267 u *= 2.0;
3268 }
3269 else
3270 {
3271 int eneg = 7 - e;
3272
3273 for (i = 0; i < eneg; i++)
3274 u /= 2.0;
3275 }
3276
3277 if (s)
3278 u = - u;
3279
3280 return u;
3281 }
3282
3283 static double
3284 fp_immediate_for_encoding_64 (uint32_t imm8)
3285 {
3286 double u;
3287 uint32_t s, e, f, i;
3288
3289 s = (imm8 >> 7) & 0x1;
3290 e = (imm8 >> 4) & 0x7;
3291 f = imm8 & 0xf;
3292
3293 /* The fp value is s * n/16 * 2^e where n is 16+f. */
3294 u = (16.0 + f) / 16.0;
3295
3296 /* N.B. exponent is signed. */
3297 if (e < 4)
3298 {
3299 int epos = e;
3300
3301 for (i = 0; i <= epos; i++)
3302 u *= 2.0;
3303 }
3304 else
3305 {
3306 int eneg = 7 - e;
3307
3308 for (i = 0; i < eneg; i++)
3309 u /= 2.0;
3310 }
3311
3312 if (s)
3313 u = - u;
3314
3315 return u;
3316 }
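
/* Worked decodings, kept in a disabled block: imm8 = 0x70 has
   s = 0, e = 7, f = 0 and decodes to 1.0; imm8 = 0x10 has e = 1 and
   decodes to 4.0; setting the sign bit (0xF0) negates.  These match
   the FMOV (immediate) encodings of those constants.  */
#if 0
static int
fp_immediate_examples (void)
{
  return fp_immediate_for_encoding_32 (0x70) == 1.0f
    && fp_immediate_for_encoding_32 (0x10) == 4.0f
    && fp_immediate_for_encoding_64 (0xF0) == -1.0;
}
#endif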
3317
3318 static void
3319 do_vec_MOV_immediate (sim_cpu *cpu)
3320 {
3321 /* instr[31] = 0
3322 instr[30] = full/half selector
3323 instr[29,19] = 00111100000
3324 instr[18,16] = high 3 bits of uimm8
3325 instr[15,12] = size & shift:
3326 0000 => 32-bit
3327 0010 => 32-bit + LSL#8
3328 0100 => 32-bit + LSL#16
3329 0110 => 32-bit + LSL#24
3330 1010 => 16-bit + LSL#8
3331 1000 => 16-bit
3332 1101 => 32-bit + MSL#16
3333 1100 => 32-bit + MSL#8
3334 1110 => 8-bit
3335 1111 => double
3336 instr[11,10] = 01
3337 instr[9,5] = low 5-bits of uimm8
3338 instr[4,0] = Vd. */
3339
3340 int full = INSTR (30, 30);
3341 unsigned vd = INSTR (4, 0);
3342 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3343 unsigned i;
3344
3345 NYI_assert (29, 19, 0x1E0);
3346 NYI_assert (11, 10, 1);
3347
3348 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3349 switch (INSTR (15, 12))
3350 {
3351 case 0x0: /* 32-bit, no shift. */
3352 case 0x2: /* 32-bit, shift by 8. */
3353 case 0x4: /* 32-bit, shift by 16. */
3354 case 0x6: /* 32-bit, shift by 24. */
3355 val <<= (8 * INSTR (14, 13));
3356 for (i = 0; i < (full ? 4 : 2); i++)
3357 aarch64_set_vec_u32 (cpu, vd, i, val);
3358 break;
3359
3360 case 0xa: /* 16-bit, shift by 8. */
3361 val <<= 8;
3362 /* Fall through. */
3363 case 0x8: /* 16-bit, no shift. */
3364 for (i = 0; i < (full ? 8 : 4); i++)
3365 aarch64_set_vec_u16 (cpu, vd, i, val);
3366 break;
3367
3368 case 0xd: /* 32-bit, mask shift by 16. */
3369 val <<= 8;
3370 val |= 0xFF;
3371 /* Fall through. */
3372 case 0xc: /* 32-bit, mask shift by 8. */
3373 val <<= 8;
3374 val |= 0xFF;
3375 for (i = 0; i < (full ? 4 : 2); i++)
3376 aarch64_set_vec_u32 (cpu, vd, i, val);
3377 break;
3378
3379 case 0xe: /* 8-bit, no shift. */
3380 for (i = 0; i < (full ? 16 : 8); i++)
3381 aarch64_set_vec_u8 (cpu, vd, i, val);
3382 break;
3383
3384 case 0xf: /* FMOV Vd.{2|4}S, #fpimm. */
3385 {
3386 float u = fp_immediate_for_encoding_32 (val);
3387 for (i = 0; i < (full ? 4 : 2); i++)
3388 aarch64_set_vec_float (cpu, vd, i, u);
3389 break;
3390 }
3391
3392 default:
3393 HALT_NYI;
3394 }
3395 }
3396
3397 static void
3398 do_vec_MVNI (sim_cpu *cpu)
3399 {
3400 /* instr[31] = 0
3401 instr[30] = full/half selector
3402 instr[29,19] = 10111100000
3403 instr[18,16] = high 3 bits of uimm8
3404 instr[15,12] = selector
3405 instr[11,10] = 01
3406 instr[9,5] = low 5-bits of uimm8
3407 instr[4,0] = Vd. */
3408
3409 int full = INSTR (30, 30);
3410 unsigned vd = INSTR (4, 0);
3411 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3412 unsigned i;
3413
3414 NYI_assert (29, 19, 0x5E0);
3415 NYI_assert (11, 10, 1);
3416
3417 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3418 switch (INSTR (15, 12))
3419 {
3420 case 0x0: /* 32-bit, no shift. */
3421 case 0x2: /* 32-bit, shift by 8. */
3422 case 0x4: /* 32-bit, shift by 16. */
3423 case 0x6: /* 32-bit, shift by 24. */
3424 val <<= (8 * INSTR (14, 13));
3425 val = ~ val;
3426 for (i = 0; i < (full ? 4 : 2); i++)
3427 aarch64_set_vec_u32 (cpu, vd, i, val);
3428 return;
3429
3430 case 0xa: /* 16-bit, 8 bit shift. */
3431 val <<= 8; /* Fall through. */
3432 case 0x8: /* 16-bit, no shift. */
3433 val = ~ val;
3434 for (i = 0; i < (full ? 8 : 4); i++)
3435 aarch64_set_vec_u16 (cpu, vd, i, val);
3436 return;
3437
3438 case 0xd: /* 32-bit, mask shift by 16. */
3439 val <<= 8;
3440 val |= 0xFF; /* Fall through. */
3441 case 0xc: /* 32-bit, mask shift by 8. */
3442 val <<= 8;
3443 val |= 0xFF;
3444 val = ~ val;
3445 for (i = 0; i < (full ? 4 : 2); i++)
3446 aarch64_set_vec_u32 (cpu, vd, i, val);
3447 return;
3448
3449 case 0xE: /* MOVI Dn|Vd.2D, #mask64. */
3450 {
3451 uint64_t mask = 0;
3452
3453 for (i = 0; i < 8; i++)
3454 if (val & (1 << i))
3455 mask |= (0xFFULL << (i * 8)); /* ULL: the shift can reach 56. */
3456 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3457 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0); /* MOVI Dn zeroes the top half. */
3458 return;
3459 }
3460
3461 case 0xf: /* FMOV Vd.2D, #fpimm. */
3462 {
3463 double u = fp_immediate_for_encoding_64 (val);
3464
3465 if (! full)
3466 HALT_UNALLOC;
3467
3468 aarch64_set_vec_double (cpu, vd, 0, u);
3469 aarch64_set_vec_double (cpu, vd, 1, u);
3470 return;
3471 }
3472
3473 default:
3474 HALT_NYI;
3475 }
3476 }
3477
3478 #define ABS(A) ((A) < 0 ? - (A) : (A))
3479
3480 static void
3481 do_vec_ABS (sim_cpu *cpu)
3482 {
3483 /* instr[31] = 0
3484 instr[30] = half(0)/full(1)
3485 instr[29,24] = 00 1110
3486 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3487 instr[21,10] = 10 0000 1011 10
3488 instr[9,5] = Vn
3489 instr[4,0] = Vd. */
3490
3491 unsigned vn = INSTR (9, 5);
3492 unsigned vd = INSTR (4, 0);
3493 unsigned full = INSTR (30, 30);
3494 unsigned i;
3495
3496 NYI_assert (29, 24, 0x0E);
3497 NYI_assert (21, 10, 0x82E);
3498
3499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3500 switch (INSTR (23, 22))
3501 {
3502 case 0:
3503 for (i = 0; i < (full ? 16 : 8); i++)
3504 aarch64_set_vec_s8 (cpu, vd, i,
3505 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3506 break;
3507
3508 case 1:
3509 for (i = 0; i < (full ? 8 : 4); i++)
3510 aarch64_set_vec_s16 (cpu, vd, i,
3511 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3512 break;
3513
3514 case 2:
3515 for (i = 0; i < (full ? 4 : 2); i++)
3516 aarch64_set_vec_s32 (cpu, vd, i,
3517 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3518 break;
3519
3520 case 3:
3521 if (! full)
3522 HALT_UNALLOC;
3523 for (i = 0; i < 2; i++)
3524 aarch64_set_vec_s64 (cpu, vd, i,
3525 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3526 break;
3527 }
3528 }
3529
3530 static void
3531 do_vec_ADDV (sim_cpu *cpu)
3532 {
3533 /* instr[31] = 0
3534 instr[30] = full/half selector
3535 instr[29,24] = 00 1110
3536 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3537 instr[21,10] = 11 0001 1011 10
3538 instr[9,5] = Vm
3539 instr[4,0] = Rd. */
3540
3541 unsigned vm = INSTR (9, 5);
3542 unsigned rd = INSTR (4, 0);
3543 unsigned i;
3544 int full = INSTR (30, 30);
3545
3546 NYI_assert (29, 24, 0x0E);
3547 NYI_assert (21, 10, 0xC6E);
3548
3549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3550 switch (INSTR (23, 22))
3551 {
3552 case 0:
3553 {
3554 uint8_t val = 0;
3555 for (i = 0; i < (full ? 16 : 8); i++)
3556 val += aarch64_get_vec_u8 (cpu, vm, i);
3557 aarch64_set_vec_u64 (cpu, rd, 0, val);
3558 return;
3559 }
3560
3561 case 1:
3562 {
3563 uint16_t val = 0;
3564 for (i = 0; i < (full ? 8 : 4); i++)
3565 val += aarch64_get_vec_u16 (cpu, vm, i);
3566 aarch64_set_vec_u64 (cpu, rd, 0, val);
3567 return;
3568 }
3569
3570 case 2:
3571 {
3572 uint32_t val = 0;
3573 if (! full)
3574 HALT_UNALLOC;
3575 for (i = 0; i < 4; i++)
3576 val += aarch64_get_vec_u32 (cpu, vm, i);
3577 aarch64_set_vec_u64 (cpu, rd, 0, val);
3578 return;
3579 }
3580
3581 case 3:
3582 HALT_UNALLOC;
3583 }
3584 }
3585
3586 static void
3587 do_vec_ins_2 (sim_cpu *cpu)
3588 {
3589 /* instr[31,21] = 01001110000
3590 instr[20,18] = size & element selector
3591 instr[17,14] = 0000
3592 instr[13] = direction: to vec(0), from vec (1)
3593 instr[12,10] = 111
3594 instr[9,5] = Vm
3595 instr[4,0] = Vd. */
3596
3597 unsigned elem;
3598 unsigned vm = INSTR (9, 5);
3599 unsigned vd = INSTR (4, 0);
3600
3601 NYI_assert (31, 21, 0x270);
3602 NYI_assert (17, 14, 0);
3603 NYI_assert (12, 10, 7);
3604
3605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3606 if (INSTR (13, 13) == 1)
3607 {
3608 if (INSTR (18, 18) == 1)
3609 {
3610 /* 32-bit moves. */
3611 elem = INSTR (20, 19);
3612 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3613 aarch64_get_vec_u32 (cpu, vm, elem));
3614 }
3615 else
3616 {
3617 /* 64-bit moves. */
3618 if (INSTR (19, 19) != 1)
3619 HALT_NYI;
3620
3621 elem = INSTR (20, 20);
3622 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3623 aarch64_get_vec_u64 (cpu, vm, elem));
3624 }
3625 }
3626 else
3627 {
3628 if (INSTR (18, 18) == 1)
3629 {
3630 /* 32-bit moves. */
3631 elem = INSTR (20, 19);
3632 aarch64_set_vec_u32 (cpu, vd, elem,
3633 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3634 }
3635 else
3636 {
3637 /* 64-bit moves. */
3638 if (INSTR (19, 19) != 1)
3639 HALT_NYI;
3640
3641 elem = INSTR (20, 20);
3642 aarch64_set_vec_u64 (cpu, vd, elem,
3643 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3644 }
3645 }
3646 }
3647
3648 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3649 do \
3650 { \
3651 DST_TYPE a[N], b[N]; \
3652 \
3653 for (i = 0; i < (N); i++) \
3654 { \
3655 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3656 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3657 } \
3658 for (i = 0; i < (N); i++) \
3659 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3660 } \
3661 while (0)
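
/* What the macro expands to for DO_VEC_WIDENING_MUL (8, uint16_t,
   u8, u16), spelled out in a disabled block: both sources are read
   in full before any lane of vd is written, so the result stays
   correct when vd aliases vn or vm.  */
#if 0
static void
widening_mul_u8_expansion (sim_cpu *cpu, unsigned vd, unsigned vn,
                           unsigned vm, unsigned bias)
{
  uint16_t a[8], b[8];
  unsigned i;

  for (i = 0; i < 8; i++)
    {
      a[i] = aarch64_get_vec_u8 (cpu, vn, i + bias);
      b[i] = aarch64_get_vec_u8 (cpu, vm, i + bias);
    }
  for (i = 0; i < 8; i++)
    aarch64_set_vec_u16 (cpu, vd, i, a[i] * b[i]);
}
#endif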
3662
3663 static void
3664 do_vec_mull (sim_cpu *cpu)
3665 {
3666 /* instr[31] = 0
3667 instr[30] = lower(0)/upper(1) selector
3668 instr[29] = signed(0)/unsigned(1)
3669 instr[28,24] = 0 1110
3670 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3671 instr[21] = 1
3672 instr[20,16] = Vm
3673 instr[15,10] = 11 0000
3674 instr[9,5] = Vn
3675 instr[4,0] = Vd. */
3676
3677 int unsign = INSTR (29, 29);
3678 int bias = INSTR (30, 30);
3679 unsigned vm = INSTR (20, 16);
3680 unsigned vn = INSTR ( 9, 5);
3681 unsigned vd = INSTR ( 4, 0);
3682 unsigned i;
3683
3684 NYI_assert (28, 24, 0x0E);
3685 NYI_assert (15, 10, 0x30);
3686
3687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3688 /* NB: Read source values before writing results, in case
3689 the source and destination vectors are the same. */
3690 switch (INSTR (23, 22))
3691 {
3692 case 0:
3693 if (bias)
3694 bias = 8;
3695 if (unsign)
3696 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3697 else
3698 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3699 return;
3700
3701 case 1:
3702 if (bias)
3703 bias = 4;
3704 if (unsign)
3705 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3706 else
3707 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3708 return;
3709
3710 case 2:
3711 if (bias)
3712 bias = 2;
3713 if (unsign)
3714 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3715 else
3716 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3717 return;
3718
3719 case 3:
3720 HALT_NYI;
3721 }
3722 }
3723
3724 static void
3725 do_vec_fadd (sim_cpu *cpu)
3726 {
3727 /* instr[31] = 0
3728 instr[30] = half(0)/full(1)
3729 instr[29,24] = 001110
3730 instr[23] = FADD(0)/FSUB(1)
3731 instr[22] = float (0)/double(1)
3732 instr[21] = 1
3733 instr[20,16] = Vm
3734 instr[15,10] = 110101
3735 instr[9,5] = Vn
3736 instr[4,0] = Vd. */
3737
3738 unsigned vm = INSTR (20, 16);
3739 unsigned vn = INSTR (9, 5);
3740 unsigned vd = INSTR (4, 0);
3741 unsigned i;
3742 int full = INSTR (30, 30);
3743
3744 NYI_assert (29, 24, 0x0E);
3745 NYI_assert (21, 21, 1);
3746 NYI_assert (15, 10, 0x35);
3747
3748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3749 if (INSTR (23, 23))
3750 {
3751 if (INSTR (22, 22))
3752 {
3753 if (! full)
3754 HALT_NYI;
3755
3756 for (i = 0; i < 2; i++)
3757 aarch64_set_vec_double (cpu, vd, i,
3758 aarch64_get_vec_double (cpu, vn, i)
3759 - aarch64_get_vec_double (cpu, vm, i));
3760 }
3761 else
3762 {
3763 for (i = 0; i < (full ? 4 : 2); i++)
3764 aarch64_set_vec_float (cpu, vd, i,
3765 aarch64_get_vec_float (cpu, vn, i)
3766 - aarch64_get_vec_float (cpu, vm, i));
3767 }
3768 }
3769 else
3770 {
3771 if (INSTR (22, 22))
3772 {
3773 if (! full)
3774 HALT_NYI;
3775
3776 for (i = 0; i < 2; i++)
3777 aarch64_set_vec_double (cpu, vd, i,
3778 aarch64_get_vec_double (cpu, vm, i)
3779 + aarch64_get_vec_double (cpu, vn, i));
3780 }
3781 else
3782 {
3783 for (i = 0; i < (full ? 4 : 2); i++)
3784 aarch64_set_vec_float (cpu, vd, i,
3785 aarch64_get_vec_float (cpu, vm, i)
3786 + aarch64_get_vec_float (cpu, vn, i));
3787 }
3788 }
3789 }
3790
3791 static void
3792 do_vec_add (sim_cpu *cpu)
3793 {
3794 /* instr[31] = 0
3795 instr[30] = full/half selector
3796 instr[29,24] = 001110
3797 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3798 instr[21] = 1
3799 instr[20,16] = Vm
3800 instr[15,10] = 100001
3801 instr[9,5] = Vn
3802 instr[4,0] = Vd. */
3803
3804 unsigned vm = INSTR (20, 16);
3805 unsigned vn = INSTR (9, 5);
3806 unsigned vd = INSTR (4, 0);
3807 unsigned i;
3808 int full = INSTR (30, 30);
3809
3810 NYI_assert (29, 24, 0x0E);
3811 NYI_assert (21, 21, 1);
3812 NYI_assert (15, 10, 0x21);
3813
3814 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3815 switch (INSTR (23, 22))
3816 {
3817 case 0:
3818 for (i = 0; i < (full ? 16 : 8); i++)
3819 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3820 + aarch64_get_vec_u8 (cpu, vm, i));
3821 return;
3822
3823 case 1:
3824 for (i = 0; i < (full ? 8 : 4); i++)
3825 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3826 + aarch64_get_vec_u16 (cpu, vm, i));
3827 return;
3828
3829 case 2:
3830 for (i = 0; i < (full ? 4 : 2); i++)
3831 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3832 + aarch64_get_vec_u32 (cpu, vm, i));
3833 return;
3834
3835 case 3:
3836 if (! full)
3837 HALT_UNALLOC;
3838 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3839 + aarch64_get_vec_u64 (cpu, vm, 0));
3840 aarch64_set_vec_u64 (cpu, vd, 1,
3841 aarch64_get_vec_u64 (cpu, vn, 1)
3842 + aarch64_get_vec_u64 (cpu, vm, 1));
3843 return;
3844 }
3845 }
3846
3847 static void
3848 do_vec_mul (sim_cpu *cpu)
3849 {
3850 /* instr[31] = 0
3851 instr[30] = full/half selector
3852 instr[29,24] = 00 1110
3853 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3854 instr[21] = 1
3855 instr[20,16] = Vm
3856 instr[15,10] = 10 0111
3857 instr[9,5] = Vn
3858 instr[4,0] = Vd. */
3859
3860 unsigned vm = INSTR (20, 16);
3861 unsigned vn = INSTR (9, 5);
3862 unsigned vd = INSTR (4, 0);
3863 unsigned i;
3864 int full = INSTR (30, 30);
3865 int bias = 0;
3866
3867 NYI_assert (29, 24, 0x0E);
3868 NYI_assert (21, 21, 1);
3869 NYI_assert (15, 10, 0x27);
3870
3871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3872 switch (INSTR (23, 22))
3873 {
3874 case 0:
3875 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3876 return;
3877
3878 case 1:
3879 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3880 return;
3881
3882 case 2:
3883 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3884 return;
3885
3886 case 3:
3887 HALT_UNALLOC;
3888 }
3889 }
3890
3891 static void
3892 do_vec_MLA (sim_cpu *cpu)
3893 {
3894 /* instr[31] = 0
3895 instr[30] = full/half selector
3896 instr[29,24] = 00 1110
3897 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3898 instr[21] = 1
3899 instr[20,16] = Vm
3900 instr[15,10] = 1001 01
3901 instr[9,5] = Vn
3902 instr[4,0] = Vd. */
3903
3904 unsigned vm = INSTR (20, 16);
3905 unsigned vn = INSTR (9, 5);
3906 unsigned vd = INSTR (4, 0);
3907 unsigned i;
3908 int full = INSTR (30, 30);
3909
3910 NYI_assert (29, 24, 0x0E);
3911 NYI_assert (21, 21, 1);
3912 NYI_assert (15, 10, 0x25);
3913
3914 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3915 switch (INSTR (23, 22))
3916 {
3917 case 0:
3918 for (i = 0; i < (full ? 16 : 8); i++)
3919 aarch64_set_vec_u8 (cpu, vd, i,
3920 aarch64_get_vec_u8 (cpu, vd, i)
3921 + (aarch64_get_vec_u8 (cpu, vn, i)
3922 * aarch64_get_vec_u8 (cpu, vm, i)));
3923 return;
3924
3925 case 1:
3926 for (i = 0; i < (full ? 8 : 4); i++)
3927 aarch64_set_vec_u16 (cpu, vd, i,
3928 aarch64_get_vec_u16 (cpu, vd, i)
3929 + (aarch64_get_vec_u16 (cpu, vn, i)
3930 * aarch64_get_vec_u16 (cpu, vm, i)));
3931 return;
3932
3933 case 2:
3934 for (i = 0; i < (full ? 4 : 2); i++)
3935 aarch64_set_vec_u32 (cpu, vd, i,
3936 aarch64_get_vec_u32 (cpu, vd, i)
3937 + (aarch64_get_vec_u32 (cpu, vn, i)
3938 * aarch64_get_vec_u32 (cpu, vm, i)));
3939 return;
3940
3941 default:
3942 HALT_UNALLOC;
3943 }
3944 }
3945
3946 static float
3947 fmaxnm (float a, float b)
3948 {
3949 if (! isnan (a))
3950 {
3951 if (! isnan (b))
3952 return a > b ? a : b;
3953 return a;
3954 }
3955 else if (! isnan (b))
3956 return b;
3957 return a;
3958 }
3959
3960 static float
3961 fminnm (float a, float b)
3962 {
3963 if (! isnan (a))
3964 {
3965 if (! isnan (b))
3966 return a < b ? a : b;
3967 return a;
3968 }
3969 else if (! isnan (b))
3970 return b;
3971 return a;
3972 }
3973
3974 static double
3975 dmaxnm (double a, double b)
3976 {
3977 if (! isnan (a))
3978 {
3979 if (! isnan (b))
3980 return a > b ? a : b;
3981 return a;
3982 }
3983 else if (! isnan (b))
3984 return b;
3985 return a;
3986 }
3987
3988 static double
3989 dminnm (double a, double b)
3990 {
3991 if (! isnan (a))
3992 {
3993 if (! isnan (b))
3994 return a < b ? a : b;
3995 return a;
3996 }
3997 else if (! isnan (b))
3998 return b;
3999 return a;
4000 }
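
/* The four helpers above implement the IEEE 754-2008 maxNum/minNum
   behaviour required by the FMAXNM/FMINNM family: when exactly one
   operand is a NaN the other operand is returned, so for example
   fmaxnm (NAN, 2.0f) == 2.0f.  A NaN results only when both operands
   are NaNs.  */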
4001
4002 static void
4003 do_vec_FminmaxNMP (sim_cpu *cpu)
4004 {
4005 /* instr [31] = 0
4006 instr [30] = half (0)/full (1)
4007 instr [29,24] = 10 1110
4008 instr [23] = max(0)/min(1)
4009 instr [22] = float (0)/double (1)
4010 instr [21] = 1
4011      instr [20,16] = Vm
4012      instr [15,10] = 1100 01
4013      instr [9,5] = Vn
4014      instr [4,0] = Vd.  */
4015
4016 unsigned vm = INSTR (20, 16);
4017 unsigned vn = INSTR (9, 5);
4018 unsigned vd = INSTR (4, 0);
4019 int full = INSTR (30, 30);
4020
4021 NYI_assert (29, 24, 0x2E);
4022 NYI_assert (21, 21, 1);
4023 NYI_assert (15, 10, 0x31);
4024
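  /* Pairwise: the lower result elements reduce adjacent pairs from Vn,
     the upper elements adjacent pairs from Vm.  */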
4025 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4026 if (INSTR (22, 22))
4027 {
4028 double (* fn)(double, double) = INSTR (23, 23)
4029 ? dminnm : dmaxnm;
4030
4031 if (! full)
4032 HALT_NYI;
4033 aarch64_set_vec_double (cpu, vd, 0,
4034 fn (aarch64_get_vec_double (cpu, vn, 0),
4035 aarch64_get_vec_double (cpu, vn, 1)));
4036       aarch64_set_vec_double (cpu, vd, 1,
4037 fn (aarch64_get_vec_double (cpu, vm, 0),
4038 aarch64_get_vec_double (cpu, vm, 1)));
4039 }
4040 else
4041 {
4042 float (* fn)(float, float) = INSTR (23, 23)
4043 ? fminnm : fmaxnm;
4044
4045 aarch64_set_vec_float (cpu, vd, 0,
4046 fn (aarch64_get_vec_float (cpu, vn, 0),
4047 aarch64_get_vec_float (cpu, vn, 1)));
4048 if (full)
4049 aarch64_set_vec_float (cpu, vd, 1,
4050 fn (aarch64_get_vec_float (cpu, vn, 2),
4051 aarch64_get_vec_float (cpu, vn, 3)));
4052
4053 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4054 fn (aarch64_get_vec_float (cpu, vm, 0),
4055 aarch64_get_vec_float (cpu, vm, 1)));
4056 if (full)
4057 aarch64_set_vec_float (cpu, vd, 3,
4058 fn (aarch64_get_vec_float (cpu, vm, 2),
4059 aarch64_get_vec_float (cpu, vm, 3)));
4060 }
4061 }
4062
4063 static void
4064 do_vec_AND (sim_cpu *cpu)
4065 {
4066 /* instr[31] = 0
4067 instr[30] = half (0)/full (1)
4068 instr[29,21] = 001110001
4069 instr[20,16] = Vm
4070 instr[15,10] = 000111
4071 instr[9,5] = Vn
4072      instr[4,0] = Vd.  */
4073
4074 unsigned vm = INSTR (20, 16);
4075 unsigned vn = INSTR (9, 5);
4076 unsigned vd = INSTR (4, 0);
4077 unsigned i;
4078 int full = INSTR (30, 30);
4079
4080 NYI_assert (29, 21, 0x071);
4081 NYI_assert (15, 10, 0x07);
4082
4083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4084 for (i = 0; i < (full ? 4 : 2); i++)
4085 aarch64_set_vec_u32 (cpu, vd, i,
4086 aarch64_get_vec_u32 (cpu, vn, i)
4087 & aarch64_get_vec_u32 (cpu, vm, i));
4088 }
4089
4090 static void
4091 do_vec_BSL (sim_cpu *cpu)
4092 {
4093 /* instr[31] = 0
4094 instr[30] = half (0)/full (1)
4095 instr[29,21] = 101110011
4096 instr[20,16] = Vm
4097 instr[15,10] = 000111
4098 instr[9,5] = Vn
4099      instr[4,0] = Vd.  */
4100
4101 unsigned vm = INSTR (20, 16);
4102 unsigned vn = INSTR (9, 5);
4103 unsigned vd = INSTR (4, 0);
4104 unsigned i;
4105 int full = INSTR (30, 30);
4106
4107 NYI_assert (29, 21, 0x173);
4108 NYI_assert (15, 10, 0x07);
4109
4110 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
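  /* Bitwise select: each result bit comes from Vn where the corresponding
     Vd (mask) bit is set, and from Vm where it is clear.  */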
4111 for (i = 0; i < (full ? 16 : 8); i++)
4112 aarch64_set_vec_u8 (cpu, vd, i,
4113 ( aarch64_get_vec_u8 (cpu, vd, i)
4114 & aarch64_get_vec_u8 (cpu, vn, i))
4115 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4116 & aarch64_get_vec_u8 (cpu, vm, i)));
4117 }
4118
4119 static void
4120 do_vec_EOR (sim_cpu *cpu)
4121 {
4122 /* instr[31] = 0
4123 instr[30] = half (0)/full (1)
4124 instr[29,21] = 10 1110 001
4125 instr[20,16] = Vm
4126 instr[15,10] = 000111
4127 instr[9,5] = Vn
4128      instr[4,0] = Vd.  */
4129
4130 unsigned vm = INSTR (20, 16);
4131 unsigned vn = INSTR (9, 5);
4132 unsigned vd = INSTR (4, 0);
4133 unsigned i;
4134 int full = INSTR (30, 30);
4135
4136 NYI_assert (29, 21, 0x171);
4137 NYI_assert (15, 10, 0x07);
4138
4139 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4140 for (i = 0; i < (full ? 4 : 2); i++)
4141 aarch64_set_vec_u32 (cpu, vd, i,
4142 aarch64_get_vec_u32 (cpu, vn, i)
4143 ^ aarch64_get_vec_u32 (cpu, vm, i));
4144 }
4145
4146 static void
4147 do_vec_bit (sim_cpu *cpu)
4148 {
4149 /* instr[31] = 0
4150 instr[30] = half (0)/full (1)
4151 instr[29,23] = 10 1110 1
4152 instr[22] = BIT (0) / BIF (1)
4153 instr[21] = 1
4154 instr[20,16] = Vm
4155 instr[15,10] = 0001 11
4156 instr[9,5] = Vn
4157      instr[4,0] = Vd.  */
4158
4159 unsigned vm = INSTR (20, 16);
4160 unsigned vn = INSTR (9, 5);
4161 unsigned vd = INSTR (4, 0);
4162 unsigned full = INSTR (30, 30);
4163 unsigned test_false = INSTR (22, 22);
4164 unsigned i;
4165
4166 NYI_assert (29, 23, 0x5D);
4167 NYI_assert (21, 21, 1);
4168 NYI_assert (15, 10, 0x07);
4169
4170 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4171 for (i = 0; i < (full ? 4 : 2); i++)
4172 {
4173 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4174 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4175 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4176 if (test_false)
4177 aarch64_set_vec_u32 (cpu, vd, i,
4178 (vd_val & vm_val) | (vn_val & ~vm_val));
4179 else
4180 aarch64_set_vec_u32 (cpu, vd, i,
4181 (vd_val & ~vm_val) | (vn_val & vm_val));
4182 }
4183 }
4184
4185 static void
4186 do_vec_ORN (sim_cpu *cpu)
4187 {
4188 /* instr[31] = 0
4189 instr[30] = half (0)/full (1)
4190 instr[29,21] = 00 1110 111
4191 instr[20,16] = Vm
4192 instr[15,10] = 00 0111
4193 instr[9,5] = Vn
4194      instr[4,0] = Vd.  */
4195
4196 unsigned vm = INSTR (20, 16);
4197 unsigned vn = INSTR (9, 5);
4198 unsigned vd = INSTR (4, 0);
4199 unsigned i;
4200 int full = INSTR (30, 30);
4201
4202 NYI_assert (29, 21, 0x077);
4203 NYI_assert (15, 10, 0x07);
4204
4205 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4206 for (i = 0; i < (full ? 16 : 8); i++)
4207 aarch64_set_vec_u8 (cpu, vd, i,
4208 aarch64_get_vec_u8 (cpu, vn, i)
4209 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4210 }
4211
4212 static void
4213 do_vec_ORR (sim_cpu *cpu)
4214 {
4215 /* instr[31] = 0
4216 instr[30] = half (0)/full (1)
4217 instr[29,21] = 00 1110 101
4218 instr[20,16] = Vm
4219 instr[15,10] = 0001 11
4220 instr[9,5] = Vn
4221      instr[4,0] = Vd.  */
4222
4223 unsigned vm = INSTR (20, 16);
4224 unsigned vn = INSTR (9, 5);
4225 unsigned vd = INSTR (4, 0);
4226 unsigned i;
4227 int full = INSTR (30, 30);
4228
4229 NYI_assert (29, 21, 0x075);
4230 NYI_assert (15, 10, 0x07);
4231
4232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4233 for (i = 0; i < (full ? 16 : 8); i++)
4234 aarch64_set_vec_u8 (cpu, vd, i,
4235 aarch64_get_vec_u8 (cpu, vn, i)
4236 | aarch64_get_vec_u8 (cpu, vm, i));
4237 }
4238
4239 static void
4240 do_vec_BIC (sim_cpu *cpu)
4241 {
4242 /* instr[31] = 0
4243 instr[30] = half (0)/full (1)
4244 instr[29,21] = 00 1110 011
4245 instr[20,16] = Vm
4246 instr[15,10] = 00 0111
4247 instr[9,5] = Vn
4248      instr[4,0] = Vd.  */
4249
4250 unsigned vm = INSTR (20, 16);
4251 unsigned vn = INSTR (9, 5);
4252 unsigned vd = INSTR (4, 0);
4253 unsigned i;
4254 int full = INSTR (30, 30);
4255
4256 NYI_assert (29, 21, 0x073);
4257 NYI_assert (15, 10, 0x07);
4258
4259 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4260 for (i = 0; i < (full ? 16 : 8); i++)
4261 aarch64_set_vec_u8 (cpu, vd, i,
4262 aarch64_get_vec_u8 (cpu, vn, i)
4263 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4264 }
4265
4266 static void
4267 do_vec_XTN (sim_cpu *cpu)
4268 {
4269 /* instr[31] = 0
4270 instr[30] = first part (0)/ second part (1)
4271 instr[29,24] = 00 1110
4272 instr[23,22] = size: byte(00), half(01), word (10)
4273 instr[21,10] = 1000 0100 1010
4274 instr[9,5] = Vs
4275 instr[4,0] = Vd. */
4276
4277 unsigned vs = INSTR (9, 5);
4278 unsigned vd = INSTR (4, 0);
4279 unsigned bias = INSTR (30, 30);
4280 unsigned i;
4281
4282 NYI_assert (29, 24, 0x0E);
4283 NYI_assert (21, 10, 0x84A);
4284
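  /* XTN (bias == 0) narrows into the lower half of Vd; XTN2 (bias == 1)
     narrows into the upper half, leaving the lower half intact.  */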
4285 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4286 switch (INSTR (23, 22))
4287 {
4288 case 0:
4289 for (i = 0; i < 8; i++)
4290 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4291 aarch64_get_vec_u16 (cpu, vs, i));
4292 return;
4293
4294 case 1:
4295 for (i = 0; i < 4; i++)
4296 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4297 aarch64_get_vec_u32 (cpu, vs, i));
4298 return;
4299
4300 case 2:
4301 for (i = 0; i < 2; i++)
4302 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4303 aarch64_get_vec_u64 (cpu, vs, i));
4304 return;
4305 }
4306 }
4307
4308 /* Return the number of bits set in the input value. */
4309 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4310 # define popcount __builtin_popcount
4311 #else
4312 static int
4313 popcount (unsigned char x)
4314 {
4315 static const unsigned char popcnt[16] =
4316 {
4317 0, 1, 1, 2,
4318 1, 2, 2, 3,
4319 1, 2, 2, 3,
4320 2, 3, 3, 4
4321 };
4322
4323 /* Only counts the low 8 bits of the input as that is all we need. */
4324 return popcnt[x % 16] + popcnt[x / 16];
4325 }
4326 #endif
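
/* Example: popcount (0xA5) = popcnt[0x5] + popcnt[0xA] = 2 + 2 = 4.  */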
4327
4328 static void
4329 do_vec_CNT (sim_cpu *cpu)
4330 {
4331 /* instr[31] = 0
4332 instr[30] = half (0)/ full (1)
4333 instr[29,24] = 00 1110
4334 instr[23,22] = size: byte(00)
4335 instr[21,10] = 1000 0001 0110
4336 instr[9,5] = Vs
4337 instr[4,0] = Vd. */
4338
4339 unsigned vs = INSTR (9, 5);
4340 unsigned vd = INSTR (4, 0);
4341 int full = INSTR (30, 30);
4342 int size = INSTR (23, 22);
4343 int i;
4344
4345 NYI_assert (29, 24, 0x0E);
4346 NYI_assert (21, 10, 0x816);
4347
4348 if (size != 0)
4349 HALT_UNALLOC;
4350
4351 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4352
4353 for (i = 0; i < (full ? 16 : 8); i++)
4354 aarch64_set_vec_u8 (cpu, vd, i,
4355 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4356 }
4357
4358 static void
4359 do_vec_maxv (sim_cpu *cpu)
4360 {
4361 /* instr[31] = 0
4362 instr[30] = half(0)/full(1)
4363 instr[29] = signed (0)/unsigned(1)
4364 instr[28,24] = 0 1110
4365 instr[23,22] = size: byte(00), half(01), word (10)
4366 instr[21] = 1
4367 instr[20,17] = 1 000
4368 instr[16] = max(0)/min(1)
4369 instr[15,10] = 1010 10
4370 instr[9,5] = V source
4371      instr[4,0] = R dest.  */
4372
4373 unsigned vs = INSTR (9, 5);
4374 unsigned rd = INSTR (4, 0);
4375 unsigned full = INSTR (30, 30);
4376 unsigned i;
4377
4378 NYI_assert (28, 24, 0x0E);
4379 NYI_assert (21, 21, 1);
4380 NYI_assert (20, 17, 8);
4381 NYI_assert (15, 10, 0x2A);
4382
4383 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4384 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4385 {
4386 case 0: /* SMAXV. */
4387 {
4388 int64_t smax;
4389 switch (INSTR (23, 22))
4390 {
4391 case 0:
4392 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4393 for (i = 1; i < (full ? 16 : 8); i++)
4394 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4395 break;
4396 case 1:
4397 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4398 for (i = 1; i < (full ? 8 : 4); i++)
4399 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4400 break;
4401 case 2:
4402 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4403 for (i = 1; i < (full ? 4 : 2); i++)
4404 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4405 break;
4406 case 3:
4407 HALT_UNALLOC;
4408 }
4409 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4410 return;
4411 }
4412
4413 case 1: /* SMINV. */
4414 {
4415 int64_t smin;
4416 switch (INSTR (23, 22))
4417 {
4418 case 0:
4419 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4420 for (i = 1; i < (full ? 16 : 8); i++)
4421 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4422 break;
4423 case 1:
4424 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4425 for (i = 1; i < (full ? 8 : 4); i++)
4426 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4427 break;
4428 case 2:
4429 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4430 for (i = 1; i < (full ? 4 : 2); i++)
4431 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4432 break;
4433
4434 case 3:
4435 HALT_UNALLOC;
4436 }
4437 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4438 return;
4439 }
4440
4441 case 2: /* UMAXV. */
4442 {
4443 uint64_t umax;
4444 switch (INSTR (23, 22))
4445 {
4446 case 0:
4447 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4448 for (i = 1; i < (full ? 16 : 8); i++)
4449 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4450 break;
4451 case 1:
4452 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4453 for (i = 1; i < (full ? 8 : 4); i++)
4454 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4455 break;
4456 case 2:
4457 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4458 for (i = 1; i < (full ? 4 : 2); i++)
4459 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4460 break;
4461
4462 case 3:
4463 HALT_UNALLOC;
4464 }
4465 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4466 return;
4467 }
4468
4469 case 3: /* UMINV. */
4470 {
4471 uint64_t umin;
4472 switch (INSTR (23, 22))
4473 {
4474 case 0:
4475 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4476 for (i = 1; i < (full ? 16 : 8); i++)
4477 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4478 break;
4479 case 1:
4480 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4481 for (i = 1; i < (full ? 8 : 4); i++)
4482 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4483 break;
4484 case 2:
4485 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4486 for (i = 1; i < (full ? 4 : 2); i++)
4487 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4488 break;
4489
4490 case 3:
4491 HALT_UNALLOC;
4492 }
4493 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4494 return;
4495 }
4496 }
4497 }
4498
4499 static void
4500 do_vec_fminmaxV (sim_cpu *cpu)
4501 {
4502 /* instr[31,24] = 0110 1110
4503 instr[23] = max(0)/min(1)
4504 instr[22,14] = 011 0000 11
4505 instr[13,12] = nm(00)/normal(11)
4506 instr[11,10] = 10
4507 instr[9,5] = V source
4508      instr[4,0] = R dest.  */
4509
4510 unsigned vs = INSTR (9, 5);
4511 unsigned rd = INSTR (4, 0);
4512 unsigned i;
4513 float res = aarch64_get_vec_float (cpu, vs, 0);
4514
4515 NYI_assert (31, 24, 0x6E);
4516 NYI_assert (22, 14, 0x0C3);
4517 NYI_assert (11, 10, 2);
4518
4519 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4520 if (INSTR (23, 23))
4521 {
4522 switch (INSTR (13, 12))
4523 {
4524 	case 0: /* FMINNMV.  */
4525 for (i = 1; i < 4; i++)
4526 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4527 break;
4528
4529 case 3: /* FMINV. */
4530 for (i = 1; i < 4; i++)
4531 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4532 break;
4533
4534 default:
4535 HALT_NYI;
4536 }
4537 }
4538 else
4539 {
4540 switch (INSTR (13, 12))
4541 {
4542 	case 0: /* FMAXNMV.  */
4543 for (i = 1; i < 4; i++)
4544 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4545 break;
4546
4547 case 3: /* FMAXV. */
4548 for (i = 1; i < 4; i++)
4549 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4550 break;
4551
4552 default:
4553 HALT_NYI;
4554 }
4555 }
4556
4557 aarch64_set_FP_float (cpu, rd, res);
4558 }
4559
4560 static void
4561 do_vec_Fminmax (sim_cpu *cpu)
4562 {
4563 /* instr[31] = 0
4564 instr[30] = half(0)/full(1)
4565 instr[29,24] = 00 1110
4566 instr[23] = max(0)/min(1)
4567 instr[22] = float(0)/double(1)
4568 instr[21] = 1
4569 instr[20,16] = Vm
4570 instr[15,14] = 11
4571 instr[13,12] = nm(00)/normal(11)
4572 instr[11,10] = 01
4573 instr[9,5] = Vn
4574 instr[4,0] = Vd. */
4575
4576 unsigned vm = INSTR (20, 16);
4577 unsigned vn = INSTR (9, 5);
4578 unsigned vd = INSTR (4, 0);
4579 unsigned full = INSTR (30, 30);
4580 unsigned min = INSTR (23, 23);
4581 unsigned i;
4582
4583 NYI_assert (29, 24, 0x0E);
4584 NYI_assert (21, 21, 1);
4585 NYI_assert (15, 14, 3);
4586 NYI_assert (11, 10, 1);
4587
4588 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4589 if (INSTR (22, 22))
4590 {
4591 double (* func)(double, double);
4592
4593 if (! full)
4594 HALT_NYI;
4595
4596 if (INSTR (13, 12) == 0)
4597 func = min ? dminnm : dmaxnm;
4598 else if (INSTR (13, 12) == 3)
4599 func = min ? fmin : fmax;
4600 else
4601 HALT_NYI;
4602
4603 for (i = 0; i < 2; i++)
4604 aarch64_set_vec_double (cpu, vd, i,
4605 func (aarch64_get_vec_double (cpu, vn, i),
4606 aarch64_get_vec_double (cpu, vm, i)));
4607 }
4608 else
4609 {
4610 float (* func)(float, float);
4611
4612 if (INSTR (13, 12) == 0)
4613 func = min ? fminnm : fmaxnm;
4614 else if (INSTR (13, 12) == 3)
4615 func = min ? fminf : fmaxf;
4616 else
4617 HALT_NYI;
4618
4619 for (i = 0; i < (full ? 4 : 2); i++)
4620 aarch64_set_vec_float (cpu, vd, i,
4621 func (aarch64_get_vec_float (cpu, vn, i),
4622 aarch64_get_vec_float (cpu, vm, i)));
4623 }
4624 }
4625
4626 static void
4627 do_vec_SCVTF (sim_cpu *cpu)
4628 {
4629 /* instr[31] = 0
4630 instr[30] = Q
4631 instr[29,23] = 00 1110 0
4632 instr[22] = float(0)/double(1)
4633 instr[21,10] = 10 0001 1101 10
4634 instr[9,5] = Vn
4635 instr[4,0] = Vd. */
4636
4637 unsigned vn = INSTR (9, 5);
4638 unsigned vd = INSTR (4, 0);
4639 unsigned full = INSTR (30, 30);
4640 unsigned size = INSTR (22, 22);
4641 unsigned i;
4642
4643 NYI_assert (29, 23, 0x1C);
4644 NYI_assert (21, 10, 0x876);
4645
4646 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4647 if (size)
4648 {
4649 if (! full)
4650 HALT_UNALLOC;
4651
4652 for (i = 0; i < 2; i++)
4653 {
4654 	  double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4655 aarch64_set_vec_double (cpu, vd, i, val);
4656 }
4657 }
4658 else
4659 {
4660 for (i = 0; i < (full ? 4 : 2); i++)
4661 {
4662 	  float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4663 aarch64_set_vec_float (cpu, vd, i, val);
4664 }
4665 }
4666 }
4667
4668 #define VEC_CMP(SOURCE, CMP) \
4669 do \
4670 { \
4671 switch (size) \
4672 { \
4673 case 0: \
4674 for (i = 0; i < (full ? 16 : 8); i++) \
4675 aarch64_set_vec_u8 (cpu, vd, i, \
4676 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4677 CMP \
4678 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4679 ? -1 : 0); \
4680 return; \
4681 case 1: \
4682 for (i = 0; i < (full ? 8 : 4); i++) \
4683 aarch64_set_vec_u16 (cpu, vd, i, \
4684 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4685 CMP \
4686 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4687 ? -1 : 0); \
4688 return; \
4689 case 2: \
4690 for (i = 0; i < (full ? 4 : 2); i++) \
4691 aarch64_set_vec_u32 (cpu, vd, i, \
4692 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4693 CMP \
4694 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4695 ? -1 : 0); \
4696 return; \
4697 case 3: \
4698 if (! full) \
4699 HALT_UNALLOC; \
4700 for (i = 0; i < 2; i++) \
4701 aarch64_set_vec_u64 (cpu, vd, i, \
4702 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4703 CMP \
4704 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4705 ? -1ULL : 0); \
4706 return; \
4707 } \
4708 } \
4709 while (0)
4710
4711 #define VEC_CMP0(SOURCE, CMP) \
4712 do \
4713 { \
4714 switch (size) \
4715 { \
4716 case 0: \
4717 for (i = 0; i < (full ? 16 : 8); i++) \
4718 aarch64_set_vec_u8 (cpu, vd, i, \
4719 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4720 CMP 0 ? -1 : 0); \
4721 return; \
4722 case 1: \
4723 for (i = 0; i < (full ? 8 : 4); i++) \
4724 aarch64_set_vec_u16 (cpu, vd, i, \
4725 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4726 CMP 0 ? -1 : 0); \
4727 return; \
4728 case 2: \
4729 for (i = 0; i < (full ? 4 : 2); i++) \
4730 aarch64_set_vec_u32 (cpu, vd, i, \
4731 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4732 CMP 0 ? -1 : 0); \
4733 return; \
4734 case 3: \
4735 if (! full) \
4736 HALT_UNALLOC; \
4737 for (i = 0; i < 2; i++) \
4738 aarch64_set_vec_u64 (cpu, vd, i, \
4739 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4740 CMP 0 ? -1ULL : 0); \
4741 return; \
4742 } \
4743 } \
4744 while (0)
4745
4746 #define VEC_FCMP0(CMP) \
4747 do \
4748 { \
4749 if (vm != 0) \
4750 HALT_NYI; \
4751 if (INSTR (22, 22)) \
4752 { \
4753 if (! full) \
4754 HALT_NYI; \
4755 for (i = 0; i < 2; i++) \
4756 aarch64_set_vec_u64 (cpu, vd, i, \
4757 aarch64_get_vec_double (cpu, vn, i) \
4758 CMP 0.0 ? -1 : 0); \
4759 } \
4760 else \
4761 { \
4762 for (i = 0; i < (full ? 4 : 2); i++) \
4763 aarch64_set_vec_u32 (cpu, vd, i, \
4764 aarch64_get_vec_float (cpu, vn, i) \
4765 CMP 0.0 ? -1 : 0); \
4766 } \
4767 return; \
4768 } \
4769 while (0)
4770
4771 #define VEC_FCMP(CMP) \
4772 do \
4773 { \
4774 if (INSTR (22, 22)) \
4775 { \
4776 if (! full) \
4777 HALT_NYI; \
4778 for (i = 0; i < 2; i++) \
4779 aarch64_set_vec_u64 (cpu, vd, i, \
4780 aarch64_get_vec_double (cpu, vn, i) \
4781 CMP \
4782 aarch64_get_vec_double (cpu, vm, i) \
4783 ? -1 : 0); \
4784 } \
4785 else \
4786 { \
4787 for (i = 0; i < (full ? 4 : 2); i++) \
4788 aarch64_set_vec_u32 (cpu, vd, i, \
4789 aarch64_get_vec_float (cpu, vn, i) \
4790 CMP \
4791 aarch64_get_vec_float (cpu, vm, i) \
4792 ? -1 : 0); \
4793 } \
4794 return; \
4795 } \
4796 while (0)
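
/* The VEC_CMP, VEC_CMP0, VEC_FCMP0 and VEC_FCMP helpers above follow the
   SIMD compare convention: a lane is set to all ones when the comparison
   holds and to all zeros otherwise.  For example, a 32-bit signed GT
   compare of {1, 5} against {3, 2} produces {0x00000000, 0xffffffff}.  */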
4797
4798 static void
4799 do_vec_compare (sim_cpu *cpu)
4800 {
4801 /* instr[31] = 0
4802 instr[30] = half(0)/full(1)
4803 instr[29] = part-of-comparison-type
4804 instr[28,24] = 0 1110
4805 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4806 type of float compares: single (-0) / double (-1)
4807 instr[21] = 1
4808 instr[20,16] = Vm or 00000 (compare vs 0)
4809 instr[15,10] = part-of-comparison-type
4810 instr[9,5] = Vn
4811      instr[4,0] = Vd.  */
4812
4813 int full = INSTR (30, 30);
4814 int size = INSTR (23, 22);
4815 unsigned vm = INSTR (20, 16);
4816 unsigned vn = INSTR (9, 5);
4817 unsigned vd = INSTR (4, 0);
4818 unsigned i;
4819
4820 NYI_assert (28, 24, 0x0E);
4821 NYI_assert (21, 21, 1);
4822
4823 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
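  /* Some non-compare instructions share this encoding space: when the Vm
     field of a would-be compare-against-zero is non-zero, redispatch to
     the appropriate handler.  */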
4824 if ((INSTR (11, 11)
4825 && INSTR (14, 14))
4826 || ((INSTR (11, 11) == 0
4827 && INSTR (10, 10) == 0)))
4828 {
4829 /* A compare vs 0. */
4830 if (vm != 0)
4831 {
4832 if (INSTR (15, 10) == 0x2A)
4833 do_vec_maxv (cpu);
4834 else if (INSTR (15, 10) == 0x32
4835 || INSTR (15, 10) == 0x3E)
4836 do_vec_fminmaxV (cpu);
4837 else if (INSTR (29, 23) == 0x1C
4838 && INSTR (21, 10) == 0x876)
4839 do_vec_SCVTF (cpu);
4840 else
4841 HALT_NYI;
4842 return;
4843 }
4844 }
4845
4846 if (INSTR (14, 14))
4847 {
4848 /* A floating point compare. */
4849 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4850 | INSTR (13, 10);
4851
4852 NYI_assert (15, 15, 1);
4853
4854 switch (decode)
4855 {
4856 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4857 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4858 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4859 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4860 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4861 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4862 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4863 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4864
4865 default:
4866 HALT_NYI;
4867 }
4868 }
4869 else
4870 {
4871 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4872
4873 switch (decode)
4874 {
4875 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4876 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4877 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4878 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4879 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4880 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4881 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4882 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4883 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4884 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4885 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4886 default:
4887 if (vm == 0)
4888 HALT_NYI;
4889 do_vec_maxv (cpu);
4890 }
4891 }
4892 }
4893
4894 static void
4895 do_vec_SSHL (sim_cpu *cpu)
4896 {
4897 /* instr[31] = 0
4898 instr[30] = first part (0)/ second part (1)
4899 instr[29,24] = 00 1110
4900 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4901 instr[21] = 1
4902 instr[20,16] = Vm
4903 instr[15,10] = 0100 01
4904 instr[9,5] = Vn
4905 instr[4,0] = Vd. */
4906
4907 unsigned full = INSTR (30, 30);
4908 unsigned vm = INSTR (20, 16);
4909 unsigned vn = INSTR (9, 5);
4910 unsigned vd = INSTR (4, 0);
4911 unsigned i;
4912 signed int shift;
4913
4914 NYI_assert (29, 24, 0x0E);
4915 NYI_assert (21, 21, 1);
4916 NYI_assert (15, 10, 0x11);
4917
4918   /* The shift count is the signed byte in the corresponding element of
4919      Vm: a non-negative count shifts left, a negative count shifts right.  */
4919
4920 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4921 switch (INSTR (23, 22))
4922 {
4923 case 0:
4924 for (i = 0; i < (full ? 16 : 8); i++)
4925 {
4926 shift = aarch64_get_vec_s8 (cpu, vm, i);
4927 if (shift >= 0)
4928 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4929 << shift);
4930 else
4931 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4932 >> - shift);
4933 }
4934 return;
4935
4936 case 1:
4937 for (i = 0; i < (full ? 8 : 4); i++)
4938 {
4939 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4940 if (shift >= 0)
4941 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4942 << shift);
4943 else
4944 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4945 >> - shift);
4946 }
4947 return;
4948
4949 case 2:
4950 for (i = 0; i < (full ? 4 : 2); i++)
4951 {
4952 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4953 if (shift >= 0)
4954 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4955 << shift);
4956 else
4957 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4958 >> - shift);
4959 }
4960 return;
4961
4962 case 3:
4963 if (! full)
4964 HALT_UNALLOC;
4965 for (i = 0; i < 2; i++)
4966 {
4967 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4968 if (shift >= 0)
4969 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4970 << shift);
4971 else
4972 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4973 >> - shift);
4974 }
4975 return;
4976 }
4977 }
4978
4979 static void
4980 do_vec_USHL (sim_cpu *cpu)
4981 {
4982 /* instr[31] = 0
4983 instr[30] = first part (0)/ second part (1)
4984 instr[29,24] = 10 1110
4985 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4986 instr[21] = 1
4987 instr[20,16] = Vm
4988 instr[15,10] = 0100 01
4989 instr[9,5] = Vn
4990 instr[4,0] = Vd */
4991
4992 unsigned full = INSTR (30, 30);
4993 unsigned vm = INSTR (20, 16);
4994 unsigned vn = INSTR (9, 5);
4995 unsigned vd = INSTR (4, 0);
4996 unsigned i;
4997 signed int shift;
4998
4999 NYI_assert (29, 24, 0x2E);
5000 NYI_assert (15, 10, 0x11);
5001
5002 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5003 switch (INSTR (23, 22))
5004 {
5005 case 0:
5006 for (i = 0; i < (full ? 16 : 8); i++)
5007 {
5008 shift = aarch64_get_vec_s8 (cpu, vm, i);
5009 if (shift >= 0)
5010 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5011 << shift);
5012 else
5013 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5014 >> - shift);
5015 }
5016 return;
5017
5018 case 1:
5019 for (i = 0; i < (full ? 8 : 4); i++)
5020 {
5021 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5022 if (shift >= 0)
5023 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5024 << shift);
5025 else
5026 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5027 >> - shift);
5028 }
5029 return;
5030
5031 case 2:
5032 for (i = 0; i < (full ? 4 : 2); i++)
5033 {
5034 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5035 if (shift >= 0)
5036 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5037 << shift);
5038 else
5039 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5040 >> - shift);
5041 }
5042 return;
5043
5044 case 3:
5045 if (! full)
5046 HALT_UNALLOC;
5047 for (i = 0; i < 2; i++)
5048 {
5049 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5050 if (shift >= 0)
5051 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5052 << shift);
5053 else
5054 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5055 >> - shift);
5056 }
5057 return;
5058 }
5059 }
5060
5061 static void
5062 do_vec_FMLA (sim_cpu *cpu)
5063 {
5064 /* instr[31] = 0
5065 instr[30] = full/half selector
5066 instr[29,23] = 0011100
5067 instr[22] = size: 0=>float, 1=>double
5068 instr[21] = 1
5069      instr[20,16] = Vm
5070      instr[15,10] = 1100 11
5071      instr[9,5] = Vn
5072      instr[4,0] = Vd.  */
5073
5074 unsigned vm = INSTR (20, 16);
5075 unsigned vn = INSTR (9, 5);
5076 unsigned vd = INSTR (4, 0);
5077 unsigned i;
5078 int full = INSTR (30, 30);
5079
5080 NYI_assert (29, 23, 0x1C);
5081 NYI_assert (21, 21, 1);
5082 NYI_assert (15, 10, 0x33);
5083
5084 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5085 if (INSTR (22, 22))
5086 {
5087 if (! full)
5088 HALT_UNALLOC;
5089 for (i = 0; i < 2; i++)
5090 aarch64_set_vec_double (cpu, vd, i,
5091 aarch64_get_vec_double (cpu, vn, i) *
5092 aarch64_get_vec_double (cpu, vm, i) +
5093 aarch64_get_vec_double (cpu, vd, i));
5094 }
5095 else
5096 {
5097 for (i = 0; i < (full ? 4 : 2); i++)
5098 aarch64_set_vec_float (cpu, vd, i,
5099 aarch64_get_vec_float (cpu, vn, i) *
5100 aarch64_get_vec_float (cpu, vm, i) +
5101 aarch64_get_vec_float (cpu, vd, i));
5102 }
5103 }
5104
5105 static void
5106 do_vec_max (sim_cpu *cpu)
5107 {
5108 /* instr[31] = 0
5109 instr[30] = full/half selector
5110 instr[29] = SMAX (0) / UMAX (1)
5111 instr[28,24] = 0 1110
5112 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5113 instr[21] = 1
5114      instr[20,16] = Vm
5115      instr[15,10] = 0110 01
5116      instr[9,5] = Vn
5117      instr[4,0] = Vd.  */
5118
5119 unsigned vm = INSTR (20, 16);
5120 unsigned vn = INSTR (9, 5);
5121 unsigned vd = INSTR (4, 0);
5122 unsigned i;
5123 int full = INSTR (30, 30);
5124
5125 NYI_assert (28, 24, 0x0E);
5126 NYI_assert (21, 21, 1);
5127 NYI_assert (15, 10, 0x19);
5128
5129 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5130 if (INSTR (29, 29))
5131 {
5132 switch (INSTR (23, 22))
5133 {
5134 case 0:
5135 for (i = 0; i < (full ? 16 : 8); i++)
5136 aarch64_set_vec_u8 (cpu, vd, i,
5137 aarch64_get_vec_u8 (cpu, vn, i)
5138 > aarch64_get_vec_u8 (cpu, vm, i)
5139 ? aarch64_get_vec_u8 (cpu, vn, i)
5140 : aarch64_get_vec_u8 (cpu, vm, i));
5141 return;
5142
5143 case 1:
5144 for (i = 0; i < (full ? 8 : 4); i++)
5145 aarch64_set_vec_u16 (cpu, vd, i,
5146 aarch64_get_vec_u16 (cpu, vn, i)
5147 > aarch64_get_vec_u16 (cpu, vm, i)
5148 ? aarch64_get_vec_u16 (cpu, vn, i)
5149 : aarch64_get_vec_u16 (cpu, vm, i));
5150 return;
5151
5152 case 2:
5153 for (i = 0; i < (full ? 4 : 2); i++)
5154 aarch64_set_vec_u32 (cpu, vd, i,
5155 aarch64_get_vec_u32 (cpu, vn, i)
5156 > aarch64_get_vec_u32 (cpu, vm, i)
5157 ? aarch64_get_vec_u32 (cpu, vn, i)
5158 : aarch64_get_vec_u32 (cpu, vm, i));
5159 return;
5160
5161 case 3:
5162 HALT_UNALLOC;
5163 }
5164 }
5165 else
5166 {
5167 switch (INSTR (23, 22))
5168 {
5169 case 0:
5170 for (i = 0; i < (full ? 16 : 8); i++)
5171 aarch64_set_vec_s8 (cpu, vd, i,
5172 aarch64_get_vec_s8 (cpu, vn, i)
5173 > aarch64_get_vec_s8 (cpu, vm, i)
5174 ? aarch64_get_vec_s8 (cpu, vn, i)
5175 : aarch64_get_vec_s8 (cpu, vm, i));
5176 return;
5177
5178 case 1:
5179 for (i = 0; i < (full ? 8 : 4); i++)
5180 aarch64_set_vec_s16 (cpu, vd, i,
5181 aarch64_get_vec_s16 (cpu, vn, i)
5182 > aarch64_get_vec_s16 (cpu, vm, i)
5183 ? aarch64_get_vec_s16 (cpu, vn, i)
5184 : aarch64_get_vec_s16 (cpu, vm, i));
5185 return;
5186
5187 case 2:
5188 for (i = 0; i < (full ? 4 : 2); i++)
5189 aarch64_set_vec_s32 (cpu, vd, i,
5190 aarch64_get_vec_s32 (cpu, vn, i)
5191 > aarch64_get_vec_s32 (cpu, vm, i)
5192 ? aarch64_get_vec_s32 (cpu, vn, i)
5193 : aarch64_get_vec_s32 (cpu, vm, i));
5194 return;
5195
5196 case 3:
5197 HALT_UNALLOC;
5198 }
5199 }
5200 }
5201
5202 static void
5203 do_vec_min (sim_cpu *cpu)
5204 {
5205 /* instr[31] = 0
5206 instr[30] = full/half selector
5207 instr[29] = SMIN (0) / UMIN (1)
5208 instr[28,24] = 0 1110
5209 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5210 instr[21] = 1
5211      instr[20,16] = Vm
5212      instr[15,10] = 0110 11
5213      instr[9,5] = Vn
5214      instr[4,0] = Vd.  */
5215
5216 unsigned vm = INSTR (20, 16);
5217 unsigned vn = INSTR (9, 5);
5218 unsigned vd = INSTR (4, 0);
5219 unsigned i;
5220 int full = INSTR (30, 30);
5221
5222 NYI_assert (28, 24, 0x0E);
5223 NYI_assert (21, 21, 1);
5224 NYI_assert (15, 10, 0x1B);
5225
5226 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5227 if (INSTR (29, 29))
5228 {
5229 switch (INSTR (23, 22))
5230 {
5231 case 0:
5232 for (i = 0; i < (full ? 16 : 8); i++)
5233 aarch64_set_vec_u8 (cpu, vd, i,
5234 aarch64_get_vec_u8 (cpu, vn, i)
5235 < aarch64_get_vec_u8 (cpu, vm, i)
5236 ? aarch64_get_vec_u8 (cpu, vn, i)
5237 : aarch64_get_vec_u8 (cpu, vm, i));
5238 return;
5239
5240 case 1:
5241 for (i = 0; i < (full ? 8 : 4); i++)
5242 aarch64_set_vec_u16 (cpu, vd, i,
5243 aarch64_get_vec_u16 (cpu, vn, i)
5244 < aarch64_get_vec_u16 (cpu, vm, i)
5245 ? aarch64_get_vec_u16 (cpu, vn, i)
5246 : aarch64_get_vec_u16 (cpu, vm, i));
5247 return;
5248
5249 case 2:
5250 for (i = 0; i < (full ? 4 : 2); i++)
5251 aarch64_set_vec_u32 (cpu, vd, i,
5252 aarch64_get_vec_u32 (cpu, vn, i)
5253 < aarch64_get_vec_u32 (cpu, vm, i)
5254 ? aarch64_get_vec_u32 (cpu, vn, i)
5255 : aarch64_get_vec_u32 (cpu, vm, i));
5256 return;
5257
5258 case 3:
5259 HALT_UNALLOC;
5260 }
5261 }
5262 else
5263 {
5264 switch (INSTR (23, 22))
5265 {
5266 case 0:
5267 for (i = 0; i < (full ? 16 : 8); i++)
5268 aarch64_set_vec_s8 (cpu, vd, i,
5269 aarch64_get_vec_s8 (cpu, vn, i)
5270 < aarch64_get_vec_s8 (cpu, vm, i)
5271 ? aarch64_get_vec_s8 (cpu, vn, i)
5272 : aarch64_get_vec_s8 (cpu, vm, i));
5273 return;
5274
5275 case 1:
5276 for (i = 0; i < (full ? 8 : 4); i++)
5277 aarch64_set_vec_s16 (cpu, vd, i,
5278 aarch64_get_vec_s16 (cpu, vn, i)
5279 < aarch64_get_vec_s16 (cpu, vm, i)
5280 ? aarch64_get_vec_s16 (cpu, vn, i)
5281 : aarch64_get_vec_s16 (cpu, vm, i));
5282 return;
5283
5284 case 2:
5285 for (i = 0; i < (full ? 4 : 2); i++)
5286 aarch64_set_vec_s32 (cpu, vd, i,
5287 aarch64_get_vec_s32 (cpu, vn, i)
5288 < aarch64_get_vec_s32 (cpu, vm, i)
5289 ? aarch64_get_vec_s32 (cpu, vn, i)
5290 : aarch64_get_vec_s32 (cpu, vm, i));
5291 return;
5292
5293 case 3:
5294 HALT_UNALLOC;
5295 }
5296 }
5297 }
5298
5299 static void
5300 do_vec_sub_long (sim_cpu *cpu)
5301 {
5302 /* instr[31] = 0
5303 instr[30] = lower (0) / upper (1)
5304 instr[29] = signed (0) / unsigned (1)
5305 instr[28,24] = 0 1110
5306 instr[23,22] = size: bytes (00), half (01), word (10)
5307 instr[21] = 1
5308      instr[20,16] = Vm
5309 instr[15,10] = 0010 00
5310 instr[9,5] = Vn
5311 instr[4,0] = V dest. */
5312
5313 unsigned size = INSTR (23, 22);
5314 unsigned vm = INSTR (20, 16);
5315 unsigned vn = INSTR (9, 5);
5316 unsigned vd = INSTR (4, 0);
5317 unsigned bias = 0;
5318 unsigned i;
5319
5320 NYI_assert (28, 24, 0x0E);
5321 NYI_assert (21, 21, 1);
5322 NYI_assert (15, 10, 0x08);
5323
5324 if (size == 3)
5325 HALT_UNALLOC;
5326
5327 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5328 switch (INSTR (30, 29))
5329 {
5330 case 2: /* SSUBL2. */
5331 bias = 2;
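      /* Fall through.  */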
5332 case 0: /* SSUBL. */
5333 switch (size)
5334 {
5335 case 0:
5336 	    bias *= 4;	/* Byte elements: the "2" variant reads elements 8..15.  */
5337 for (i = 0; i < 8; i++)
5338 aarch64_set_vec_s16 (cpu, vd, i,
5339 aarch64_get_vec_s8 (cpu, vn, i + bias)
5340 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5341 break;
5342
5343 case 1:
5344 bias *= 2;
5345 for (i = 0; i < 4; i++)
5346 aarch64_set_vec_s32 (cpu, vd, i,
5347 aarch64_get_vec_s16 (cpu, vn, i + bias)
5348 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5349 break;
5350
5351 case 2:
5352 for (i = 0; i < 2; i++)
5353 aarch64_set_vec_s64 (cpu, vd, i,
5354 aarch64_get_vec_s32 (cpu, vn, i + bias)
5355 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5356 break;
5357
5358 default:
5359 HALT_UNALLOC;
5360 }
5361 break;
5362
5363 case 3: /* USUBL2. */
5364 bias = 2;
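      /* Fall through.  */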
5365 case 1: /* USUBL. */
5366 switch (size)
5367 {
5368 case 0:
5369 	    bias *= 4;	/* Byte elements: the "2" variant reads elements 8..15.  */
5370 for (i = 0; i < 8; i++)
5371 aarch64_set_vec_u16 (cpu, vd, i,
5372 aarch64_get_vec_u8 (cpu, vn, i + bias)
5373 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5374 break;
5375
5376 case 1:
5377 bias *= 2;
5378 for (i = 0; i < 4; i++)
5379 aarch64_set_vec_u32 (cpu, vd, i,
5380 aarch64_get_vec_u16 (cpu, vn, i + bias)
5381 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5382 break;
5383
5384 case 2:
5385 for (i = 0; i < 2; i++)
5386 aarch64_set_vec_u64 (cpu, vd, i,
5387 aarch64_get_vec_u32 (cpu, vn, i + bias)
5388 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5389 break;
5390
5391 default:
5392 HALT_UNALLOC;
5393 }
5394 break;
5395 }
5396 }
5397
5398 static void
5399 do_vec_ADDP (sim_cpu *cpu)
5400 {
5401 /* instr[31] = 0
5402 instr[30] = half(0)/full(1)
5403 instr[29,24] = 00 1110
5404 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5405 instr[21] = 1
5406      instr[20,16] = Vm
5407 instr[15,10] = 1011 11
5408 instr[9,5] = Vn
5409 instr[4,0] = V dest. */
5410
5411 FRegister copy_vn;
5412 FRegister copy_vm;
5413 unsigned full = INSTR (30, 30);
5414 unsigned size = INSTR (23, 22);
5415 unsigned vm = INSTR (20, 16);
5416 unsigned vn = INSTR (9, 5);
5417 unsigned vd = INSTR (4, 0);
5418 unsigned i, range;
5419
5420 NYI_assert (29, 24, 0x0E);
5421 NYI_assert (21, 21, 1);
5422 NYI_assert (15, 10, 0x2F);
5423
5424 /* Make copies of the source registers in case vd == vn/vm. */
5425 copy_vn = cpu->fr[vn];
5426 copy_vm = cpu->fr[vm];
5427
5428 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5429 switch (size)
5430 {
5431 case 0:
5432 range = full ? 8 : 4;
5433 for (i = 0; i < range; i++)
5434 {
5435 aarch64_set_vec_u8 (cpu, vd, i,
5436 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5437 aarch64_set_vec_u8 (cpu, vd, i + range,
5438 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5439 }
5440 return;
5441
5442 case 1:
5443 range = full ? 4 : 2;
5444 for (i = 0; i < range; i++)
5445 {
5446 aarch64_set_vec_u16 (cpu, vd, i,
5447 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5448 aarch64_set_vec_u16 (cpu, vd, i + range,
5449 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5450 }
5451 return;
5452
5453 case 2:
5454 range = full ? 2 : 1;
5455 for (i = 0; i < range; i++)
5456 {
5457 aarch64_set_vec_u32 (cpu, vd, i,
5458 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5459 aarch64_set_vec_u32 (cpu, vd, i + range,
5460 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5461 }
5462 return;
5463
5464 case 3:
5465 if (! full)
5466 HALT_UNALLOC;
5467 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5468 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5469 return;
5470 }
5471 }
5472
5473 /* Floating point vector convert to longer (higher precision).  */
5474 static void
5475 do_vec_FCVTL (sim_cpu *cpu)
5476 {
5477 /* instr[31] = 0
5478 instr[30] = half (0) / all (1)
5479 instr[29,23] = 00 1110 0
5480 instr[22] = single (0) / double (1)
5481 instr[21,10] = 10 0001 0111 10
5482 instr[9,5] = Rn
5483 instr[4,0] = Rd. */
5484
5485 unsigned rn = INSTR (9, 5);
5486 unsigned rd = INSTR (4, 0);
5487 unsigned full = INSTR (30, 30);
5488 unsigned i;
5489
5490 NYI_assert (31, 31, 0);
5491 NYI_assert (29, 23, 0x1C);
5492 NYI_assert (21, 10, 0x85E);
5493
5494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5495 if (INSTR (22, 22))
5496 {
5497 for (i = 0; i < 2; i++)
5498 aarch64_set_vec_double (cpu, rd, i,
5499 aarch64_get_vec_float (cpu, rn, i + 2*full));
5500 }
5501 else
5502 {
5503 HALT_NYI;
5504
5505 #if 0
5506 /* TODO: Implement missing half-float support. */
5507 for (i = 0; i < 4; i++)
5508 aarch64_set_vec_float (cpu, rd, i,
5509 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5510 #endif
5511 }
5512 }
5513
5514 static void
5515 do_vec_FABS (sim_cpu *cpu)
5516 {
5517 /* instr[31] = 0
5518 instr[30] = half(0)/full(1)
5519 instr[29,23] = 00 1110 1
5520 instr[22] = float(0)/double(1)
5521 instr[21,16] = 10 0000
5522 instr[15,10] = 1111 10
5523 instr[9,5] = Vn
5524 instr[4,0] = Vd. */
5525
5526 unsigned vn = INSTR (9, 5);
5527 unsigned vd = INSTR (4, 0);
5528 unsigned full = INSTR (30, 30);
5529 unsigned i;
5530
5531 NYI_assert (29, 23, 0x1D);
5532 NYI_assert (21, 10, 0x83E);
5533
5534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5535 if (INSTR (22, 22))
5536 {
5537 if (! full)
5538 HALT_NYI;
5539
5540 for (i = 0; i < 2; i++)
5541 aarch64_set_vec_double (cpu, vd, i,
5542 fabs (aarch64_get_vec_double (cpu, vn, i)));
5543 }
5544 else
5545 {
5546 for (i = 0; i < (full ? 4 : 2); i++)
5547 aarch64_set_vec_float (cpu, vd, i,
5548 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5549 }
5550 }
5551
5552 static void
5553 do_vec_FCVTZS (sim_cpu *cpu)
5554 {
5555 /* instr[31] = 0
5556 instr[30] = half (0) / all (1)
5557 instr[29,23] = 00 1110 1
5558 instr[22] = single (0) / double (1)
5559 instr[21,10] = 10 0001 1011 10
5560 instr[9,5] = Rn
5561 instr[4,0] = Rd. */
5562
5563 unsigned rn = INSTR (9, 5);
5564 unsigned rd = INSTR (4, 0);
5565 unsigned full = INSTR (30, 30);
5566 unsigned i;
5567
5568 NYI_assert (31, 31, 0);
5569 NYI_assert (29, 23, 0x1D);
5570 NYI_assert (21, 10, 0x86E);
5571
5572 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5573 if (INSTR (22, 22))
5574 {
5575 if (! full)
5576 HALT_UNALLOC;
5577
5578 for (i = 0; i < 2; i++)
5579 aarch64_set_vec_s64 (cpu, rd, i,
5580 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5581 }
5582 else
5583 for (i = 0; i < (full ? 4 : 2); i++)
5584 aarch64_set_vec_s32 (cpu, rd, i,
5585 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5586 }
5587
5588 static void
5589 do_vec_REV64 (sim_cpu *cpu)
5590 {
5591 /* instr[31] = 0
5592 instr[30] = full/half
5593 instr[29,24] = 00 1110
5594 instr[23,22] = size
5595 instr[21,10] = 10 0000 0000 10
5596 instr[9,5] = Rn
5597 instr[4,0] = Rd. */
5598
5599 unsigned rn = INSTR (9, 5);
5600 unsigned rd = INSTR (4, 0);
5601 unsigned size = INSTR (23, 22);
5602 unsigned full = INSTR (30, 30);
5603 unsigned i;
5604 FRegister val;
5605
5606 NYI_assert (29, 24, 0x0E);
5607 NYI_assert (21, 10, 0x802);
5608
5609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
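  /* XORing the element index with (elements-per-64-bit-group - 1) reverses
     the element order within each 64-bit group.  */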
5610 switch (size)
5611 {
5612 case 0:
5613 for (i = 0; i < (full ? 16 : 8); i++)
5614 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5615 break;
5616
5617 case 1:
5618 for (i = 0; i < (full ? 8 : 4); i++)
5619 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5620 break;
5621
5622 case 2:
5623 for (i = 0; i < (full ? 4 : 2); i++)
5624 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5625 break;
5626
5627 case 3:
5628 HALT_UNALLOC;
5629 }
5630
5631 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5632 if (full)
5633 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5634 }
5635
5636 static void
5637 do_vec_REV16 (sim_cpu *cpu)
5638 {
5639 /* instr[31] = 0
5640 instr[30] = full/half
5641 instr[29,24] = 00 1110
5642 instr[23,22] = size
5643 instr[21,10] = 10 0000 0001 10
5644 instr[9,5] = Rn
5645 instr[4,0] = Rd. */
5646
5647 unsigned rn = INSTR (9, 5);
5648 unsigned rd = INSTR (4, 0);
5649 unsigned size = INSTR (23, 22);
5650 unsigned full = INSTR (30, 30);
5651 unsigned i;
5652 FRegister val;
5653
5654 NYI_assert (29, 24, 0x0E);
5655 NYI_assert (21, 10, 0x806);
5656
5657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5658 switch (size)
5659 {
5660 case 0:
5661 for (i = 0; i < (full ? 16 : 8); i++)
5662 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5663 break;
5664
5665 default:
5666 HALT_UNALLOC;
5667 }
5668
5669 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5670 if (full)
5671 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5672 }
5673
5674 static void
5675 do_vec_op1 (sim_cpu *cpu)
5676 {
5677 /* instr[31] = 0
5678 instr[30] = half/full
5679 instr[29,24] = 00 1110
5680 instr[23,21] = ???
5681 instr[20,16] = Vm
5682 instr[15,10] = sub-opcode
5683 instr[9,5] = Vn
5684 instr[4,0] = Vd */
5685 NYI_assert (29, 24, 0x0E);
5686
5687 if (INSTR (21, 21) == 0)
5688 {
5689 if (INSTR (23, 22) == 0)
5690 {
5691 if (INSTR (30, 30) == 1
5692 && INSTR (17, 14) == 0
5693 && INSTR (12, 10) == 7)
5694 return do_vec_ins_2 (cpu);
5695
5696 switch (INSTR (15, 10))
5697 {
5698 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5699 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5700 case 0x07: do_vec_INS (cpu); return;
5701 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5702 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5703
5704 case 0x00:
5705 case 0x08:
5706 case 0x10:
5707 case 0x18:
5708 do_vec_TBL (cpu); return;
5709
5710 case 0x06:
5711 case 0x16:
5712 do_vec_UZP (cpu); return;
5713
5714 case 0x0A: do_vec_TRN (cpu); return;
5715
5716 case 0x0E:
5717 case 0x1E:
5718 do_vec_ZIP (cpu); return;
5719
5720 default:
5721 HALT_NYI;
5722 }
5723 }
5724
5725 switch (INSTR (13, 10))
5726 {
5727 case 0x6: do_vec_UZP (cpu); return;
5728 case 0xE: do_vec_ZIP (cpu); return;
5729 case 0xA: do_vec_TRN (cpu); return;
5730 default: HALT_NYI;
5731 }
5732 }
5733
5734 switch (INSTR (15, 10))
5735 {
5736 case 0x02: do_vec_REV64 (cpu); return;
5737 case 0x06: do_vec_REV16 (cpu); return;
5738
5739 case 0x07:
5740 switch (INSTR (23, 21))
5741 {
5742 case 1: do_vec_AND (cpu); return;
5743 case 3: do_vec_BIC (cpu); return;
5744 case 5: do_vec_ORR (cpu); return;
5745 case 7: do_vec_ORN (cpu); return;
5746 default: HALT_NYI;
5747 }
5748
5749 case 0x08: do_vec_sub_long (cpu); return;
5750 case 0x0a: do_vec_XTN (cpu); return;
5751 case 0x11: do_vec_SSHL (cpu); return;
5752 case 0x16: do_vec_CNT (cpu); return;
5753 case 0x19: do_vec_max (cpu); return;
5754 case 0x1B: do_vec_min (cpu); return;
5755 case 0x21: do_vec_add (cpu); return;
5756 case 0x25: do_vec_MLA (cpu); return;
5757 case 0x27: do_vec_mul (cpu); return;
5758 case 0x2F: do_vec_ADDP (cpu); return;
5759 case 0x30: do_vec_mull (cpu); return;
5760 case 0x33: do_vec_FMLA (cpu); return;
5761 case 0x35: do_vec_fadd (cpu); return;
5762
5763 case 0x1E:
5764 switch (INSTR (20, 16))
5765 {
5766 case 0x01: do_vec_FCVTL (cpu); return;
5767 default: HALT_NYI;
5768 }
5769
5770 case 0x2E:
5771 switch (INSTR (20, 16))
5772 {
5773 case 0x00: do_vec_ABS (cpu); return;
5774 case 0x01: do_vec_FCVTZS (cpu); return;
5775 case 0x11: do_vec_ADDV (cpu); return;
5776 default: HALT_NYI;
5777 }
5778
5779 case 0x31:
5780 case 0x3B:
5781 do_vec_Fminmax (cpu); return;
5782
5783 case 0x0D:
5784 case 0x0F:
5785 case 0x22:
5786 case 0x23:
5787 case 0x26:
5788 case 0x2A:
5789 case 0x32:
5790 case 0x36:
5791 case 0x39:
5792 case 0x3A:
5793 do_vec_compare (cpu); return;
5794
5795 case 0x3E:
5796 do_vec_FABS (cpu); return;
5797
5798 default:
5799 HALT_NYI;
5800 }
5801 }
5802
5803 static void
5804 do_vec_xtl (sim_cpu *cpu)
5805 {
5806 /* instr[31] = 0
5807 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5808 instr[28,22] = 0 1111 00
5809 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5810 instr[15,10] = 1010 01
5811 instr[9,5] = V source
5812 instr[4,0] = V dest. */
5813
5814 unsigned vs = INSTR (9, 5);
5815 unsigned vd = INSTR (4, 0);
5816 unsigned i, shift, bias = 0;
5817
5818 NYI_assert (28, 22, 0x3C);
5819 NYI_assert (15, 10, 0x29);
5820
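  /* SXTL/UXTL are SSHLL/USHLL with a zero shift count; the "2" variants
     read from the upper half of the source register, selected via bias.  */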
5821 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5822 switch (INSTR (30, 29))
5823 {
5824 case 2: /* SXTL2, SSHLL2. */
5825 bias = 2;
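      /* Fall through.  */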
5826 case 0: /* SXTL, SSHLL. */
5827 if (INSTR (21, 21))
5828 {
5829 int64_t val1, val2;
5830
5831 shift = INSTR (20, 16);
5832 /* Get the source values before setting the destination values
5833 in case the source and destination are the same. */
5834 	  val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5835 	  val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5836 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5837 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5838 }
5839 else if (INSTR (20, 20))
5840 {
5841 	  int32_t v[4];
5843
5844 shift = INSTR (19, 16);
5845 bias *= 2;
5846 for (i = 0; i < 4; i++)
5847 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5848 for (i = 0; i < 4; i++)
5849 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5850 }
5851 else
5852 {
5853 int16_t v[8];
5854 NYI_assert (19, 19, 1);
5855
5856 shift = INSTR (18, 16);
5857 bias *= 4;
5858 for (i = 0; i < 8; i++)
5859 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5860 for (i = 0; i < 8; i++)
5861 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5862 }
5863 return;
5864
5865 case 3: /* UXTL2, USHLL2. */
5866 bias = 2;
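      /* Fall through.  */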
5867 case 1: /* UXTL, USHLL. */
5868 if (INSTR (21, 21))
5869 {
5870 uint64_t v1, v2;
5871 shift = INSTR (20, 16);
5872 	  v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5873 	  v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5874 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5875 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5876 }
5877 else if (INSTR (20, 20))
5878 {
5879 uint32_t v[4];
5880 shift = INSTR (19, 16);
5881 bias *= 2;
5882 for (i = 0; i < 4; i++)
5883 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5884 for (i = 0; i < 4; i++)
5885 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5886 }
5887 else
5888 {
5889 uint16_t v[8];
5890 NYI_assert (19, 19, 1);
5891
5892 shift = INSTR (18, 16);
5893 bias *= 4;
5894 for (i = 0; i < 8; i++)
5895 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5896 for (i = 0; i < 8; i++)
5897 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5898 }
5899 return;
5900 }
5901 }
5902
5903 static void
5904 do_vec_SHL (sim_cpu *cpu)
5905 {
5906 /* instr [31] = 0
5907 instr [30] = half(0)/full(1)
5908 instr [29,23] = 001 1110
5909 instr [22,16] = size and shift amount
5910 instr [15,10] = 01 0101
5911 instr [9, 5] = Vs
5912 instr [4, 0] = Vd. */
5913
5914 int shift;
5915 int full = INSTR (30, 30);
5916 unsigned vs = INSTR (9, 5);
5917 unsigned vd = INSTR (4, 0);
5918 unsigned i;
5919
5920 NYI_assert (29, 23, 0x1E);
5921 NYI_assert (15, 10, 0x15);
5922
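  /* The immh:immb field in instr[22,16] holds W + shift for an element of
     width W, so testing the leading size bit and keeping the low bits
     yields the shift amount directly.  */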
5923 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5924 if (INSTR (22, 22))
5925 {
5926 shift = INSTR (21, 16);
5927
5928 if (full == 0)
5929 HALT_UNALLOC;
5930
5931 for (i = 0; i < 2; i++)
5932 {
5933 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5934 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5935 }
5936
5937 return;
5938 }
5939
5940 if (INSTR (21, 21))
5941 {
5942 shift = INSTR (20, 16);
5943
5944 for (i = 0; i < (full ? 4 : 2); i++)
5945 {
5946 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5947 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5948 }
5949
5950 return;
5951 }
5952
5953 if (INSTR (20, 20))
5954 {
5955 shift = INSTR (19, 16);
5956
5957 for (i = 0; i < (full ? 8 : 4); i++)
5958 {
5959 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5960 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5961 }
5962
5963 return;
5964 }
5965
5966 if (INSTR (19, 19) == 0)
5967 HALT_UNALLOC;
5968
5969 shift = INSTR (18, 16);
5970
5971 for (i = 0; i < (full ? 16 : 8); i++)
5972 {
5973 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5974 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5975 }
5976 }
5977
5978 static void
5979 do_vec_SSHR_USHR (sim_cpu *cpu)
5980 {
5981 /* instr [31] = 0
5982 instr [30] = half(0)/full(1)
5983 instr [29] = signed(0)/unsigned(1)
5984 instr [28,23] = 0 1111 0
5985 instr [22,16] = size and shift amount
5986 instr [15,10] = 0000 01
5987 instr [9, 5] = Vs
5988 instr [4, 0] = Vd. */
5989
5990 int full = INSTR (30, 30);
5991 int sign = ! INSTR (29, 29);
5992 unsigned shift = INSTR (22, 16);
5993 unsigned vs = INSTR (9, 5);
5994 unsigned vd = INSTR (4, 0);
5995 unsigned i;
5996
5997 NYI_assert (28, 23, 0x1E);
5998 NYI_assert (15, 10, 0x01);
5999
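  /* The immh:immb field in instr[22,16] holds 2 * W - shift for an element
     of width W, so each case below recovers the shift by subtracting the
     encoded value from 2 * W.  */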
6000 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6001 if (INSTR (22, 22))
6002 {
6003 shift = 128 - shift;
6004
6005 if (full == 0)
6006 HALT_UNALLOC;
6007
6008 if (sign)
6009 for (i = 0; i < 2; i++)
6010 {
6011 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6012 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6013 }
6014 else
6015 for (i = 0; i < 2; i++)
6016 {
6017 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6018 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6019 }
6020
6021 return;
6022 }
6023
6024 if (INSTR (21, 21))
6025 {
6026 shift = 64 - shift;
6027
6028 if (sign)
6029 for (i = 0; i < (full ? 4 : 2); i++)
6030 {
6031 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6032 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6033 }
6034 else
6035 for (i = 0; i < (full ? 4 : 2); i++)
6036 {
6037 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6038 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6039 }
6040
6041 return;
6042 }
6043
6044 if (INSTR (20, 20))
6045 {
6046 shift = 32 - shift;
6047
6048 if (sign)
6049 for (i = 0; i < (full ? 8 : 4); i++)
6050 {
6051 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6052 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6053 }
6054 else
6055 for (i = 0; i < (full ? 8 : 4); i++)
6056 {
6057 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6058 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6059 }
6060
6061 return;
6062 }
6063
6064 if (INSTR (19, 19) == 0)
6065 HALT_UNALLOC;
6066
6067 shift = 16 - shift;
6068
6069 if (sign)
6070 for (i = 0; i < (full ? 16 : 8); i++)
6071 {
6072 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6073 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6074 }
6075 else
6076 for (i = 0; i < (full ? 16 : 8); i++)
6077 {
6078 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6079 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6080 }
6081 }
6082
6083 static void
6084 do_vec_MUL_by_element (sim_cpu *cpu)
6085 {
6086 /* instr[31] = 0
6087 instr[30] = half/full
6088 instr[29,24] = 00 1111
6089 instr[23,22] = size
6090 instr[21] = L
6091 instr[20] = M
6092 instr[19,16] = m
6093 instr[15,12] = 1000
6094 instr[11] = H
6095 instr[10] = 0
6096 instr[9,5] = Vn
6097 instr[4,0] = Vd */
6098
6099 unsigned full = INSTR (30, 30);
6100 unsigned L = INSTR (21, 21);
6101 unsigned H = INSTR (11, 11);
6102 unsigned vn = INSTR (9, 5);
6103 unsigned vd = INSTR (4, 0);
6104 unsigned size = INSTR (23, 22);
6105 unsigned index;
6106 unsigned vm;
6107 unsigned e;
6108
6109 NYI_assert (29, 24, 0x0F);
6110 NYI_assert (15, 12, 0x8);
6111 NYI_assert (10, 10, 0);
6112
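  /* The multiplier element index is formed from H:L:M for 16-bit elements
     (restricting Vm to V0-V15) and from H:L for 32-bit elements.  */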
6113 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6114 switch (size)
6115 {
6116 case 1:
6117 {
6118 /* 16 bit products. */
6119 uint16_t product;
6120 uint16_t element1;
6121 uint16_t element2;
6122
6123 index = (H << 2) | (L << 1) | INSTR (20, 20);
6124 vm = INSTR (19, 16);
6125 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6126
6127 for (e = 0; e < (full ? 8 : 4); e ++)
6128 {
6129 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6130 product = element1 * element2;
6131 aarch64_set_vec_u16 (cpu, vd, e, product);
6132 }
6133 }
6134 break;
6135
6136 case 2:
6137 {
6138 /* 32 bit products. */
6139 uint32_t product;
6140 uint32_t element1;
6141 uint32_t element2;
6142
6143 index = (H << 1) | L;
6144 vm = INSTR (20, 16);
6145 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6146
6147 for (e = 0; e < (full ? 4 : 2); e ++)
6148 {
6149 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6150 product = element1 * element2;
6151 aarch64_set_vec_u32 (cpu, vd, e, product);
6152 }
6153 }
6154 break;
6155
6156 default:
6157 HALT_UNALLOC;
6158 }
6159 }
6160
6161 static void
6162 do_FMLA_by_element (sim_cpu *cpu)
6163 {
6164 /* instr[31] = 0
6165 instr[30] = half/full
6166 instr[29,23] = 00 1111 1
6167 instr[22] = size
6168 instr[21] = L
6169 instr[20,16] = m
6170 instr[15,12] = 0001
6171 instr[11] = H
6172 instr[10] = 0
6173 instr[9,5] = Vn
6174 instr[4,0] = Vd */
6175
6176 unsigned full = INSTR (30, 30);
6177 unsigned size = INSTR (22, 22);
6178 unsigned L = INSTR (21, 21);
6179 unsigned vm = INSTR (20, 16);
6180 unsigned H = INSTR (11, 11);
6181 unsigned vn = INSTR (9, 5);
6182 unsigned vd = INSTR (4, 0);
6183 unsigned e;
6184
6185 NYI_assert (29, 23, 0x1F);
6186 NYI_assert (15, 12, 0x1);
6187 NYI_assert (10, 10, 0);
6188
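  /* A single element of Vm, selected by H for doubles or H:L for floats,
     is multiplied into every element of Vn and accumulated into Vd.  */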
6189 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6190 if (size)
6191 {
6192 double element1, element2;
6193
6194 if (! full || L)
6195 HALT_UNALLOC;
6196
6197 element2 = aarch64_get_vec_double (cpu, vm, H);
6198
6199 for (e = 0; e < 2; e++)
6200 {
6201 element1 = aarch64_get_vec_double (cpu, vn, e);
6202 element1 *= element2;
6203 element1 += aarch64_get_vec_double (cpu, vd, e);
6204 aarch64_set_vec_double (cpu, vd, e, element1);
6205 }
6206 }
6207 else
6208 {
6209 float element1;
6210 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6211
6212 for (e = 0; e < (full ? 4 : 2); e++)
6213 {
6214 element1 = aarch64_get_vec_float (cpu, vn, e);
6215 element1 *= element2;
6216 element1 += aarch64_get_vec_float (cpu, vd, e);
6217 aarch64_set_vec_float (cpu, vd, e, element1);
6218 }
6219 }
6220 }
6221
6222 static void
6223 do_vec_op2 (sim_cpu *cpu)
6224 {
6225 /* instr[31] = 0
6226 instr[30] = half/full
6227 instr[29,24] = 00 1111
6228 instr[23] = ?
6229 instr[22,16] = element size & index
6230 instr[15,10] = sub-opcode
6231 instr[9,5] = Vm
6232 instr[4,0] = Vd */
6233
6234 NYI_assert (29, 24, 0x0F);
6235
6236 if (INSTR (23, 23) != 0)
6237 {
6238 switch (INSTR (15, 10))
6239 {
6240 case 0x04:
6241 case 0x06:
6242 do_FMLA_by_element (cpu);
6243 return;
6244
6245 case 0x20:
6246 case 0x22:
6247 do_vec_MUL_by_element (cpu);
6248 return;
6249
6250 default:
6251 HALT_NYI;
6252 }
6253 }
6254 else
6255 {
6256 switch (INSTR (15, 10))
6257 {
6258 case 0x01: do_vec_SSHR_USHR (cpu); return;
6259 case 0x15: do_vec_SHL (cpu); return;
6260 case 0x20:
6261 case 0x22: do_vec_MUL_by_element (cpu); return;
6262 case 0x29: do_vec_xtl (cpu); return;
6263 default: HALT_NYI;
6264 }
6265 }
6266 }
6267
6268 static void
6269 do_vec_neg (sim_cpu *cpu)
6270 {
6271 /* instr[31] = 0
6272 instr[30] = full(1)/half(0)
6273 instr[29,24] = 10 1110
6274 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6275 instr[21,10] = 1000 0010 1110
6276 instr[9,5] = Vs
6277 instr[4,0] = Vd */
6278
6279 int full = INSTR (30, 30);
6280 unsigned vs = INSTR (9, 5);
6281 unsigned vd = INSTR (4, 0);
6282 unsigned i;
6283
6284 NYI_assert (29, 24, 0x2E);
6285 NYI_assert (21, 10, 0x82E);
6286
6287 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6288 switch (INSTR (23, 22))
6289 {
6290 case 0:
6291 for (i = 0; i < (full ? 16 : 8); i++)
6292 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6293 return;
6294
6295 case 1:
6296 for (i = 0; i < (full ? 8 : 4); i++)
6297 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6298 return;
6299
6300 case 2:
6301 for (i = 0; i < (full ? 4 : 2); i++)
6302 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6303 return;
6304
6305 case 3:
      if (! full)
	HALT_UNALLOC;
6308 for (i = 0; i < 2; i++)
6309 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6310 return;
6311 }
6312 }
6313
6314 static void
6315 do_vec_sqrt (sim_cpu *cpu)
6316 {
6317 /* instr[31] = 0
6318 instr[30] = full(1)/half(0)
6319 instr[29,23] = 101 1101
6320 instr[22] = single(0)/double(1)
6321 instr[21,10] = 1000 0111 1110
6322 instr[9,5] = Vs
6323 instr[4,0] = Vd. */
6324
6325 int full = INSTR (30, 30);
6326 unsigned vs = INSTR (9, 5);
6327 unsigned vd = INSTR (4, 0);
6328 unsigned i;
6329
6330 NYI_assert (29, 23, 0x5B);
6331 NYI_assert (21, 10, 0x87E);
6332
6333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6334 if (INSTR (22, 22) == 0)
6335 for (i = 0; i < (full ? 4 : 2); i++)
6336 aarch64_set_vec_float (cpu, vd, i,
6337 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6338 else
6339 for (i = 0; i < 2; i++)
6340 aarch64_set_vec_double (cpu, vd, i,
6341 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6342 }
6343
6344 static void
6345 do_vec_mls_indexed (sim_cpu *cpu)
6346 {
6347 /* instr[31] = 0
6348 instr[30] = half(0)/full(1)
6349 instr[29,24] = 10 1111
6350 instr[23,22] = 16-bit(01)/32-bit(10)
     instr[11],instr[21],instr[20] = index H:L:M (if 16-bit)
     instr[11],instr[21] = index H:L (if 32-bit)
     instr[20,16] = Vm (instr[19,16] if 16-bit)
6354 instr[15,12] = 0100
6355 instr[11] = part of index
6356 instr[10] = 0
6357 instr[9,5] = Vs
6358 instr[4,0] = Vd. */
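  /* Multiply-subtract by element: each Vd lane is decremented by the
     product of the corresponding Vn lane and the one selected Vm lane,
     e.g. for 16-bit lanes Vd.8h -= Vn.8h * Vm.h[index].  */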
6359
6360 int full = INSTR (30, 30);
6361 unsigned vs = INSTR (9, 5);
6362 unsigned vd = INSTR (4, 0);
6363 unsigned vm = INSTR (20, 16);
6364 unsigned i;
6365
6366 NYI_assert (15, 12, 4);
6367 NYI_assert (10, 10, 0);
6368
6369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6370 switch (INSTR (23, 22))
6371 {
    case 1:
      {
	/* 16-bit lanes: Vm is named by instr[19,16] (V0-V15 only) and
	   the element index is H:L:M, as in do_vec_MUL_by_element.  */
	unsigned elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1)
	  | INSTR (20, 20);
	uint16_t val = aarch64_get_vec_u16 (cpu, INSTR (19, 16), elem);

	for (i = 0; i < (full ? 8 : 4); i++)
	  aarch64_set_vec_u16 (cpu, vd, i,
			       aarch64_get_vec_u16 (cpu, vd, i)
			       - (aarch64_get_vec_u16 (cpu, vs, i) * val));
	return;
      }

    case 2:
      {
	/* 32-bit lanes: the element index is H:L.  */
	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
	uint32_t val = aarch64_get_vec_u32 (cpu, vm, elem);

	for (i = 0; i < (full ? 4 : 2); i++)
	  aarch64_set_vec_u32 (cpu, vd, i,
			       aarch64_get_vec_u32 (cpu, vd, i)
			       - (aarch64_get_vec_u32 (cpu, vs, i) * val));
	return;
      }
6401
6402 case 0:
6403 case 3:
6404 default:
6405 HALT_NYI;
6406 }
6407 }
6408
6409 static void
6410 do_vec_SUB (sim_cpu *cpu)
6411 {
6412 /* instr [31] = 0
6413 instr [30] = half(0)/full(1)
6414 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10), long(11)
6416 instr [21] = 1
6417 instr [20,16] = Vm
6418 instr [15,10] = 10 0001
6419 instr [9, 5] = Vn
6420 instr [4, 0] = Vd. */
6421
6422 unsigned full = INSTR (30, 30);
6423 unsigned vm = INSTR (20, 16);
6424 unsigned vn = INSTR (9, 5);
6425 unsigned vd = INSTR (4, 0);
6426 unsigned i;
6427
6428 NYI_assert (29, 24, 0x2E);
6429 NYI_assert (21, 21, 1);
6430 NYI_assert (15, 10, 0x21);
6431
6432 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6433 switch (INSTR (23, 22))
6434 {
6435 case 0:
6436 for (i = 0; i < (full ? 16 : 8); i++)
6437 aarch64_set_vec_s8 (cpu, vd, i,
6438 aarch64_get_vec_s8 (cpu, vn, i)
6439 - aarch64_get_vec_s8 (cpu, vm, i));
6440 return;
6441
6442 case 1:
6443 for (i = 0; i < (full ? 8 : 4); i++)
6444 aarch64_set_vec_s16 (cpu, vd, i,
6445 aarch64_get_vec_s16 (cpu, vn, i)
6446 - aarch64_get_vec_s16 (cpu, vm, i));
6447 return;
6448
6449 case 2:
6450 for (i = 0; i < (full ? 4 : 2); i++)
6451 aarch64_set_vec_s32 (cpu, vd, i,
6452 aarch64_get_vec_s32 (cpu, vn, i)
6453 - aarch64_get_vec_s32 (cpu, vm, i));
6454 return;
6455
6456 case 3:
6457 if (full == 0)
6458 HALT_UNALLOC;
6459
6460 for (i = 0; i < 2; i++)
6461 aarch64_set_vec_s64 (cpu, vd, i,
6462 aarch64_get_vec_s64 (cpu, vn, i)
6463 - aarch64_get_vec_s64 (cpu, vm, i));
6464 return;
6465 }
6466 }
6467
6468 static void
6469 do_vec_MLS (sim_cpu *cpu)
6470 {
6471 /* instr [31] = 0
6472 instr [30] = half(0)/full(1)
6473 instr [29,24] = 10 1110
     instr [23,22] = size: byte(00), half(01), word(10)
6475 instr [21] = 1
6476 instr [20,16] = Vm
6477 instr [15,10] = 10 0101
6478 instr [9, 5] = Vn
6479 instr [4, 0] = Vd. */
6480
6481 unsigned full = INSTR (30, 30);
6482 unsigned vm = INSTR (20, 16);
6483 unsigned vn = INSTR (9, 5);
6484 unsigned vd = INSTR (4, 0);
6485 unsigned i;
6486
6487 NYI_assert (29, 24, 0x2E);
6488 NYI_assert (21, 21, 1);
6489 NYI_assert (15, 10, 0x25);
6490
6491 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6492 switch (INSTR (23, 22))
6493 {
6494 case 0:
6495 for (i = 0; i < (full ? 16 : 8); i++)
6496 aarch64_set_vec_u8 (cpu, vd, i,
6497 aarch64_get_vec_u8 (cpu, vd, i)
6498 - (aarch64_get_vec_u8 (cpu, vn, i)
6499 * aarch64_get_vec_u8 (cpu, vm, i)));
6500 return;
6501
6502 case 1:
6503 for (i = 0; i < (full ? 8 : 4); i++)
6504 aarch64_set_vec_u16 (cpu, vd, i,
6505 aarch64_get_vec_u16 (cpu, vd, i)
6506 - (aarch64_get_vec_u16 (cpu, vn, i)
6507 * aarch64_get_vec_u16 (cpu, vm, i)));
6508 return;
6509
6510 case 2:
6511 for (i = 0; i < (full ? 4 : 2); i++)
6512 aarch64_set_vec_u32 (cpu, vd, i,
6513 aarch64_get_vec_u32 (cpu, vd, i)
6514 - (aarch64_get_vec_u32 (cpu, vn, i)
6515 * aarch64_get_vec_u32 (cpu, vm, i)));
6516 return;
6517
6518 default:
6519 HALT_UNALLOC;
6520 }
6521 }
6522
6523 static void
6524 do_vec_FDIV (sim_cpu *cpu)
6525 {
6526 /* instr [31] = 0
6527 instr [30] = half(0)/full(1)
6528 instr [29,23] = 10 1110 0
     instr [22]    = float(0)/double(1)
6530 instr [21] = 1
6531 instr [20,16] = Vm
6532 instr [15,10] = 1111 11
6533 instr [9, 5] = Vn
6534 instr [4, 0] = Vd. */
6535
6536 unsigned full = INSTR (30, 30);
6537 unsigned vm = INSTR (20, 16);
6538 unsigned vn = INSTR (9, 5);
6539 unsigned vd = INSTR (4, 0);
6540 unsigned i;
6541
6542 NYI_assert (29, 23, 0x5C);
6543 NYI_assert (21, 21, 1);
6544 NYI_assert (15, 10, 0x3F);
6545
6546 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6547 if (INSTR (22, 22))
6548 {
6549 if (! full)
6550 HALT_UNALLOC;
6551
6552 for (i = 0; i < 2; i++)
6553 aarch64_set_vec_double (cpu, vd, i,
6554 aarch64_get_vec_double (cpu, vn, i)
6555 / aarch64_get_vec_double (cpu, vm, i));
6556 }
6557 else
6558 for (i = 0; i < (full ? 4 : 2); i++)
6559 aarch64_set_vec_float (cpu, vd, i,
6560 aarch64_get_vec_float (cpu, vn, i)
6561 / aarch64_get_vec_float (cpu, vm, i));
6562 }
6563
6564 static void
6565 do_vec_FMUL (sim_cpu *cpu)
6566 {
6567 /* instr [31] = 0
6568 instr [30] = half(0)/full(1)
6569 instr [29,23] = 10 1110 0
6570 instr [22] = float(0)/double(1)
6571 instr [21] = 1
6572 instr [20,16] = Vm
6573 instr [15,10] = 1101 11
6574 instr [9, 5] = Vn
6575 instr [4, 0] = Vd. */
6576
6577 unsigned full = INSTR (30, 30);
6578 unsigned vm = INSTR (20, 16);
6579 unsigned vn = INSTR (9, 5);
6580 unsigned vd = INSTR (4, 0);
6581 unsigned i;
6582
6583 NYI_assert (29, 23, 0x5C);
6584 NYI_assert (21, 21, 1);
6585 NYI_assert (15, 10, 0x37);
6586
6587 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6588 if (INSTR (22, 22))
6589 {
6590 if (! full)
6591 HALT_UNALLOC;
6592
6593 for (i = 0; i < 2; i++)
6594 aarch64_set_vec_double (cpu, vd, i,
6595 aarch64_get_vec_double (cpu, vn, i)
6596 * aarch64_get_vec_double (cpu, vm, i));
6597 }
6598 else
6599 for (i = 0; i < (full ? 4 : 2); i++)
6600 aarch64_set_vec_float (cpu, vd, i,
6601 aarch64_get_vec_float (cpu, vn, i)
6602 * aarch64_get_vec_float (cpu, vm, i));
6603 }
6604
6605 static void
6606 do_vec_FADDP (sim_cpu *cpu)
6607 {
6608 /* instr [31] = 0
6609 instr [30] = half(0)/full(1)
6610 instr [29,23] = 10 1110 0
6611 instr [22] = float(0)/double(1)
6612 instr [21] = 1
6613 instr [20,16] = Vm
6614 instr [15,10] = 1101 01
6615 instr [9, 5] = Vn
6616 instr [4, 0] = Vd. */
6617
6618 unsigned full = INSTR (30, 30);
6619 unsigned vm = INSTR (20, 16);
6620 unsigned vn = INSTR (9, 5);
6621 unsigned vd = INSTR (4, 0);
6622
6623 NYI_assert (29, 23, 0x5C);
6624 NYI_assert (21, 21, 1);
6625 NYI_assert (15, 10, 0x35);
6626
6627 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6628 if (INSTR (22, 22))
6629 {
      /* Extract the values before adding them, in case vd == vn/vm.  */
6631 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6632 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6633 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6634 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6635
6636 if (! full)
6637 HALT_UNALLOC;
6638
6639 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6640 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6641 }
6642 else
6643 {
      /* Extract the values before adding them, in case vd == vn/vm.  */
6645 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6646 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6647 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6648 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6649
6650 if (full)
6651 {
6652 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6653 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6654 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6655 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6656
6657 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6658 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6659 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6660 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6661 }
6662 else
6663 {
6664 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6665 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6666 }
6667 }
6668 }
6669
6670 static void
6671 do_vec_FSQRT (sim_cpu *cpu)
6672 {
6673 /* instr[31] = 0
6674 instr[30] = half(0)/full(1)
6675 instr[29,23] = 10 1110 1
6676 instr[22] = single(0)/double(1)
6677 instr[21,10] = 10 0001 1111 10
6678 instr[9,5] = Vsrc
6679 instr[4,0] = Vdest. */
6680
6681 unsigned vn = INSTR (9, 5);
6682 unsigned vd = INSTR (4, 0);
6683 unsigned full = INSTR (30, 30);
6684 int i;
6685
6686 NYI_assert (29, 23, 0x5D);
6687 NYI_assert (21, 10, 0x87E);
6688
6689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6690 if (INSTR (22, 22))
6691 {
6692 if (! full)
6693 HALT_UNALLOC;
6694
6695 for (i = 0; i < 2; i++)
6696 aarch64_set_vec_double (cpu, vd, i,
6697 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6698 }
6699 else
6700 {
6701 for (i = 0; i < (full ? 4 : 2); i++)
6702 aarch64_set_vec_float (cpu, vd, i,
6703 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6704 }
6705 }
6706
6707 static void
6708 do_vec_FNEG (sim_cpu *cpu)
6709 {
6710 /* instr[31] = 0
6711 instr[30] = half (0)/full (1)
6712 instr[29,23] = 10 1110 1
6713 instr[22] = single (0)/double (1)
6714 instr[21,10] = 10 0000 1111 10
6715 instr[9,5] = Vsrc
6716 instr[4,0] = Vdest. */
6717
6718 unsigned vn = INSTR (9, 5);
6719 unsigned vd = INSTR (4, 0);
6720 unsigned full = INSTR (30, 30);
6721 int i;
6722
6723 NYI_assert (29, 23, 0x5D);
6724 NYI_assert (21, 10, 0x83E);
6725
6726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6727 if (INSTR (22, 22))
6728 {
6729 if (! full)
6730 HALT_UNALLOC;
6731
6732 for (i = 0; i < 2; i++)
6733 aarch64_set_vec_double (cpu, vd, i,
6734 - aarch64_get_vec_double (cpu, vn, i));
6735 }
6736 else
6737 {
6738 for (i = 0; i < (full ? 4 : 2); i++)
6739 aarch64_set_vec_float (cpu, vd, i,
6740 - aarch64_get_vec_float (cpu, vn, i));
6741 }
6742 }
6743
6744 static void
6745 do_vec_NOT (sim_cpu *cpu)
6746 {
6747 /* instr[31] = 0
6748 instr[30] = half (0)/full (1)
6749 instr[29,10] = 10 1110 0010 0000 0101 10
6750 instr[9,5] = Vn
     instr[4,0]  = Vd.  */
6752
6753 unsigned vn = INSTR (9, 5);
6754 unsigned vd = INSTR (4, 0);
6755 unsigned i;
6756 int full = INSTR (30, 30);
6757
6758 NYI_assert (29, 10, 0xB8816);
6759
6760 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6761 for (i = 0; i < (full ? 16 : 8); i++)
6762 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6763 }
6764
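/* Return the number of leading zero bits in the SIZE-bit value VAL,
   or SIZE if VAL is zero.  */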
6765 static unsigned int
6766 clz (uint64_t val, unsigned size)
6767 {
6768 uint64_t mask = 1;
6769 int count;
6770
6771 mask <<= (size - 1);
6772 count = 0;
6773 do
6774 {
6775 if (val & mask)
6776 break;
6777 mask >>= 1;
6778 count ++;
6779 }
6780 while (mask);
6781
6782 return count;
6783 }
6784
6785 static void
6786 do_vec_CLZ (sim_cpu *cpu)
6787 {
6788 /* instr[31] = 0
6789 instr[30] = half (0)/full (1)
6790 instr[29,24] = 10 1110
6791 instr[23,22] = size
6792 instr[21,10] = 10 0000 0100 10
6793 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6795
6796 unsigned vn = INSTR (9, 5);
6797 unsigned vd = INSTR (4, 0);
6798 unsigned i;
6799 int full = INSTR (30,30);
6800
6801 NYI_assert (29, 24, 0x2E);
6802 NYI_assert (21, 10, 0x812);
6803
6804 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6805 switch (INSTR (23, 22))
6806 {
6807 case 0:
6808 for (i = 0; i < (full ? 16 : 8); i++)
6809 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6810 break;
6811 case 1:
6812 for (i = 0; i < (full ? 8 : 4); i++)
6813 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6814 break;
6815 case 2:
6816 for (i = 0; i < (full ? 4 : 2); i++)
6817 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6818 break;
6819 case 3:
6820 if (! full)
6821 HALT_UNALLOC;
6822 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6823 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6824 break;
6825 }
6826 }
6827
6828 static void
6829 do_vec_MOV_element (sim_cpu *cpu)
6830 {
6831 /* instr[31,21] = 0110 1110 000
6832 instr[20,16] = size & dest index
6833 instr[15] = 0
6834 instr[14,11] = source index
6835 instr[10] = 1
6836 instr[9,5] = Vs
     instr[4,0]   = Vd.  */
6838
6839 unsigned vs = INSTR (9, 5);
6840 unsigned vd = INSTR (4, 0);
6841 unsigned src_index;
6842 unsigned dst_index;
6843
6844 NYI_assert (31, 21, 0x370);
6845 NYI_assert (15, 15, 0);
6846 NYI_assert (10, 10, 1);
6847
6848 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
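  /* The element size is given by the lowest set bit of imm5
     (instr[20,16]): bit 16 => byte, 17 => half, 18 => word, 19 => double.
     The imm5 bits above that bit form the destination index.  */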
6849 if (INSTR (16, 16))
6850 {
6851 /* Move a byte. */
6852 src_index = INSTR (14, 11);
6853 dst_index = INSTR (20, 17);
6854 aarch64_set_vec_u8 (cpu, vd, dst_index,
6855 aarch64_get_vec_u8 (cpu, vs, src_index));
6856 }
6857 else if (INSTR (17, 17))
6858 {
6859 /* Move 16-bits. */
6860 NYI_assert (11, 11, 0);
6861 src_index = INSTR (14, 12);
6862 dst_index = INSTR (20, 18);
6863 aarch64_set_vec_u16 (cpu, vd, dst_index,
6864 aarch64_get_vec_u16 (cpu, vs, src_index));
6865 }
6866 else if (INSTR (18, 18))
6867 {
6868 /* Move 32-bits. */
6869 NYI_assert (12, 11, 0);
6870 src_index = INSTR (14, 13);
6871 dst_index = INSTR (20, 19);
6872 aarch64_set_vec_u32 (cpu, vd, dst_index,
6873 aarch64_get_vec_u32 (cpu, vs, src_index));
6874 }
6875 else
6876 {
6877 NYI_assert (19, 19, 1);
6878 NYI_assert (13, 11, 0);
6879 src_index = INSTR (14, 14);
6880 dst_index = INSTR (20, 20);
6881 aarch64_set_vec_u64 (cpu, vd, dst_index,
6882 aarch64_get_vec_u64 (cpu, vs, src_index));
6883 }
6884 }
6885
6886 static void
6887 do_vec_REV32 (sim_cpu *cpu)
6888 {
6889 /* instr[31] = 0
6890 instr[30] = full/half
6891 instr[29,24] = 10 1110
6892 instr[23,22] = size
6893 instr[21,10] = 10 0000 0000 10
6894 instr[9,5] = Rn
6895 instr[4,0] = Rd. */
6896
6897 unsigned rn = INSTR (9, 5);
6898 unsigned rd = INSTR (4, 0);
6899 unsigned size = INSTR (23, 22);
6900 unsigned full = INSTR (30, 30);
6901 unsigned i;
6902 FRegister val;
6903
6904 NYI_assert (29, 24, 0x2E);
6905 NYI_assert (21, 10, 0x802);
6906
6907 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6908 switch (size)
6909 {
6910 case 0:
6911 for (i = 0; i < (full ? 16 : 8); i++)
6912 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6913 break;
6914
6915 case 1:
6916 for (i = 0; i < (full ? 8 : 4); i++)
6917 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6918 break;
6919
6920 default:
6921 HALT_UNALLOC;
6922 }
6923
6924 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6925 if (full)
6926 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6927 }
6928
6929 static void
6930 do_vec_EXT (sim_cpu *cpu)
6931 {
6932 /* instr[31] = 0
6933 instr[30] = full/half
6934 instr[29,21] = 10 1110 000
6935 instr[20,16] = Vm
6936 instr[15] = 0
6937 instr[14,11] = source index
6938 instr[10] = 0
6939 instr[9,5] = Vn
     instr[4,0]   = Vd.  */
6941
6942 unsigned vm = INSTR (20, 16);
6943 unsigned vn = INSTR (9, 5);
6944 unsigned vd = INSTR (4, 0);
6945 unsigned src_index = INSTR (14, 11);
6946 unsigned full = INSTR (30, 30);
6947 unsigned i;
6948 unsigned j;
6949 FRegister val;
6950
6951 NYI_assert (31, 21, 0x370);
6952 NYI_assert (15, 15, 0);
6953 NYI_assert (10, 10, 0);
6954
6955 if (!full && (src_index & 0x8))
6956 HALT_UNALLOC;
6957
6958 j = 0;
6959
6960 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
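  /* The result is bytes src_index..top of Vn followed by the low bytes
     of Vm, e.g. a full-width EXT with src_index == 3 yields
     Vd = Vn.b[3..15] : Vm.b[0..2].  */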
6961 for (i = src_index; i < (full ? 16 : 8); i++)
6962 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6963 for (i = 0; i < src_index; i++)
6964 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6965
6966 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6967 if (full)
6968 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6969 }
6970
6971 static void
6972 dexAdvSIMD0 (sim_cpu *cpu)
6973 {
6974 /* instr [28,25] = 0 111. */
  if (INSTR (15, 10) == 0x07
      && INSTR (9, 5) == INSTR (20, 16))
6978 {
6979 if (INSTR (31, 21) == 0x075
6980 || INSTR (31, 21) == 0x275)
6981 {
6982 do_vec_MOV_whole_vector (cpu);
6983 return;
6984 }
6985 }
6986
6987 if (INSTR (29, 19) == 0x1E0)
6988 {
6989 do_vec_MOV_immediate (cpu);
6990 return;
6991 }
6992
6993 if (INSTR (29, 19) == 0x5E0)
6994 {
6995 do_vec_MVNI (cpu);
6996 return;
6997 }
6998
6999 if (INSTR (29, 19) == 0x1C0
7000 || INSTR (29, 19) == 0x1C1)
7001 {
7002 if (INSTR (15, 10) == 0x03)
7003 {
7004 do_vec_DUP_scalar_into_vector (cpu);
7005 return;
7006 }
7007 }
7008
7009 switch (INSTR (29, 24))
7010 {
7011 case 0x0E: do_vec_op1 (cpu); return;
7012 case 0x0F: do_vec_op2 (cpu); return;
7013
7014 case 0x2E:
7015 if (INSTR (21, 21) == 1)
7016 {
7017 switch (INSTR (15, 10))
7018 {
7019 case 0x02:
7020 do_vec_REV32 (cpu);
7021 return;
7022
7023 case 0x07:
7024 switch (INSTR (23, 22))
7025 {
7026 case 0: do_vec_EOR (cpu); return;
7027 case 1: do_vec_BSL (cpu); return;
7028 case 2:
7029 case 3: do_vec_bit (cpu); return;
7030 }
7031 break;
7032
7033 case 0x08: do_vec_sub_long (cpu); return;
7034 case 0x11: do_vec_USHL (cpu); return;
7035 case 0x12: do_vec_CLZ (cpu); return;
7036 case 0x16: do_vec_NOT (cpu); return;
7037 case 0x19: do_vec_max (cpu); return;
7038 case 0x1B: do_vec_min (cpu); return;
7039 case 0x21: do_vec_SUB (cpu); return;
7040 case 0x25: do_vec_MLS (cpu); return;
7041 case 0x31: do_vec_FminmaxNMP (cpu); return;
7042 case 0x35: do_vec_FADDP (cpu); return;
7043 case 0x37: do_vec_FMUL (cpu); return;
7044 case 0x3F: do_vec_FDIV (cpu); return;
7045
7046 case 0x3E:
7047 switch (INSTR (20, 16))
7048 {
7049 case 0x00: do_vec_FNEG (cpu); return;
7050 case 0x01: do_vec_FSQRT (cpu); return;
7051 default: HALT_NYI;
7052 }
7053
7054 case 0x0D:
7055 case 0x0F:
7056 case 0x22:
7057 case 0x23:
7058 case 0x26:
7059 case 0x2A:
7060 case 0x32:
7061 case 0x36:
7062 case 0x39:
7063 case 0x3A:
7064 do_vec_compare (cpu); return;
7065
7066 default:
7067 break;
7068 }
7069 }
7070
7071 if (INSTR (31, 21) == 0x370)
7072 {
7073 if (INSTR (10, 10))
7074 do_vec_MOV_element (cpu);
7075 else
7076 do_vec_EXT (cpu);
7077 return;
7078 }
7079
7080 switch (INSTR (21, 10))
7081 {
7082 case 0x82E: do_vec_neg (cpu); return;
7083 case 0x87E: do_vec_sqrt (cpu); return;
7084 default:
7085 if (INSTR (15, 10) == 0x30)
7086 {
7087 do_vec_mull (cpu);
7088 return;
7089 }
7090 break;
7091 }
7092 break;
7093
    case 0x2F:
7095 switch (INSTR (15, 10))
7096 {
7097 case 0x01: do_vec_SSHR_USHR (cpu); return;
7098 case 0x10:
7099 case 0x12: do_vec_mls_indexed (cpu); return;
7100 case 0x29: do_vec_xtl (cpu); return;
7101 default:
7102 HALT_NYI;
7103 }
7104
7105 default:
7106 break;
7107 }
7108
7109 HALT_NYI;
7110 }
7111
7112 /* 3 sources. */
7113
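/* Note: the multiply-accumulates below are evaluated with ordinary C
   arithmetic, so whether the multiply and add are fused with a single
   rounding depends on the host compiler (FP_CONTRACT); the AArch64
   instructions themselves are fused.  */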
7114 /* Float multiply add. */
7115 static void
7116 fmadds (sim_cpu *cpu)
7117 {
7118 unsigned sa = INSTR (14, 10);
7119 unsigned sm = INSTR (20, 16);
7120 unsigned sn = INSTR ( 9, 5);
7121 unsigned sd = INSTR ( 4, 0);
7122
7123 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7124 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7125 + aarch64_get_FP_float (cpu, sn)
7126 * aarch64_get_FP_float (cpu, sm));
7127 }
7128
7129 /* Double multiply add. */
7130 static void
7131 fmaddd (sim_cpu *cpu)
7132 {
7133 unsigned sa = INSTR (14, 10);
7134 unsigned sm = INSTR (20, 16);
7135 unsigned sn = INSTR ( 9, 5);
7136 unsigned sd = INSTR ( 4, 0);
7137
7138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7139 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7140 + aarch64_get_FP_double (cpu, sn)
7141 * aarch64_get_FP_double (cpu, sm));
7142 }
7143
7144 /* Float multiply subtract. */
7145 static void
7146 fmsubs (sim_cpu *cpu)
7147 {
7148 unsigned sa = INSTR (14, 10);
7149 unsigned sm = INSTR (20, 16);
7150 unsigned sn = INSTR ( 9, 5);
7151 unsigned sd = INSTR ( 4, 0);
7152
7153 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7154 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7155 - aarch64_get_FP_float (cpu, sn)
7156 * aarch64_get_FP_float (cpu, sm));
7157 }
7158
7159 /* Double multiply subtract. */
7160 static void
7161 fmsubd (sim_cpu *cpu)
7162 {
7163 unsigned sa = INSTR (14, 10);
7164 unsigned sm = INSTR (20, 16);
7165 unsigned sn = INSTR ( 9, 5);
7166 unsigned sd = INSTR ( 4, 0);
7167
7168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7169 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7170 - aarch64_get_FP_double (cpu, sn)
7171 * aarch64_get_FP_double (cpu, sm));
7172 }
7173
7174 /* Float negative multiply add. */
7175 static void
7176 fnmadds (sim_cpu *cpu)
7177 {
7178 unsigned sa = INSTR (14, 10);
7179 unsigned sm = INSTR (20, 16);
7180 unsigned sn = INSTR ( 9, 5);
7181 unsigned sd = INSTR ( 4, 0);
7182
7183 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7184 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7185 + (- aarch64_get_FP_float (cpu, sn))
7186 * aarch64_get_FP_float (cpu, sm));
7187 }
7188
7189 /* Double negative multiply add. */
7190 static void
7191 fnmaddd (sim_cpu *cpu)
7192 {
7193 unsigned sa = INSTR (14, 10);
7194 unsigned sm = INSTR (20, 16);
7195 unsigned sn = INSTR ( 9, 5);
7196 unsigned sd = INSTR ( 4, 0);
7197
7198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7199 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7200 + (- aarch64_get_FP_double (cpu, sn))
7201 * aarch64_get_FP_double (cpu, sm));
7202 }
7203
7204 /* Float negative multiply subtract. */
7205 static void
7206 fnmsubs (sim_cpu *cpu)
7207 {
7208 unsigned sa = INSTR (14, 10);
7209 unsigned sm = INSTR (20, 16);
7210 unsigned sn = INSTR ( 9, 5);
7211 unsigned sd = INSTR ( 4, 0);
7212
7213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7214 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7215 + aarch64_get_FP_float (cpu, sn)
7216 * aarch64_get_FP_float (cpu, sm));
7217 }
7218
7219 /* Double negative multiply subtract. */
7220 static void
7221 fnmsubd (sim_cpu *cpu)
7222 {
7223 unsigned sa = INSTR (14, 10);
7224 unsigned sm = INSTR (20, 16);
7225 unsigned sn = INSTR ( 9, 5);
7226 unsigned sd = INSTR ( 4, 0);
7227
7228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7229 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7230 + aarch64_get_FP_double (cpu, sn)
7231 * aarch64_get_FP_double (cpu, sm));
7232 }
7233
7234 static void
7235 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7236 {
7237 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7238 instr[30] = 0
7239 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7240 instr[28,25] = 1111
7241 instr[24] = 1
7242 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7243 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7244 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7245
7246 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7247 /* dispatch on combined type:o1:o2. */
7248 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
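  /* dispatch values 0..3 select the single precision fmadds, fmsubs,
     fnmadds and fnmsubs; 4..7 the corresponding double precision ops.  */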
7249
7250 if (M_S != 0)
7251 HALT_UNALLOC;
7252
7253 switch (dispatch)
7254 {
7255 case 0: fmadds (cpu); return;
7256 case 1: fmsubs (cpu); return;
7257 case 2: fnmadds (cpu); return;
7258 case 3: fnmsubs (cpu); return;
7259 case 4: fmaddd (cpu); return;
7260 case 5: fmsubd (cpu); return;
7261 case 6: fnmaddd (cpu); return;
7262 case 7: fnmsubd (cpu); return;
7263 default:
7264 /* type > 1 is currently unallocated. */
7265 HALT_UNALLOC;
7266 }
7267 }
7268
7269 static void
7270 dexSimpleFPFixedConvert (sim_cpu *cpu)
7271 {
7272 HALT_NYI;
7273 }
7274
7275 static void
7276 dexSimpleFPCondCompare (sim_cpu *cpu)
7277 {
7278 /* instr [31,23] = 0001 1110 0
7279 instr [22] = type
7280 instr [21] = 1
7281 instr [20,16] = Rm
7282 instr [15,12] = condition
7283 instr [11,10] = 01
7284 instr [9,5] = Rn
7285 instr [4] = 0
7286 instr [3,0] = nzcv */
7287
7288 unsigned rm = INSTR (20, 16);
7289 unsigned rn = INSTR (9, 5);
7290
7291 NYI_assert (31, 23, 0x3C);
7292 NYI_assert (11, 10, 0x1);
7293 NYI_assert (4, 4, 0);
7294
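  /* When the condition holds, the flags are set as FCMP would set them:
     equal => Z|C, less => N, greater => C.  The unordered case (C|V)
     is not modelled; see the FIXMEs below.  */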
7295 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7296 if (! testConditionCode (cpu, INSTR (15, 12)))
7297 {
7298 aarch64_set_CPSR (cpu, INSTR (3, 0));
7299 return;
7300 }
7301
7302 if (INSTR (22, 22))
7303 {
7304 /* Double precision. */
7305 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7306 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7307
7308 /* FIXME: Check for NaNs. */
7309 if (val1 == val2)
7310 aarch64_set_CPSR (cpu, (Z | C));
7311 else if (val1 < val2)
7312 aarch64_set_CPSR (cpu, N);
7313 else /* val1 > val2 */
7314 aarch64_set_CPSR (cpu, C);
7315 }
7316 else
7317 {
7318 /* Single precision. */
7319 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7320 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7321
7322 /* FIXME: Check for NaNs. */
7323 if (val1 == val2)
7324 aarch64_set_CPSR (cpu, (Z | C));
7325 else if (val1 < val2)
7326 aarch64_set_CPSR (cpu, N);
7327 else /* val1 > val2 */
7328 aarch64_set_CPSR (cpu, C);
7329 }
7330 }
7331
7332 /* 2 sources. */
7333
7334 /* Float add. */
7335 static void
7336 fadds (sim_cpu *cpu)
7337 {
7338 unsigned sm = INSTR (20, 16);
7339 unsigned sn = INSTR ( 9, 5);
7340 unsigned sd = INSTR ( 4, 0);
7341
7342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7343 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7344 + aarch64_get_FP_float (cpu, sm));
7345 }
7346
7347 /* Double add. */
7348 static void
7349 faddd (sim_cpu *cpu)
7350 {
7351 unsigned sm = INSTR (20, 16);
7352 unsigned sn = INSTR ( 9, 5);
7353 unsigned sd = INSTR ( 4, 0);
7354
7355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7356 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7357 + aarch64_get_FP_double (cpu, sm));
7358 }
7359
7360 /* Float divide. */
7361 static void
7362 fdivs (sim_cpu *cpu)
7363 {
7364 unsigned sm = INSTR (20, 16);
7365 unsigned sn = INSTR ( 9, 5);
7366 unsigned sd = INSTR ( 4, 0);
7367
7368 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7369 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7370 / aarch64_get_FP_float (cpu, sm));
7371 }
7372
7373 /* Double divide. */
7374 static void
7375 fdivd (sim_cpu *cpu)
7376 {
7377 unsigned sm = INSTR (20, 16);
7378 unsigned sn = INSTR ( 9, 5);
7379 unsigned sd = INSTR ( 4, 0);
7380
7381 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7382 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7383 / aarch64_get_FP_double (cpu, sm));
7384 }
7385
7386 /* Float multiply. */
7387 static void
7388 fmuls (sim_cpu *cpu)
7389 {
7390 unsigned sm = INSTR (20, 16);
7391 unsigned sn = INSTR ( 9, 5);
7392 unsigned sd = INSTR ( 4, 0);
7393
7394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7395 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7396 * aarch64_get_FP_float (cpu, sm));
7397 }
7398
7399 /* Double multiply. */
7400 static void
7401 fmuld (sim_cpu *cpu)
7402 {
7403 unsigned sm = INSTR (20, 16);
7404 unsigned sn = INSTR ( 9, 5);
7405 unsigned sd = INSTR ( 4, 0);
7406
7407 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7408 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7409 * aarch64_get_FP_double (cpu, sm));
7410 }
7411
7412 /* Float negate and multiply. */
7413 static void
7414 fnmuls (sim_cpu *cpu)
7415 {
7416 unsigned sm = INSTR (20, 16);
7417 unsigned sn = INSTR ( 9, 5);
7418 unsigned sd = INSTR ( 4, 0);
7419
7420 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7421 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7422 * aarch64_get_FP_float (cpu, sm)));
7423 }
7424
7425 /* Double negate and multiply. */
7426 static void
7427 fnmuld (sim_cpu *cpu)
7428 {
7429 unsigned sm = INSTR (20, 16);
7430 unsigned sn = INSTR ( 9, 5);
7431 unsigned sd = INSTR ( 4, 0);
7432
7433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7434 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7435 * aarch64_get_FP_double (cpu, sm)));
7436 }
7437
7438 /* Float subtract. */
7439 static void
7440 fsubs (sim_cpu *cpu)
7441 {
7442 unsigned sm = INSTR (20, 16);
7443 unsigned sn = INSTR ( 9, 5);
7444 unsigned sd = INSTR ( 4, 0);
7445
7446 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7447 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7448 - aarch64_get_FP_float (cpu, sm));
7449 }
7450
7451 /* Double subtract. */
7452 static void
7453 fsubd (sim_cpu *cpu)
7454 {
7455 unsigned sm = INSTR (20, 16);
7456 unsigned sn = INSTR ( 9, 5);
7457 unsigned sd = INSTR ( 4, 0);
7458
7459 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7460 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7461 - aarch64_get_FP_double (cpu, sm));
7462 }
7463
7464 static void
7465 do_FMINNM (sim_cpu *cpu)
7466 {
7467 /* instr[31,23] = 0 0011 1100
7468 instr[22] = float(0)/double(1)
7469 instr[21] = 1
7470 instr[20,16] = Sm
7471 instr[15,10] = 01 1110
7472 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7474
7475 unsigned sm = INSTR (20, 16);
7476 unsigned sn = INSTR ( 9, 5);
7477 unsigned sd = INSTR ( 4, 0);
7478
7479 NYI_assert (31, 23, 0x03C);
7480 NYI_assert (15, 10, 0x1E);
7481
7482 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7483 if (INSTR (22, 22))
7484 aarch64_set_FP_double (cpu, sd,
7485 dminnm (aarch64_get_FP_double (cpu, sn),
7486 aarch64_get_FP_double (cpu, sm)));
7487 else
7488 aarch64_set_FP_float (cpu, sd,
7489 fminnm (aarch64_get_FP_float (cpu, sn),
7490 aarch64_get_FP_float (cpu, sm)));
7491 }
7492
7493 static void
7494 do_FMAXNM (sim_cpu *cpu)
7495 {
7496 /* instr[31,23] = 0 0011 1100
7497 instr[22] = float(0)/double(1)
7498 instr[21] = 1
7499 instr[20,16] = Sm
7500 instr[15,10] = 01 1010
7501 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7503
7504 unsigned sm = INSTR (20, 16);
7505 unsigned sn = INSTR ( 9, 5);
7506 unsigned sd = INSTR ( 4, 0);
7507
7508 NYI_assert (31, 23, 0x03C);
7509 NYI_assert (15, 10, 0x1A);
7510
7511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7512 if (INSTR (22, 22))
7513 aarch64_set_FP_double (cpu, sd,
7514 dmaxnm (aarch64_get_FP_double (cpu, sn),
7515 aarch64_get_FP_double (cpu, sm)));
7516 else
7517 aarch64_set_FP_float (cpu, sd,
7518 fmaxnm (aarch64_get_FP_float (cpu, sn),
7519 aarch64_get_FP_float (cpu, sm)));
7520 }
7521
7522 static void
7523 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7524 {
7525 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7526 instr[30] = 0
7527 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7528 instr[28,25] = 1111
7529 instr[24] = 0
7530 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7531 instr[21] = 1
7532 instr[20,16] = Vm
7533 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7534 0010 ==> FADD, 0011 ==> FSUB,
7535 0100 ==> FMAX, 0101 ==> FMIN
7536 0110 ==> FMAXNM, 0111 ==> FMINNM
7537 1000 ==> FNMUL, ow ==> UNALLOC
7538 instr[11,10] = 10
7539 instr[9,5] = Vn
7540 instr[4,0] = Vd */
7541
7542 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7543 uint32_t type = INSTR (23, 22);
7544 /* Dispatch on opcode. */
7545 uint32_t dispatch = INSTR (15, 12);
7546
7547 if (type > 1)
7548 HALT_UNALLOC;
7549
7550 if (M_S != 0)
7551 HALT_UNALLOC;
7552
7553 if (type)
7554 switch (dispatch)
7555 {
7556 case 0: fmuld (cpu); return;
7557 case 1: fdivd (cpu); return;
7558 case 2: faddd (cpu); return;
7559 case 3: fsubd (cpu); return;
7560 case 6: do_FMAXNM (cpu); return;
7561 case 7: do_FMINNM (cpu); return;
7562 case 8: fnmuld (cpu); return;
7563
7564 /* Have not yet implemented fmax and fmin. */
7565 case 4:
7566 case 5:
7567 HALT_NYI;
7568
7569 default:
7570 HALT_UNALLOC;
7571 }
7572 else /* type == 0 => floats. */
7573 switch (dispatch)
7574 {
7575 case 0: fmuls (cpu); return;
7576 case 1: fdivs (cpu); return;
7577 case 2: fadds (cpu); return;
7578 case 3: fsubs (cpu); return;
7579 case 6: do_FMAXNM (cpu); return;
7580 case 7: do_FMINNM (cpu); return;
7581 case 8: fnmuls (cpu); return;
7582
7583 case 4:
7584 case 5:
7585 HALT_NYI;
7586
7587 default:
7588 HALT_UNALLOC;
7589 }
7590 }
7591
7592 static void
7593 dexSimpleFPCondSelect (sim_cpu *cpu)
7594 {
7595 /* FCSEL
7596 instr[31,23] = 0 0011 1100
7597 instr[22] = 0=>single 1=>double
7598 instr[21] = 1
7599 instr[20,16] = Sm
7600 instr[15,12] = cond
7601 instr[11,10] = 11
7602 instr[9,5] = Sn
     instr[4,0]  = Sd  */
7604 unsigned sm = INSTR (20, 16);
7605 unsigned sn = INSTR ( 9, 5);
7606 unsigned sd = INSTR ( 4, 0);
7607 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7608
7609 NYI_assert (31, 23, 0x03C);
7610 NYI_assert (11, 10, 0x3);
7611
7612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7613 if (INSTR (22, 22))
7614 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7615 : aarch64_get_FP_double (cpu, sm)));
7616 else
7617 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7618 : aarch64_get_FP_float (cpu, sm)));
7619 }
7620
7621 /* Store 32 bit unscaled signed 9 bit. */
7622 static void
7623 fsturs (sim_cpu *cpu, int32_t offset)
7624 {
7625 unsigned int rn = INSTR (9, 5);
7626 unsigned int st = INSTR (4, 0);
7627
7628 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7630 aarch64_get_vec_u32 (cpu, st, 0));
7631 }
7632
7633 /* Store 64 bit unscaled signed 9 bit. */
7634 static void
7635 fsturd (sim_cpu *cpu, int32_t offset)
7636 {
7637 unsigned int rn = INSTR (9, 5);
7638 unsigned int st = INSTR (4, 0);
7639
7640 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7642 aarch64_get_vec_u64 (cpu, st, 0));
7643 }
7644
7645 /* Store 128 bit unscaled signed 9 bit. */
7646 static void
7647 fsturq (sim_cpu *cpu, int32_t offset)
7648 {
7649 unsigned int rn = INSTR (9, 5);
7650 unsigned int st = INSTR (4, 0);
7651 FRegister a;
7652
7653 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7654 aarch64_get_FP_long_double (cpu, st, & a);
7655 aarch64_set_mem_long_double (cpu,
			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7657 + offset, a);
7658 }
7659
7660 /* TODO FP move register. */
7661
7662 /* 32 bit fp to fp move register. */
7663 static void
7664 ffmovs (sim_cpu *cpu)
7665 {
7666 unsigned int rn = INSTR (9, 5);
7667 unsigned int st = INSTR (4, 0);
7668
7669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7670 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7671 }
7672
7673 /* 64 bit fp to fp move register. */
7674 static void
7675 ffmovd (sim_cpu *cpu)
7676 {
7677 unsigned int rn = INSTR (9, 5);
7678 unsigned int st = INSTR (4, 0);
7679
7680 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7681 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7682 }
7683
7684 /* 32 bit GReg to Vec move register. */
7685 static void
7686 fgmovs (sim_cpu *cpu)
7687 {
7688 unsigned int rn = INSTR (9, 5);
7689 unsigned int st = INSTR (4, 0);
7690
7691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7692 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7693 }
7694
7695 /* 64 bit g to fp move register. */
7696 static void
7697 fgmovd (sim_cpu *cpu)
7698 {
7699 unsigned int rn = INSTR (9, 5);
7700 unsigned int st = INSTR (4, 0);
7701
7702 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7703 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7704 }
7705
7706 /* 32 bit fp to g move register. */
7707 static void
7708 gfmovs (sim_cpu *cpu)
7709 {
7710 unsigned int rn = INSTR (9, 5);
7711 unsigned int st = INSTR (4, 0);
7712
7713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7714 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7715 }
7716
7717 /* 64 bit fp to g move register. */
7718 static void
7719 gfmovd (sim_cpu *cpu)
7720 {
7721 unsigned int rn = INSTR (9, 5);
7722 unsigned int st = INSTR (4, 0);
7723
7724 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7725 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7726 }
7727
7728 /* FP move immediate
7729
7730 These install an immediate 8 bit value in the target register
7731 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7732 bit exponent. */
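/* For example, imm8 == 0x70 (sign 0, exponent 0b111, fraction 0b0000)
   encodes 1.0, and imm8 == 0xF0 encodes -1.0.  */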
7733
7734 static void
7735 fmovs (sim_cpu *cpu)
7736 {
7737 unsigned int sd = INSTR (4, 0);
7738 uint32_t imm = INSTR (20, 13);
7739 float f = fp_immediate_for_encoding_32 (imm);
7740
7741 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7742 aarch64_set_FP_float (cpu, sd, f);
7743 }
7744
7745 static void
7746 fmovd (sim_cpu *cpu)
7747 {
7748 unsigned int sd = INSTR (4, 0);
7749 uint32_t imm = INSTR (20, 13);
7750 double d = fp_immediate_for_encoding_64 (imm);
7751
7752 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7753 aarch64_set_FP_double (cpu, sd, d);
7754 }
7755
7756 static void
7757 dexSimpleFPImmediate (sim_cpu *cpu)
7758 {
7759 /* instr[31,23] == 00111100
7760 instr[22] == type : single(0)/double(1)
7761 instr[21] == 1
7762 instr[20,13] == imm8
7763 instr[12,10] == 100
     instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7765 instr[4,0] == Rd */
7766 uint32_t imm5 = INSTR (9, 5);
7767
7768 NYI_assert (31, 23, 0x3C);
7769
7770 if (imm5 != 0)
7771 HALT_UNALLOC;
7772
7773 if (INSTR (22, 22))
7774 fmovd (cpu);
7775 else
7776 fmovs (cpu);
7777 }
7778
7779 /* TODO specific decode and execute for group Load Store. */
7780
7781 /* TODO FP load/store single register (unscaled offset). */
7782
7783 /* TODO load 8 bit unscaled signed 9 bit. */
7784 /* TODO load 16 bit unscaled signed 9 bit. */
7785
7786 /* Load 32 bit unscaled signed 9 bit. */
7787 static void
7788 fldurs (sim_cpu *cpu, int32_t offset)
7789 {
7790 unsigned int rn = INSTR (9, 5);
7791 unsigned int st = INSTR (4, 0);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7795 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7796 }
7797
7798 /* Load 64 bit unscaled signed 9 bit. */
7799 static void
7800 fldurd (sim_cpu *cpu, int32_t offset)
7801 {
7802 unsigned int rn = INSTR (9, 5);
7803 unsigned int st = INSTR (4, 0);
7804
7805 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7806 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7807 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7808 }
7809
7810 /* Load 128 bit unscaled signed 9 bit. */
7811 static void
7812 fldurq (sim_cpu *cpu, int32_t offset)
7813 {
7814 unsigned int rn = INSTR (9, 5);
7815 unsigned int st = INSTR (4, 0);
7816 FRegister a;
7817 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7818
7819 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7820 aarch64_get_mem_long_double (cpu, addr, & a);
7821 aarch64_set_FP_long_double (cpu, st, a);
7822 }
7823
7824 /* TODO store 8 bit unscaled signed 9 bit. */
7825 /* TODO store 16 bit unscaled signed 9 bit. */
7826
7827
7828 /* 1 source. */
7829
7830 /* Float absolute value. */
7831 static void
7832 fabss (sim_cpu *cpu)
7833 {
7834 unsigned sn = INSTR (9, 5);
7835 unsigned sd = INSTR (4, 0);
7836 float value = aarch64_get_FP_float (cpu, sn);
7837
7838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7839 aarch64_set_FP_float (cpu, sd, fabsf (value));
7840 }
7841
7842 /* Double absolute value. */
7843 static void
7844 fabcpu (sim_cpu *cpu)
7845 {
7846 unsigned sn = INSTR (9, 5);
7847 unsigned sd = INSTR (4, 0);
7848 double value = aarch64_get_FP_double (cpu, sn);
7849
7850 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7851 aarch64_set_FP_double (cpu, sd, fabs (value));
7852 }
7853
7854 /* Float negative value. */
7855 static void
7856 fnegs (sim_cpu *cpu)
7857 {
7858 unsigned sn = INSTR (9, 5);
7859 unsigned sd = INSTR (4, 0);
7860
7861 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7862 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7863 }
7864
7865 /* Double negative value. */
7866 static void
7867 fnegd (sim_cpu *cpu)
7868 {
7869 unsigned sn = INSTR (9, 5);
7870 unsigned sd = INSTR (4, 0);
7871
7872 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7873 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7874 }
7875
7876 /* Float square root. */
7877 static void
7878 fsqrts (sim_cpu *cpu)
7879 {
7880 unsigned sn = INSTR (9, 5);
7881 unsigned sd = INSTR (4, 0);
7882
7883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7884 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7885 }
7886
7887 /* Double square root. */
7888 static void
7889 fsqrtd (sim_cpu *cpu)
7890 {
7891 unsigned sn = INSTR (9, 5);
7892 unsigned sd = INSTR (4, 0);
7893
7894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7895 aarch64_set_FP_double (cpu, sd,
7896 sqrt (aarch64_get_FP_double (cpu, sn)));
7897 }
7898
7899 /* Convert double to float. */
7900 static void
7901 fcvtds (sim_cpu *cpu)
7902 {
7903 unsigned sn = INSTR (9, 5);
7904 unsigned sd = INSTR (4, 0);
7905
7906 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7907 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7908 }
7909
7910 /* Convert float to double. */
7911 static void
7912 fcvtcpu (sim_cpu *cpu)
7913 {
7914 unsigned sn = INSTR (9, 5);
7915 unsigned sd = INSTR (4, 0);
7916
7917 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7918 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7919 }
7920
7921 static void
7922 do_FRINT (sim_cpu *cpu)
7923 {
7924 /* instr[31,23] = 0001 1110 0
7925 instr[22] = single(0)/double(1)
7926 instr[21,18] = 1001
7927 instr[17,15] = rounding mode
7928 instr[14,10] = 10000
7929 instr[9,5] = source
7930 instr[4,0] = dest */
7931
7932 float val;
7933 unsigned rs = INSTR (9, 5);
7934 unsigned rd = INSTR (4, 0);
7935 unsigned int rmode = INSTR (17, 15);
7936
7937 NYI_assert (31, 23, 0x03C);
7938 NYI_assert (21, 18, 0x9);
7939 NYI_assert (14, 10, 0x10);
7940
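  /* Architecturally the dynamic rounding mode is FPCR.RMode (bits
     [23,22]); this simulator keeps those bits in the value returned
     by aarch64_get_FPSR.  */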
7941 if (rmode == 6 || rmode == 7)
7942 /* FIXME: Add support for rmode == 6 exactness check. */
7943 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7944
7945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7946 if (INSTR (22, 22))
7947 {
7948 double val = aarch64_get_FP_double (cpu, rs);
7949
7950 switch (rmode)
7951 {
	case 0:	/* mode N: nearest, ties to even.  */
	  {
	    double rval = round (val);

	    if (fabs (val - rval) == 0.5
		&& fmod (rval, 2.0) != 0.0)
	      /* Halfway case rounded away from zero to an odd value;
		 the even choice lies one unit back towards VAL.  */
	      rval -= copysign (1.0, rval);

	    aarch64_set_FP_double (cpu, rd, rval);
	    return;
	  }

	case 1: /* mode P: towards +inf.  */
	  aarch64_set_FP_double (cpu, rd, ceil (val));
	  return;

	case 2: /* mode M: towards -inf.  */
	  aarch64_set_FP_double (cpu, rd, floor (val));
	  return;
7979
7980 case 3: /* mode Z: towards 0. */
7981 aarch64_set_FP_double (cpu, rd, trunc (val));
7982 return;
7983
7984 case 4: /* mode A: away from 0. */
7985 aarch64_set_FP_double (cpu, rd, round (val));
7986 return;
7987
7988 case 6: /* mode X: use FPCR with exactness check. */
7989 case 7: /* mode I: use FPCR mode. */
7990 HALT_NYI;
7991
7992 default:
7993 HALT_UNALLOC;
7994 }
7995 }
7996
7997 val = aarch64_get_FP_float (cpu, rs);
7998
7999 switch (rmode)
8000 {
    case 0:	/* mode N: nearest, ties to even.  */
      {
	float rval = roundf (val);

	if (fabsf (val - rval) == 0.5f
	    && fmodf (rval, 2.0f) != 0.0f)
	  /* Halfway case rounded away from zero to an odd value;
	     the even choice lies one unit back towards VAL.  */
	  rval -= copysignf (1.0f, rval);

	aarch64_set_FP_float (cpu, rd, rval);
	return;
      }

    case 1: /* mode P: towards +inf.  */
      aarch64_set_FP_float (cpu, rd, ceilf (val));
      return;

    case 2: /* mode M: towards -inf.  */
      aarch64_set_FP_float (cpu, rd, floorf (val));
      return;
8028
8029 case 3: /* mode Z: towards 0. */
8030 aarch64_set_FP_float (cpu, rd, truncf (val));
8031 return;
8032
8033 case 4: /* mode A: away from 0. */
8034 aarch64_set_FP_float (cpu, rd, roundf (val));
8035 return;
8036
8037 case 6: /* mode X: use FPCR with exactness check. */
8038 case 7: /* mode I: use FPCR mode. */
8039 HALT_NYI;
8040
8041 default:
8042 HALT_UNALLOC;
8043 }
8044 }
8045
8046 /* Convert half to float. */
8047 static void
8048 do_FCVT_half_to_single (sim_cpu *cpu)
8049 {
8050 unsigned rn = INSTR (9, 5);
8051 unsigned rd = INSTR (4, 0);
8052
8053 NYI_assert (31, 10, 0x7B890);
8054
8055 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8056 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8057 }
8058
8059 /* Convert half to double. */
8060 static void
8061 do_FCVT_half_to_double (sim_cpu *cpu)
8062 {
8063 unsigned rn = INSTR (9, 5);
8064 unsigned rd = INSTR (4, 0);
8065
8066 NYI_assert (31, 10, 0x7B8B0);
8067
8068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8069 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8070 }
8071
8072 static void
8073 do_FCVT_single_to_half (sim_cpu *cpu)
8074 {
8075 unsigned rn = INSTR (9, 5);
8076 unsigned rd = INSTR (4, 0);
8077
8078 NYI_assert (31, 10, 0x788F0);
8079
8080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8081 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8082 }
8083
8084 /* Convert double to half. */
8085 static void
8086 do_FCVT_double_to_half (sim_cpu *cpu)
8087 {
8088 unsigned rn = INSTR (9, 5);
8089 unsigned rd = INSTR (4, 0);
8090
8091 NYI_assert (31, 10, 0x798F0);
8092
8093 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8094 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8095 }
8096
8097 static void
8098 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8099 {
8100 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8101 instr[30] = 0
8102 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8103 instr[28,25] = 1111
8104 instr[24] = 0
8105 instr[23,22] ==> type : 00 ==> source is single,
8106 01 ==> source is double
8107 10 ==> UNALLOC
8108 11 ==> UNALLOC or source is half
8109 instr[21] = 1
8110 instr[20,15] ==> opcode : with type 00 or 01
8111 000000 ==> FMOV, 000001 ==> FABS,
8112 000010 ==> FNEG, 000011 ==> FSQRT,
8113 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8114 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8115 001000 ==> FRINTN, 001001 ==> FRINTP,
8116 001010 ==> FRINTM, 001011 ==> FRINTZ,
8117 001100 ==> FRINTA, 001101 ==> UNALLOC
8118 001110 ==> FRINTX, 001111 ==> FRINTI
8119 with type 11
8120 000100 ==> FCVT (half-to-single)
8121 000101 ==> FCVT (half-to-double)
8122 instr[14,10] = 10000. */
8123
8124 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8125 uint32_t type = INSTR (23, 22);
8126 uint32_t opcode = INSTR (20, 15);
8127
8128 if (M_S != 0)
8129 HALT_UNALLOC;
8130
8131 if (type == 3)
8132 {
8133 if (opcode == 4)
8134 do_FCVT_half_to_single (cpu);
8135 else if (opcode == 5)
8136 do_FCVT_half_to_double (cpu);
8137 else
8138 HALT_UNALLOC;
8139 return;
8140 }
8141
8142 if (type == 2)
8143 HALT_UNALLOC;
8144
8145 switch (opcode)
8146 {
8147 case 0:
8148 if (type)
8149 ffmovd (cpu);
8150 else
8151 ffmovs (cpu);
8152 return;
8153
8154 case 1:
8155 if (type)
8156 fabcpu (cpu);
8157 else
8158 fabss (cpu);
8159 return;
8160
8161 case 2:
8162 if (type)
8163 fnegd (cpu);
8164 else
8165 fnegs (cpu);
8166 return;
8167
8168 case 3:
8169 if (type)
8170 fsqrtd (cpu);
8171 else
8172 fsqrts (cpu);
8173 return;
8174
8175 case 4:
8176 if (type)
8177 fcvtds (cpu);
8178 else
8179 HALT_UNALLOC;
8180 return;
8181
8182 case 5:
8183 if (type)
8184 HALT_UNALLOC;
8185 fcvtcpu (cpu);
8186 return;
8187
8188 case 8: /* FRINTN etc. */
8189 case 9:
8190 case 10:
8191 case 11:
8192 case 12:
8193 case 14:
8194 case 15:
8195 do_FRINT (cpu);
8196 return;
8197
8198 case 7:
8199 if (INSTR (22, 22))
8200 do_FCVT_double_to_half (cpu);
8201 else
8202 do_FCVT_single_to_half (cpu);
8203 return;
8204
8205 case 13:
8206 HALT_NYI;
8207
8208 default:
8209 HALT_UNALLOC;
8210 }
8211 }
8212
8213 /* 32 bit signed int to float. */
8214 static void
8215 scvtf32 (sim_cpu *cpu)
8216 {
8217 unsigned rn = INSTR (9, 5);
8218 unsigned sd = INSTR (4, 0);
8219
8220 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8221 aarch64_set_FP_float
8222 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8223 }
8224
8225 /* signed int to float. */
8226 static void
8227 scvtf (sim_cpu *cpu)
8228 {
8229 unsigned rn = INSTR (9, 5);
8230 unsigned sd = INSTR (4, 0);
8231
8232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8233 aarch64_set_FP_float
8234 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8235 }
8236
8237 /* 32 bit signed int to double. */
8238 static void
8239 scvtd32 (sim_cpu *cpu)
8240 {
8241 unsigned rn = INSTR (9, 5);
8242 unsigned sd = INSTR (4, 0);
8243
8244 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8245 aarch64_set_FP_double
8246 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8247 }
8248
8249 /* signed int to double. */
8250 static void
8251 scvtd (sim_cpu *cpu)
8252 {
8253 unsigned rn = INSTR (9, 5);
8254 unsigned sd = INSTR (4, 0);
8255
8256 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8257 aarch64_set_FP_double
8258 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8259 }
8260
8261 static const float FLOAT_INT_MAX = (float) INT_MAX;
8262 static const float FLOAT_INT_MIN = (float) INT_MIN;
8263 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8264 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8265 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8266 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8267 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8268 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
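/* Note: LONG here is the host "long"; the bounds above assume an LP64
   host where long is 64 bits wide.  */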
8269
8270 #define UINT_MIN 0
8271 #define ULONG_MIN 0
8272 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8273 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8274 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8275 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8276 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8277 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8278 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8279 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
8280
8281 /* Check for FP exception conditions:
8282 NaN raises IO
8283 Infinity raises IO
8284 Out of Range raises IO and IX and saturates value
8285 Denormal raises ID and IX and sets to zero. */
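/* F is the host floating-point input, VALUE an integer lvalue receiving
   the (possibly saturated) result; FTYPE (FLOAT or DOUBLE) and ITYPE
   (INT, UINT, LONG or ULONG) are token-pasted to select the bounds
   defined above.  */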
8286 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8287 do \
8288 { \
8289 switch (fpclassify (F)) \
8290 { \
8291 case FP_INFINITE: \
8292 case FP_NAN: \
8293 aarch64_set_FPSR (cpu, IO); \
	  if (signbit (F))					\
	    VALUE = ITYPE##_MIN;				\
	  else							\
	    VALUE = ITYPE##_MAX;				\
8298 break; \
8299 \
8300 case FP_NORMAL: \
8301 if (F >= FTYPE##_##ITYPE##_MAX) \
8302 { \
8303 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8304 VALUE = ITYPE##_MAX; \
8305 } \
8306 else if (F <= FTYPE##_##ITYPE##_MIN) \
8307 { \
8308 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8309 VALUE = ITYPE##_MIN; \
8310 } \
8311 break; \
8312 \
8313 case FP_SUBNORMAL: \
8314 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8315 VALUE = 0; \
8316 break; \
8317 \
8318 default: \
8319 case FP_ZERO: \
8320 VALUE = 0; \
8321 break; \
8322 } \
8323 } \
8324 while (0)
8325
8326 /* 32 bit convert float to signed int truncate towards zero. */
8327 static void
8328 fcvtszs32 (sim_cpu *cpu)
8329 {
8330 unsigned sn = INSTR (9, 5);
8331 unsigned rd = INSTR (4, 0);
8332 /* TODO : check that this rounds toward zero. */
8333 float f = aarch64_get_FP_float (cpu, sn);
8334 int32_t value = (int32_t) f;
8335
8336 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8337
8338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8339 /* Avoid sign extension to 64 bit. */
8340 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8341 }
8342
8343 /* 64 bit convert float to signed int truncate towards zero. */
8344 static void
8345 fcvtszs (sim_cpu *cpu)
8346 {
8347 unsigned sn = INSTR (9, 5);
8348 unsigned rd = INSTR (4, 0);
8349 float f = aarch64_get_FP_float (cpu, sn);
8350 int64_t value = (int64_t) f;
8351
8352 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8353
8354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8355 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8356 }
8357
8358 /* 32 bit convert double to signed int truncate towards zero. */
8359 static void
8360 fcvtszd32 (sim_cpu *cpu)
8361 {
8362 unsigned sn = INSTR (9, 5);
8363 unsigned rd = INSTR (4, 0);
8364 /* TODO : check that this rounds toward zero. */
8365 double d = aarch64_get_FP_double (cpu, sn);
8366 int32_t value = (int32_t) d;
8367
8368 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8369
8370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8371 /* Avoid sign extension to 64 bit. */
8372 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8373 }
8374
8375 /* 64 bit convert double to signed int truncate towards zero. */
8376 static void
8377 fcvtszd (sim_cpu *cpu)
8378 {
8379 unsigned sn = INSTR (9, 5);
8380 unsigned rd = INSTR (4, 0);
8381 /* TODO : check that this rounds toward zero. */
8382 double d = aarch64_get_FP_double (cpu, sn);
8383 int64_t value;
8384
8385 value = (int64_t) d;
8386
8387 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8388
8389 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8390 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8391 }
8392
8393 static void
8394 do_fcvtzu (sim_cpu *cpu)
8395 {
8396 /* instr[31] = size: 32-bit (0), 64-bit (1)
8397 instr[30,23] = 00111100
8398 instr[22] = type: single (0)/ double (1)
     instr[21]    = 0 ==> fixed-point (scale in [15,10]), 1 ==> integer
8400 instr[20,16] = 11001
8401 instr[15,10] = precision
8402 instr[9,5] = Rs
8403 instr[4,0] = Rd. */
8404
8405 unsigned rs = INSTR (9, 5);
8406 unsigned rd = INSTR (4, 0);
8407
8408 NYI_assert (30, 23, 0x3C);
8409 NYI_assert (20, 16, 0x19);
8410
8411 if (INSTR (21, 21) != 1)
8412 /* Convert to fixed point. */
8413 HALT_NYI;
8414
8415 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8416 if (INSTR (31, 31))
8417 {
8418 /* Convert to unsigned 64-bit integer. */
8419 if (INSTR (22, 22))
8420 {
8421 double d = aarch64_get_FP_double (cpu, rs);
8422 uint64_t value = (uint64_t) d;
8423
8424 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1ULL << 63 (the x86 behaviour this check relies on). */
8425 if (value != (1ULL << 63))
8426 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8427
8428 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8429 }
8430 else
8431 {
8432 float f = aarch64_get_FP_float (cpu, rs);
8433 uint64_t value = (uint64_t) f;
8434
8435 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1ULL << 63 (the x86 behaviour this check relies on). */
8436 if (value != (1ULL << 63))
8437 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8438
8439 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8440 }
8441 }
8442 else
8443 {
8444 uint32_t value;
8445
8446 /* Convert to unsigned 32-bit integer. */
8447 if (INSTR (22, 22))
8448 {
8449 double d = aarch64_get_FP_double (cpu, rs);
8450
8451 value = (uint32_t) d;
8452 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1UL << 31 (the x86 behaviour this check relies on). */
8453 if (value != (1UL << 31))
8454 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8455 }
8456 else
8457 {
8458 float f = aarch64_get_FP_float (cpu, rs);
8459
8460 value = (uint32_t) f;
8461 /* Do not raise an exception when the conversion overflowed: an out-of-range host cast yields 1UL << 31 (the x86 behaviour this check relies on). */
8462 if (value != (1UL << 31))
8463 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8464 }
8465
8466 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8467 }
8468 }
8469
8470 static void
8471 do_UCVTF (sim_cpu *cpu)
8472 {
8473 /* instr[31] = size: 32-bit (0), 64-bit (1)
8474 instr[30,23] = 001 1110 0
8475 instr[22] = type: single (0)/ double (1)
8476 instr[21] = 1 ==> integer conversion, 0 ==> fixed-point (NYI)
8477 instr[20,16] = 0 0011
8478 instr[15,10] = precision
8479 instr[9,5] = Rs
8480 instr[4,0] = Rd. */
8481
8482 unsigned rs = INSTR (9, 5);
8483 unsigned rd = INSTR (4, 0);
8484
8485 NYI_assert (30, 23, 0x3C);
8486 NYI_assert (20, 16, 0x03);
8487
8488 if (INSTR (21, 21) != 1)
8489 HALT_NYI;
8490
8491 /* FIXME: Add exception raising. */
8492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8493 if (INSTR (31, 31))
8494 {
8495 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8496
8497 if (INSTR (22, 22))
8498 aarch64_set_FP_double (cpu, rd, (double) value);
8499 else
8500 aarch64_set_FP_float (cpu, rd, (float) value);
8501 }
8502 else
8503 {
8504 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8505
8506 if (INSTR (22, 22))
8507 aarch64_set_FP_double (cpu, rd, (double) value);
8508 else
8509 aarch64_set_FP_float (cpu, rd, (float) value);
8510 }
8511 }
8512
8513 static void
8514 float_vector_move (sim_cpu *cpu)
8515 {
8516 /* instr[31,17] == 100 1111 0101 0111
8517 instr[16] ==> direction 0=> to GR, 1=> from GR
8518 instr[15,10] ==> 00 0000 ==> OK, ow ==> UNALLOC
8519 instr[9,5] ==> source
8520 instr[4,0] ==> dest. */
8521
8522 unsigned rn = INSTR (9, 5);
8523 unsigned rd = INSTR (4, 0);
8524
8525 NYI_assert (31, 17, 0x4F57);
8526
8527 if (INSTR (15, 10) != 0)
8528 HALT_UNALLOC;
8529
8530 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8531 if (INSTR (16, 16))
8532 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8533 else
8534 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8535 }
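
/* In assembler terms the two directions above correspond to the moves
   between a general register and the upper half of a vector register
   (mnemonics illustrative):
     FMOV Vd.D[1], Xn    ; instr[16] == 1
     FMOV Xd, Vn.D[1]    ; instr[16] == 0  */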
8536
8537 static void
8538 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8539 {
8540 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8541 instr[30] = 0
8542 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8543 instr[28,25] = 1111
8544 instr[24] = 0
8545 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8546 instr[21] = 1
8547 instr[20,19] = rmode
8548 instr[18,16] = opcode
8549 instr[15,10] = 00 0000 */
8550
8551 uint32_t rmode_opcode;
8552 uint32_t size_type;
8553 uint32_t type;
8554 uint32_t size;
8555 uint32_t S;
8556
8557 if (INSTR (31, 17) == 0x4F57)
8558 {
8559 float_vector_move (cpu);
8560 return;
8561 }
8562
8563 size = INSTR (31, 31);
8564 S = INSTR (29, 29);
8565 if (S != 0)
8566 HALT_UNALLOC;
8567
8568 type = INSTR (23, 22);
8569 if (type > 1)
8570 HALT_UNALLOC;
8571
8572 rmode_opcode = INSTR (20, 16);
8573 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8574
8575 switch (rmode_opcode)
8576 {
8577 case 2: /* SCVTF. */
8578 switch (size_type)
8579 {
8580 case 0: scvtf32 (cpu); return;
8581 case 1: scvtd32 (cpu); return;
8582 case 2: scvtf (cpu); return;
8583 case 3: scvtd (cpu); return;
8584 }
8585
8586 case 6: /* FMOV GR, Vec. */
8587 switch (size_type)
8588 {
8589 case 0: gfmovs (cpu); return;
8590 case 3: gfmovd (cpu); return;
8591 default: HALT_UNALLOC;
8592 }
8593
8594 case 7: /* FMOV vec, GR. */
8595 switch (size_type)
8596 {
8597 case 0: fgmovs (cpu); return;
8598 case 3: fgmovd (cpu); return;
8599 default: HALT_UNALLOC;
8600 }
8601
8602 case 24: /* FCVTZS. */
8603 switch (size_type)
8604 {
8605 case 0: fcvtszs32 (cpu); return;
8606 case 1: fcvtszd32 (cpu); return;
8607 case 2: fcvtszs (cpu); return;
8608 case 3: fcvtszd (cpu); return;
8609 }
8610
8611 case 25: do_fcvtzu (cpu); return;
8612 case 3: do_UCVTF (cpu); return;
8613
8614 case 0: /* FCVTNS. */
8615 case 1: /* FCVTNU. */
8616 case 4: /* FCVTAS. */
8617 case 5: /* FCVTAU. */
8618 case 8: /* FCVTPS. */
8619 case 9: /* FCVTPU. */
8620 case 16: /* FCVTMS. */
8621 case 17: /* FCVTMU. */
8622 default:
8623 HALT_NYI;
8624 }
8625 }
8626
8627 static void
8628 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8629 {
8630 uint32_t flags;
8631
8632 /* FIXME: Add exception raising. */
8633 if (isnan (fvalue1) || isnan (fvalue2))
8634 flags = C|V;
8635 else if (isinf (fvalue1) && isinf (fvalue2))
8636 {
8637 /* Subtracting two infinities may give a NaN. We only need to compare
8638 the signs, which we can get from isinf. */
8639 int result = isinf (fvalue1) - isinf (fvalue2);
8640
8641 if (result == 0)
8642 flags = Z|C;
8643 else if (result < 0)
8644 flags = N;
8645 else /* (result > 0). */
8646 flags = C;
8647 }
8648 else
8649 {
8650 float result = fvalue1 - fvalue2;
8651
8652 if (result == 0.0)
8653 flags = Z|C;
8654 else if (result < 0)
8655 flags = N;
8656 else /* (result > 0). */
8657 flags = C;
8658 }
8659
8660 aarch64_set_CPSR (cpu, flags);
8661 }
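
/* The NZCV settings above mirror the architected FCMP results:
     equal        -> Z|C (0110)
     less than    -> N   (1000)
     greater than -> C   (0010)
     unordered    -> C|V (0011)  */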
8662
8663 static void
8664 fcmps (sim_cpu *cpu)
8665 {
8666 unsigned sm = INSTR (20, 16);
8667 unsigned sn = INSTR ( 9, 5);
8668
8669 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8670 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8671
8672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8673 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8674 }
8675
8676 /* Float compare to zero -- Invalid Operation exception
8677 only on signaling NaNs. */
8678 static void
8679 fcmpzs (sim_cpu *cpu)
8680 {
8681 unsigned sn = INSTR ( 9, 5);
8682 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8683
8684 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8685 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8686 }
8687
8688 /* Float compare -- Invalid Operation exception on all NaNs. */
8689 static void
8690 fcmpes (sim_cpu *cpu)
8691 {
8692 unsigned sm = INSTR (20, 16);
8693 unsigned sn = INSTR ( 9, 5);
8694
8695 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8696 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8697
8698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8699 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8700 }
8701
8702 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8703 static void
8704 fcmpzes (sim_cpu *cpu)
8705 {
8706 unsigned sn = INSTR ( 9, 5);
8707 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8708
8709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8710 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8711 }
8712
8713 static void
8714 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8715 {
8716 uint32_t flags;
8717
8718 /* FIXME: Add exception raising. */
8719 if (isnan (dval1) || isnan (dval2))
8720 flags = C|V;
8721 else if (isinf (dval1) && isinf (dval2))
8722 {
8723 /* Subtracting two infinities may give a NaN. We only need to compare
8724 the signs, which we can get from isinf. */
8725 int result = isinf (dval1) - isinf (dval2);
8726
8727 if (result == 0)
8728 flags = Z|C;
8729 else if (result < 0)
8730 flags = N;
8731 else /* (result > 0). */
8732 flags = C;
8733 }
8734 else
8735 {
8736 double result = dval1 - dval2;
8737
8738 if (result == 0.0)
8739 flags = Z|C;
8740 else if (result < 0)
8741 flags = N;
8742 else /* (result > 0). */
8743 flags = C;
8744 }
8745
8746 aarch64_set_CPSR (cpu, flags);
8747 }
8748
8749 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8750 static void
8751 fcmpd (sim_cpu *cpu)
8752 {
8753 unsigned sm = INSTR (20, 16);
8754 unsigned sn = INSTR ( 9, 5);
8755
8756 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8757 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8758
8759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8760 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8761 }
8762
8763 /* Double compare to zero -- Invalid Operation exception
8764 only on signaling NaNs. */
8765 static void
8766 fcmpzd (sim_cpu *cpu)
8767 {
8768 unsigned sn = INSTR ( 9, 5);
8769 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8770
8771 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8772 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8773 }
8774
8775 /* Double compare -- Invalid Operation exception on all NaNs. */
8776 static void
8777 fcmped (sim_cpu *cpu)
8778 {
8779 unsigned sm = INSTR (20, 16);
8780 unsigned sn = INSTR ( 9, 5);
8781
8782 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8783 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8784
8785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8786 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8787 }
8788
8789 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8790 static void
8791 fcmpzed (sim_cpu *cpu)
8792 {
8793 unsigned sn = INSTR ( 9, 5);
8794 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8795
8796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8797 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8798 }
8799
8800 static void
8801 dexSimpleFPCompare (sim_cpu *cpu)
8802 {
8803 /* assert instr[28,25] == 1111
8804 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8805 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8806 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8807 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8808 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8809 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8810 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8811 ow ==> UNALLOC */
8812 uint32_t dispatch;
8813 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8814 uint32_t type = INSTR (23, 22);
8815 uint32_t op = INSTR (15, 14);
8816 uint32_t op2_2_0 = INSTR (2, 0);
8817
8818 if (op2_2_0 != 0)
8819 HALT_UNALLOC;
8820
8821 if (M_S != 0)
8822 HALT_UNALLOC;
8823
8824 if (type > 1)
8825 HALT_UNALLOC;
8826
8827 if (op != 0)
8828 HALT_UNALLOC;
8829
8830 /* dispatch on type and top 2 bits of opcode. */
8831 dispatch = (type << 2) | INSTR (4, 3);
8832
8833 switch (dispatch)
8834 {
8835 case 0: fcmps (cpu); return;
8836 case 1: fcmpzs (cpu); return;
8837 case 2: fcmpes (cpu); return;
8838 case 3: fcmpzes (cpu); return;
8839 case 4: fcmpd (cpu); return;
8840 case 5: fcmpzd (cpu); return;
8841 case 6: fcmped (cpu); return;
8842 case 7: fcmpzed (cpu); return;
8843 }
8844 }
8845
8846 static void
8847 do_scalar_FADDP (sim_cpu *cpu)
8848 {
8849 /* instr [31,23] = 0111 1110 0
8850 instr [22] = single(0)/double(1)
8851 instr [21,10] = 11 0000 1101 10
8852 instr [9,5] = Fn
8853 instr [4,0] = Fd. */
8854
8855 unsigned Fn = INSTR (9, 5);
8856 unsigned Fd = INSTR (4, 0);
8857
8858 NYI_assert (31, 23, 0x0FC);
8859 NYI_assert (21, 10, 0xC36);
8860
8861 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8862 if (INSTR (22, 22))
8863 {
8864 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8865 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8866
8867 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8868 }
8869 else
8870 {
8871 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8872 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8873
8874 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8875 }
8876 }
8877
8878 /* Floating point absolute difference. */
8879
8880 static void
8881 do_scalar_FABD (sim_cpu *cpu)
8882 {
8883 /* instr [31,23] = 0111 1110 1
8884 instr [22] = float(0)/double(1)
8885 instr [21] = 1
8886 instr [20,16] = Rm
8887 instr [15,10] = 1101 01
8888 instr [9, 5] = Rn
8889 instr [4, 0] = Rd. */
8890
8891 unsigned rm = INSTR (20, 16);
8892 unsigned rn = INSTR (9, 5);
8893 unsigned rd = INSTR (4, 0);
8894
8895 NYI_assert (31, 23, 0x0FD);
8896 NYI_assert (21, 21, 1);
8897 NYI_assert (15, 10, 0x35);
8898
8899 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8900 if (INSTR (22, 22))
8901 aarch64_set_FP_double (cpu, rd,
8902 fabs (aarch64_get_FP_double (cpu, rn)
8903 - aarch64_get_FP_double (cpu, rm)));
8904 else
8905 aarch64_set_FP_float (cpu, rd,
8906 fabsf (aarch64_get_FP_float (cpu, rn)
8907 - aarch64_get_FP_float (cpu, rm)));
8908 }
8909
8910 static void
8911 do_scalar_CMGT (sim_cpu *cpu)
8912 {
8913 /* instr [31,21] = 0101 1110 111
8914 instr [20,16] = Rm
8915 instr [15,10] = 00 1101
8916 instr [9, 5] = Rn
8917 instr [4, 0] = Rd. */
8918
8919 unsigned rm = INSTR (20, 16);
8920 unsigned rn = INSTR (9, 5);
8921 unsigned rd = INSTR (4, 0);
8922
8923 NYI_assert (31, 21, 0x2F7);
8924 NYI_assert (15, 10, 0x0D);
8925
8926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8927 aarch64_set_vec_u64 (cpu, rd, 0,
8928 aarch64_get_vec_u64 (cpu, rn, 0) >
8929 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8930 }
8931
8932 static void
8933 do_scalar_USHR (sim_cpu *cpu)
8934 {
8935 /* instr [31,23] = 0111 1111 0
8936 instr [22,16] = shift amount
8937 instr [15,10] = 0000 01
8938 instr [9, 5] = Rn
8939 instr [4, 0] = Rd. */
8940
8941 unsigned amount = 128 - INSTR (22, 16);
8942 unsigned rn = INSTR (9, 5);
8943 unsigned rd = INSTR (4, 0);
8944
8945 NYI_assert (31, 23, 0x0FE);
8946 NYI_assert (15, 10, 0x01);
8947
8948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8949 aarch64_set_vec_u64 (cpu, rd, 0,
8950 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8951 }
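
/* The shift immediate holds 128 - shift for this form.  E.g. an
   assembler "USHR D0, D1, #8" (illustrative) has instr[22,16] = 120,
   and the code above recovers amount = 128 - 120 = 8.  */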
8952
8953 static void
8954 do_scalar_SSHL (sim_cpu *cpu)
8955 {
8956 /* instr [31,21] = 0101 1110 111
8957 instr [20,16] = Rm
8958 instr [15,10] = 0100 01
8959 instr [9, 5] = Rn
8960 instr [4, 0] = Rd. */
8961
8962 unsigned rm = INSTR (20, 16);
8963 unsigned rn = INSTR (9, 5);
8964 unsigned rd = INSTR (4, 0);
8965 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8966
8967 NYI_assert (31, 21, 0x2F7);
8968 NYI_assert (15, 10, 0x11);
8969
8970 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8971 if (shift >= 0)
8972 aarch64_set_vec_s64 (cpu, rd, 0,
8973 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8974 else
8975 aarch64_set_vec_s64 (cpu, rd, 0,
8976 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8977 }
8978
8979 /* Floating point scalar compare greater than or equal to 0. */
8980 static void
8981 do_scalar_FCMGE_zero (sim_cpu *cpu)
8982 {
8983 /* instr [31,23] = 0111 1110 1
8984 instr [22,22] = size
8985 instr [21,16] = 1000 00
8986 instr [15,10] = 1100 10
8987 instr [9, 5] = Rn
8988 instr [4, 0] = Rd. */
8989
8990 unsigned size = INSTR (22, 22);
8991 unsigned rn = INSTR (9, 5);
8992 unsigned rd = INSTR (4, 0);
8993
8994 NYI_assert (31, 23, 0x0FD);
8995 NYI_assert (21, 16, 0x20);
8996 NYI_assert (15, 10, 0x32);
8997
8998 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8999 if (size)
9000 aarch64_set_vec_u64 (cpu, rd, 0,
9001 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
9002 else
9003 aarch64_set_vec_u32 (cpu, rd, 0,
9004 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
9005 }
9006
9007 /* Floating point scalar compare less than or equal to 0. */
9008 static void
9009 do_scalar_FCMLE_zero (sim_cpu *cpu)
9010 {
9011 /* instr [31,23] = 0111 1110 1
9012 instr [22,22] = size
9013 instr [21,16] = 1000 00
9014 instr [15,10] = 1101 10
9015 instr [9, 5] = Rn
9016 instr [4, 0] = Rd. */
9017
9018 unsigned size = INSTR (22, 22);
9019 unsigned rn = INSTR (9, 5);
9020 unsigned rd = INSTR (4, 0);
9021
9022 NYI_assert (31, 23, 0x0FD);
9023 NYI_assert (21, 16, 0x20);
9024 NYI_assert (15, 10, 0x36);
9025
9026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9027 if (size)
9028 aarch64_set_vec_u64 (cpu, rd, 0,
9029 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9030 else
9031 aarch64_set_vec_u32 (cpu, rd, 0,
9032 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9033 }
9034
9035 /* Floating point scalar compare greater than 0. */
9036 static void
9037 do_scalar_FCMGT_zero (sim_cpu *cpu)
9038 {
9039 /* instr [31,23] = 0101 1110 1
9040 instr [22,22] = size
9041 instr [21,16] = 1000 00
9042 instr [15,10] = 1100 10
9043 instr [9, 5] = Rn
9044 instr [4, 0] = Rd. */
9045
9046 unsigned size = INSTR (22, 22);
9047 unsigned rn = INSTR (9, 5);
9048 unsigned rd = INSTR (4, 0);
9049
9050 NYI_assert (31, 23, 0x0BD);
9051 NYI_assert (21, 16, 0x20);
9052 NYI_assert (15, 10, 0x32);
9053
9054 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9055 if (size)
9056 aarch64_set_vec_u64 (cpu, rd, 0,
9057 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9058 else
9059 aarch64_set_vec_u32 (cpu, rd, 0,
9060 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9061 }
9062
9063 /* Floating point scalar compare equal to 0. */
9064 static void
9065 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9066 {
9067 /* instr [31,23] = 0101 1110 1
9068 instr [22,22] = size
9069 instr [21,16] = 1000 00
9070 instr [15,10] = 1101 10
9071 instr [9, 5] = Rn
9072 instr [4, 0] = Rd. */
9073
9074 unsigned size = INSTR (22, 22);
9075 unsigned rn = INSTR (9, 5);
9076 unsigned rd = INSTR (4, 0);
9077
9078 NYI_assert (31, 23, 0x0BD);
9079 NYI_assert (21, 16, 0x20);
9080 NYI_assert (15, 10, 0x36);
9081
9082 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9083 if (size)
9084 aarch64_set_vec_u64 (cpu, rd, 0,
9085 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9086 else
9087 aarch64_set_vec_u32 (cpu, rd, 0,
9088 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9089 }
9090
9091 /* Floating point scalar compare less than 0. */
9092 static void
9093 do_scalar_FCMLT_zero (sim_cpu *cpu)
9094 {
9095 /* instr [31,23] = 0101 1110 1
9096 instr [22,22] = size
9097 instr [21,16] = 1000 00
9098 instr [15,10] = 1110 10
9099 instr [9, 5] = Rn
9100 instr [4, 0] = Rd. */
9101
9102 unsigned size = INSTR (22, 22);
9103 unsigned rn = INSTR (9, 5);
9104 unsigned rd = INSTR (4, 0);
9105
9106 NYI_assert (31, 23, 0x0BD);
9107 NYI_assert (21, 16, 0x20);
9108 NYI_assert (15, 10, 0x3A);
9109
9110 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9111 if (size)
9112 aarch64_set_vec_u64 (cpu, rd, 0,
9113 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9114 else
9115 aarch64_set_vec_u32 (cpu, rd, 0,
9116 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9117 }
9118
9119 static void
9120 do_scalar_shift (sim_cpu *cpu)
9121 {
9122 /* instr [31,23] = 0101 1111 0
9123 instr [22,16] = shift amount
9124 instr [15,10] = 0101 01 [SHL]
9125 instr [15,10] = 0000 01 [SSHR]
9126 instr [9, 5] = Rn
9127 instr [4, 0] = Rd. */
9128
9129 unsigned rn = INSTR (9, 5);
9130 unsigned rd = INSTR (4, 0);
9131 unsigned amount;
9132
9133 NYI_assert (31, 23, 0x0BE);
9134
9135 if (INSTR (22, 22) == 0)
9136 HALT_UNALLOC;
9137
9138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9139 switch (INSTR (15, 10))
9140 {
9141 case 0x01: /* SSHR */
9142 amount = 128 - INSTR (22, 16);
9143 aarch64_set_vec_s64 (cpu, rd, 0,
9144 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9145 return;
9146 case 0x15: /* SHL */
9147 amount = INSTR (22, 16) - 64;
9148 aarch64_set_vec_u64 (cpu, rd, 0,
9149 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9150 return;
9151 default:
9152 HALT_NYI;
9153 }
9154 }
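
/* The two immediates decode differently: for SSHR the field holds
   128 - shift, while for SHL it holds 64 + shift.  E.g. an assembler
   "SHL D0, D1, #3" (illustrative) has instr[22,16] = 67, giving
   amount = 67 - 64 = 3.  */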
9155
9156 /* FCMEQ FCMGT FCMGE. */
9157 static void
9158 do_scalar_FCM (sim_cpu *cpu)
9159 {
9160 /* instr [31,30] = 01
9161 instr [29] = U
9162 instr [28,24] = 1 1110
9163 instr [23] = E
9164 instr [22] = size
9165 instr [21] = 1
9166 instr [20,16] = Rm
9167 instr [15,12] = 1110
9168 instr [11] = AC
9169 instr [10] = 1
9170 instr [9, 5] = Rn
9171 instr [4, 0] = Rd. */
9172
9173 unsigned rm = INSTR (20, 16);
9174 unsigned rn = INSTR (9, 5);
9175 unsigned rd = INSTR (4, 0);
9176 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9177 unsigned result;
9178 float val1;
9179 float val2;
9180
9181 NYI_assert (31, 30, 1);
9182 NYI_assert (28, 24, 0x1E);
9183 NYI_assert (21, 21, 1);
9184 NYI_assert (15, 12, 0xE);
9185 NYI_assert (10, 10, 1);
9186
9187 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9188 if (INSTR (22, 22))
9189 {
9190 double val1 = aarch64_get_FP_double (cpu, rn);
9191 double val2 = aarch64_get_FP_double (cpu, rm);
9192
9193 switch (EUac)
9194 {
9195 case 0: /* 000 */
9196 result = val1 == val2;
9197 break;
9198
9199 case 3: /* 011 */
9200 val1 = fabs (val1);
9201 val2 = fabs (val2);
9202 /* Fall through. */
9203 case 2: /* 010 */
9204 result = val1 >= val2;
9205 break;
9206
9207 case 7: /* 111 */
9208 val1 = fabs (val1);
9209 val2 = fabs (val2);
9210 /* Fall through. */
9211 case 6: /* 110 */
9212 result = val1 > val2;
9213 break;
9214
9215 default:
9216 HALT_UNALLOC;
9217 }
9218
9219 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9220 return;
9221 }
9222
9223 val1 = aarch64_get_FP_float (cpu, rn);
9224 val2 = aarch64_get_FP_float (cpu, rm);
9225
9226 switch (EUac)
9227 {
9228 case 0: /* 000 */
9229 result = val1 == val2;
9230 break;
9231
9232 case 3: /* 011 */
9233 val1 = fabsf (val1);
9234 val2 = fabsf (val2);
9235 /* Fall through. */
9236 case 2: /* 010 */
9237 result = val1 >= val2;
9238 break;
9239
9240 case 7: /* 111 */
9241 val1 = fabsf (val1);
9242 val2 = fabsf (val2);
9243 /* Fall through. */
9244 case 6: /* 110 */
9245 result = val1 > val2;
9246 break;
9247
9248 default:
9249 HALT_UNALLOC;
9250 }
9251
9252 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9253 }
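
/* Summarising the EUac dispatch above (E = instr[23], U = instr[29],
   ac = instr[11]):
     000 ==> FCMEQ (==)
     010 ==> FCMGE (>=)      011 ==> FACGE (absolute >=)
     110 ==> FCMGT (>)       111 ==> FACGT (absolute >)  */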
9254
9255 /* An alias of DUP. */
9256 static void
9257 do_scalar_MOV (sim_cpu *cpu)
9258 {
9259 /* instr [31,21] = 0101 1110 000
9260 instr [20,16] = imm5
9261 instr [15,10] = 0000 01
9262 instr [9, 5] = Rn
9263 instr [4, 0] = Rd. */
9264
9265 unsigned rn = INSTR (9, 5);
9266 unsigned rd = INSTR (4, 0);
9267 unsigned index;
9268
9269 NYI_assert (31, 21, 0x2F0);
9270 NYI_assert (15, 10, 0x01);
9271
9272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9273 if (INSTR (16, 16))
9274 {
9275 /* 8-bit. */
9276 index = INSTR (20, 17);
9277 aarch64_set_vec_u8
9278 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9279 }
9280 else if (INSTR (17, 17))
9281 {
9282 /* 16-bit. */
9283 index = INSTR (20, 18);
9284 aarch64_set_vec_u16
9285 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9286 }
9287 else if (INSTR (18, 18))
9288 {
9289 /* 32-bit. */
9290 index = INSTR (20, 19);
9291 aarch64_set_vec_u32
9292 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9293 }
9294 else if (INSTR (19, 19))
9295 {
9296 /* 64-bit. */
9297 index = INSTR (20, 20);
9298 aarch64_set_vec_u64
9299 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9300 }
9301 else
9302 HALT_UNALLOC;
9303 }
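
/* The imm5 field encodes both element size and index: the position of
   the lowest set bit selects the size and the bits above it give the
   index.  E.g. (illustrative) imm5 = 10110 has instr[17] as its lowest
   set bit, so this is a 16-bit move with index = instr[20,18] = 101 = 5.  */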
9304
9305 static void
9306 do_scalar_NEG (sim_cpu *cpu)
9307 {
9308 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9309 instr [9, 5] = Rn
9310 instr [4, 0] = Rd. */
9311
9312 unsigned rn = INSTR (9, 5);
9313 unsigned rd = INSTR (4, 0);
9314
9315 NYI_assert (31, 10, 0x1FB82E);
9316
9317 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9318 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9319 }
9320
9321 static void
9322 do_scalar_USHL (sim_cpu *cpu)
9323 {
9324 /* instr [31,21] = 0111 1110 111
9325 instr [20,16] = Rm
9326 instr [15,10] = 0100 01
9327 instr [9, 5] = Rn
9328 instr [4, 0] = Rd. */
9329
9330 unsigned rm = INSTR (20, 16);
9331 unsigned rn = INSTR (9, 5);
9332 unsigned rd = INSTR (4, 0);
9333 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9334
9335 NYI_assert (31, 21, 0x3F7);
9336 NYI_assert (15, 10, 0x11);
9337
9338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9339 if (shift >= 0)
9340 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9341 else
9342 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9343 }
9344
9345 static void
9346 do_double_add (sim_cpu *cpu)
9347 {
9348 /* instr [31,21] = 0101 1110 111
9349 instr [20,16] = Fn
9350 instr [15,10] = 1000 01
9351 instr [9,5] = Fm
9352 instr [4,0] = Fd. */
9353 unsigned Fd;
9354 unsigned Fm;
9355 unsigned Fn;
9356 double val1;
9357 double val2;
9358
9359 NYI_assert (31, 21, 0x2F7);
9360 NYI_assert (15, 10, 0x21);
9361
9362 Fd = INSTR (4, 0);
9363 Fm = INSTR (9, 5);
9364 Fn = INSTR (20, 16);
9365
9366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9367 val1 = aarch64_get_FP_double (cpu, Fm);
9368 val2 = aarch64_get_FP_double (cpu, Fn);
9369
9370 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9371 }
9372
9373 static void
9374 do_scalar_UCVTF (sim_cpu *cpu)
9375 {
9376 /* instr [31,23] = 0111 1110 0
9377 instr [22] = single(0)/double(1)
9378 instr [21,10] = 10 0001 1101 10
9379 instr [9,5] = rn
9380 instr [4,0] = rd. */
9381
9382 unsigned rn = INSTR (9, 5);
9383 unsigned rd = INSTR (4, 0);
9384
9385 NYI_assert (31, 23, 0x0FC);
9386 NYI_assert (21, 10, 0x876);
9387
9388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9389 if (INSTR (22, 22))
9390 {
9391 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9392
9393 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9394 }
9395 else
9396 {
9397 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9398
9399 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9400 }
9401 }
9402
9403 static void
9404 do_scalar_vec (sim_cpu *cpu)
9405 {
9406 /* instr [30] = 1. */
9407 /* instr [28,25] = 1111. */
9408 switch (INSTR (31, 23))
9409 {
9410 case 0xBC:
9411 switch (INSTR (15, 10))
9412 {
9413 case 0x01: do_scalar_MOV (cpu); return;
9414 case 0x39: do_scalar_FCM (cpu); return;
9415 case 0x3B: do_scalar_FCM (cpu); return;
9416 }
9417 break;
9418
9419 case 0xBE: do_scalar_shift (cpu); return;
9420
9421 case 0xFC:
9422 switch (INSTR (15, 10))
9423 {
9424 case 0x36:
9425 switch (INSTR (21, 16))
9426 {
9427 case 0x30: do_scalar_FADDP (cpu); return;
9428 case 0x21: do_scalar_UCVTF (cpu); return;
9429 }
9430 HALT_NYI;
9431 case 0x39: do_scalar_FCM (cpu); return;
9432 case 0x3B: do_scalar_FCM (cpu); return;
9433 }
9434 break;
9435
9436 case 0xFD:
9437 switch (INSTR (15, 10))
9438 {
9439 case 0x0D: do_scalar_CMGT (cpu); return;
9440 case 0x11: do_scalar_USHL (cpu); return;
9441 case 0x2E: do_scalar_NEG (cpu); return;
9442 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9443 case 0x35: do_scalar_FABD (cpu); return;
9444 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9445 case 0x39: do_scalar_FCM (cpu); return;
9446 case 0x3B: do_scalar_FCM (cpu); return;
9447 default:
9448 HALT_NYI;
9449 }
9450
9451 case 0xFE: do_scalar_USHR (cpu); return;
9452
9453 case 0xBD:
9454 switch (INSTR (15, 10))
9455 {
9456 case 0x21: do_double_add (cpu); return;
9457 case 0x11: do_scalar_SSHL (cpu); return;
9458 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9459 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9460 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9461 default:
9462 HALT_NYI;
9463 }
9464
9465 default:
9466 HALT_NYI;
9467 }
9468 }
9469
9470 static void
9471 dexAdvSIMD1 (sim_cpu *cpu)
9472 {
9473 /* instr [28,25] = 1 111. */
9474
9475 /* We are currently only interested in the basic
9476 scalar fp routines which all have bit 30 = 0. */
9477 if (INSTR (30, 30))
9478 do_scalar_vec (cpu);
9479
9480 /* instr[24] is set for FP data processing 3-source and clear for
9481 all other basic scalar fp instruction groups. */
9482 else if (INSTR (24, 24))
9483 dexSimpleFPDataProc3Source (cpu);
9484
9485 /* instr[21] is clear for floating <-> fixed conversions and set for
9486 all other basic scalar fp instruction groups. */
9487 else if (!INSTR (21, 21))
9488 dexSimpleFPFixedConvert (cpu);
9489
9490 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9491 11 ==> cond select, 00 ==> other. */
9492 else
9493 switch (INSTR (11, 10))
9494 {
9495 case 1: dexSimpleFPCondCompare (cpu); return;
9496 case 2: dexSimpleFPDataProc2Source (cpu); return;
9497 case 3: dexSimpleFPCondSelect (cpu); return;
9498
9499 default:
9500 /* Now an ordered cascade of tests.
9501 FP immediate has instr [12] == 1.
9502 FP compare has instr [13] == 1.
9503 FP Data Proc 1 Source has instr [14] == 1.
9504 FP floating <--> integer conversions has instr [15] == 0. */
9505 if (INSTR (12, 12))
9506 dexSimpleFPImmediate (cpu);
9507
9508 else if (INSTR (13, 13))
9509 dexSimpleFPCompare (cpu);
9510
9511 else if (INSTR (14, 14))
9512 dexSimpleFPDataProc1Source (cpu);
9513
9514 else if (!INSTR (15, 15))
9515 dexSimpleFPIntegerConvert (cpu);
9516
9517 else
9518 /* If we get here then instr[15] == 1 which means UNALLOC. */
9519 HALT_UNALLOC;
9520 }
9521 }
9522
9523 /* PC relative addressing. */
9524
9525 static void
9526 pcadr (sim_cpu *cpu)
9527 {
9528 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9529 instr[30,29] = immlo
9530 instr[23,5] = immhi. */
9531 uint64_t address;
9532 unsigned rd = INSTR (4, 0);
9533 uint32_t isPage = INSTR (31, 31);
9534 union { uint64_t u64; int64_t s64; } imm;
9535 uint64_t offset;
9536
9537 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9538 offset = imm.u64;
9539 offset = (offset << 2) | INSTR (30, 29);
9540
9541 address = aarch64_get_PC (cpu);
9542
9543 if (isPage)
9544 {
9545 offset <<= 12;
9546 address &= ~0xfff;
9547 }
9548
9549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9550 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9551 }
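
/* A worked ADRP example (illustrative): with PC = 0x400123 and a
   combined immediate of 1, the code above computes
   (0x400123 & ~0xfff) + (1 << 12) = 0x401000, the base of the next
   4KB page.  */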
9552
9553 /* Specific decode and execute for group Data Processing Immediate. */
9554
9555 static void
9556 dexPCRelAddressing (sim_cpu *cpu)
9557 {
9558 /* assert instr[28,24] = 10000. */
9559 pcadr (cpu);
9560 }
9561
9562 /* Immediate logical.
9563 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9564 16, 32 or 64 bit sequence pulled out at decode and possibly
9565 inverting it.
9566
9567 N.B. the output register (dest) can normally be Xn or SP;
9568 the exception occurs for flag setting instructions which may
9569 only use Xn for the output (dest). The input register can
9570 never be SP. */
9571
9572 /* 32 bit and immediate. */
9573 static void
9574 and32 (sim_cpu *cpu, uint32_t bimm)
9575 {
9576 unsigned rn = INSTR (9, 5);
9577 unsigned rd = INSTR (4, 0);
9578
9579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9580 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9581 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9582 }
9583
9584 /* 64 bit and immediate. */
9585 static void
9586 and64 (sim_cpu *cpu, uint64_t bimm)
9587 {
9588 unsigned rn = INSTR (9, 5);
9589 unsigned rd = INSTR (4, 0);
9590
9591 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9592 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9593 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9594 }
9595
9596 /* 32 bit and immediate set flags. */
9597 static void
9598 ands32 (sim_cpu *cpu, uint32_t bimm)
9599 {
9600 unsigned rn = INSTR (9, 5);
9601 unsigned rd = INSTR (4, 0);
9602
9603 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9604 uint32_t value2 = bimm;
9605
9606 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9607 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9608 set_flags_for_binop32 (cpu, value1 & value2);
9609 }
9610
9611 /* 64 bit and immediate set flags. */
9612 static void
9613 ands64 (sim_cpu *cpu, uint64_t bimm)
9614 {
9615 unsigned rn = INSTR (9, 5);
9616 unsigned rd = INSTR (4, 0);
9617
9618 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9619 uint64_t value2 = bimm;
9620
9621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9622 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9623 set_flags_for_binop64 (cpu, value1 & value2);
9624 }
9625
9626 /* 32 bit exclusive or immediate. */
9627 static void
9628 eor32 (sim_cpu *cpu, uint32_t bimm)
9629 {
9630 unsigned rn = INSTR (9, 5);
9631 unsigned rd = INSTR (4, 0);
9632
9633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9635 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9636 }
9637
9638 /* 64 bit exclusive or immediate. */
9639 static void
9640 eor64 (sim_cpu *cpu, uint64_t bimm)
9641 {
9642 unsigned rn = INSTR (9, 5);
9643 unsigned rd = INSTR (4, 0);
9644
9645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9646 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9647 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9648 }
9649
9650 /* 32 bit or immediate. */
9651 static void
9652 orr32 (sim_cpu *cpu, uint32_t bimm)
9653 {
9654 unsigned rn = INSTR (9, 5);
9655 unsigned rd = INSTR (4, 0);
9656
9657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9658 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9659 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9660 }
9661
9662 /* 64 bit or immediate. */
9663 static void
9664 orr64 (sim_cpu *cpu, uint64_t bimm)
9665 {
9666 unsigned rn = INSTR (9, 5);
9667 unsigned rd = INSTR (4, 0);
9668
9669 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9670 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9671 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9672 }
9673
9674 /* Logical shifted register.
9675 These allow an optional LSL, ASR, LSR or ROR to the second source
9676 register with a count up to the register bit count.
9677 N.B. register args may not be SP. */
9678
9679 /* 32 bit AND shifted register. */
9680 static void
9681 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9682 {
9683 unsigned rm = INSTR (20, 16);
9684 unsigned rn = INSTR (9, 5);
9685 unsigned rd = INSTR (4, 0);
9686
9687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9688 aarch64_set_reg_u64
9689 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9690 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9691 }
9692
9693 /* 64 bit AND shifted register. */
9694 static void
9695 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9696 {
9697 unsigned rm = INSTR (20, 16);
9698 unsigned rn = INSTR (9, 5);
9699 unsigned rd = INSTR (4, 0);
9700
9701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9702 aarch64_set_reg_u64
9703 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9704 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9705 }
9706
9707 /* 32 bit AND shifted register setting flags. */
9708 static void
9709 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9710 {
9711 unsigned rm = INSTR (20, 16);
9712 unsigned rn = INSTR (9, 5);
9713 unsigned rd = INSTR (4, 0);
9714
9715 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9716 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9717 shift, count);
9718
9719 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9720 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9721 set_flags_for_binop32 (cpu, value1 & value2);
9722 }
9723
9724 /* 64 bit AND shifted register setting flags. */
9725 static void
9726 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9727 {
9728 unsigned rm = INSTR (20, 16);
9729 unsigned rn = INSTR (9, 5);
9730 unsigned rd = INSTR (4, 0);
9731
9732 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9733 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9734 shift, count);
9735
9736 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9737 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9738 set_flags_for_binop64 (cpu, value1 & value2);
9739 }
9740
9741 /* 32 bit BIC shifted register. */
9742 static void
9743 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9744 {
9745 unsigned rm = INSTR (20, 16);
9746 unsigned rn = INSTR (9, 5);
9747 unsigned rd = INSTR (4, 0);
9748
9749 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9750 aarch64_set_reg_u64
9751 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9752 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9753 }
9754
9755 /* 64 bit BIC shifted register. */
9756 static void
9757 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9758 {
9759 unsigned rm = INSTR (20, 16);
9760 unsigned rn = INSTR (9, 5);
9761 unsigned rd = INSTR (4, 0);
9762
9763 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9764 aarch64_set_reg_u64
9765 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9766 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9767 }
9768
9769 /* 32 bit BIC shifted register setting flags. */
9770 static void
9771 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9772 {
9773 unsigned rm = INSTR (20, 16);
9774 unsigned rn = INSTR (9, 5);
9775 unsigned rd = INSTR (4, 0);
9776
9777 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9778 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9779 shift, count);
9780
9781 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9782 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9783 set_flags_for_binop32 (cpu, value1 & value2);
9784 }
9785
9786 /* 64 bit BIC shifted register setting flags. */
9787 static void
9788 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9789 {
9790 unsigned rm = INSTR (20, 16);
9791 unsigned rn = INSTR (9, 5);
9792 unsigned rd = INSTR (4, 0);
9793
9794 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9795 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9796 shift, count);
9797
9798 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9799 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9800 set_flags_for_binop64 (cpu, value1 & value2);
9801 }
9802
9803 /* 32 bit EON shifted register. */
9804 static void
9805 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9806 {
9807 unsigned rm = INSTR (20, 16);
9808 unsigned rn = INSTR (9, 5);
9809 unsigned rd = INSTR (4, 0);
9810
9811 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9812 aarch64_set_reg_u64
9813 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9814 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9815 }
9816
9817 /* 64 bit EON shifted register. */
9818 static void
9819 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9820 {
9821 unsigned rm = INSTR (20, 16);
9822 unsigned rn = INSTR (9, 5);
9823 unsigned rd = INSTR (4, 0);
9824
9825 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9826 aarch64_set_reg_u64
9827 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9828 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9829 }
9830
9831 /* 32 bit EOR shifted register. */
9832 static void
9833 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9834 {
9835 unsigned rm = INSTR (20, 16);
9836 unsigned rn = INSTR (9, 5);
9837 unsigned rd = INSTR (4, 0);
9838
9839 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9840 aarch64_set_reg_u64
9841 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9842 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9843 }
9844
9845 /* 64 bit EOR shifted register. */
9846 static void
9847 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9848 {
9849 unsigned rm = INSTR (20, 16);
9850 unsigned rn = INSTR (9, 5);
9851 unsigned rd = INSTR (4, 0);
9852
9853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9854 aarch64_set_reg_u64
9855 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9856 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9857 }
9858
9859 /* 32 bit ORR shifted register. */
9860 static void
9861 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9862 {
9863 unsigned rm = INSTR (20, 16);
9864 unsigned rn = INSTR (9, 5);
9865 unsigned rd = INSTR (4, 0);
9866
9867 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9868 aarch64_set_reg_u64
9869 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9870 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9871 }
9872
9873 /* 64 bit ORR shifted register. */
9874 static void
9875 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9876 {
9877 unsigned rm = INSTR (20, 16);
9878 unsigned rn = INSTR (9, 5);
9879 unsigned rd = INSTR (4, 0);
9880
9881 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9882 aarch64_set_reg_u64
9883 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9884 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9885 }
9886
9887 /* 32 bit ORN shifted register. */
9888 static void
9889 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9890 {
9891 unsigned rm = INSTR (20, 16);
9892 unsigned rn = INSTR (9, 5);
9893 unsigned rd = INSTR (4, 0);
9894
9895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9896 aarch64_set_reg_u64
9897 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9898 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9899 }
9900
9901 /* 64 bit ORN shifted register. */
9902 static void
9903 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9904 {
9905 unsigned rm = INSTR (20, 16);
9906 unsigned rn = INSTR (9, 5);
9907 unsigned rd = INSTR (4, 0);
9908
9909 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9910 aarch64_set_reg_u64
9911 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9912 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9913 }
9914
9915 static void
9916 dexLogicalImmediate (sim_cpu *cpu)
9917 {
9918 /* assert instr[28,23] = 100100
9919 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9920 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9921 instr[22] = N : used to construct immediate mask
9922 instr[21,16] = immr
9923 instr[15,10] = imms
9924 instr[9,5] = Rn
9925 instr[4,0] = Rd */
9926
9927 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9928 uint32_t size = INSTR (31, 31);
9929 uint32_t N = INSTR (22, 22);
9930 /* uint32_t immr = INSTR (21, 16); */
9931 /* uint32_t imms = INSTR (15, 10); */
9932 uint32_t index = INSTR (22, 10);
9933 uint64_t bimm64 = LITable [index];
9934 uint32_t dispatch = INSTR (30, 29);
9935
9936 if (~size & N)
9937 HALT_UNALLOC;
9938
9939 if (!bimm64)
9940 HALT_UNALLOC;
9941
9942 if (size == 0)
9943 {
9944 uint32_t bimm = (uint32_t) bimm64;
9945
9946 switch (dispatch)
9947 {
9948 case 0: and32 (cpu, bimm); return;
9949 case 1: orr32 (cpu, bimm); return;
9950 case 2: eor32 (cpu, bimm); return;
9951 case 3: ands32 (cpu, bimm); return;
9952 }
9953 }
9954 else
9955 {
9956 switch (dispatch)
9957 {
9958 case 0: and64 (cpu, bimm64); return;
9959 case 1: orr64 (cpu, bimm64); return;
9960 case 2: eor64 (cpu, bimm64); return;
9961 case 3: ands64 (cpu, bimm64); return;
9962 }
9963 }
9964 HALT_UNALLOC;
9965 }
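
/* A worked example (illustrative): "AND W0, W1, #0xff" encodes N = 0,
   immr = 0, imms = 7.  The decode-time expansion replicates the eight
   set bits across each 32-bit element, so LITable[index] holds
   0x000000ff000000ff and and32 receives bimm = 0x000000ff.  */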
9966
9967 /* Immediate move.
9968 The uimm argument is a 16 bit value to be inserted into the
9969 target register; the pos argument locates the 16 bit word in the
9970 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9971 3} for 64 bit.
9972 N.B. the register arg may not be SP, so it should be
9973 accessed using the setGZRegisterXXX accessors. */
9974
9975 /* 32 bit move 16 bit immediate zero remaining shorts. */
9976 static void
9977 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9978 {
9979 unsigned rd = INSTR (4, 0);
9980
9981 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9982 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9983 }
9984
9985 /* 64 bit move 16 bit immediate zero remaining shorts. */
9986 static void
9987 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9988 {
9989 unsigned rd = INSTR (4, 0);
9990
9991 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9992 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9993 }
9994
9995 /* 32 bit move 16 bit immediate negated. */
9996 static void
9997 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9998 {
9999 unsigned rd = INSTR (4, 0);
10000
10001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10002 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
10003 }
10004
10005 /* 64 bit move 16 bit immediate negated. */
10006 static void
10007 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10008 {
10009 unsigned rd = INSTR (4, 0);
10010
10011 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10012 aarch64_set_reg_u64
10013 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10014 ^ 0xffffffffffffffffULL));
10015 }
10016
10017 /* 32 bit move 16 bit immediate keep remaining shorts. */
10018 static void
10019 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10020 {
10021 unsigned rd = INSTR (4, 0);
10022 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10023 uint32_t value = val << (pos * 16);
10024 uint32_t mask = ~(0xffffU << (pos * 16));
10025
10026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10027 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10028 }
10029
10030 /* 64 bit move 16 bit immediate keep remaining shorts. */
10031 static void
10032 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10033 {
10034 unsigned rd = INSTR (4, 0);
10035 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10036 uint64_t value = (uint64_t) val << (pos * 16);
10037 uint64_t mask = ~(0xffffULL << (pos * 16));
10038
10039 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10040 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10041 }
10042
10043 static void
10044 dexMoveWideImmediate (sim_cpu *cpu)
10045 {
10046 /* assert instr[28:23] = 100101
10047 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10048 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10049 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10050 instr[20,5] = uimm16
10051 instr[4,0] = Rd */
10052
10053 /* N.B. the (multiple of 16) shift is applied by the called routine;
10054 we just pass the multiplier. */
10055
10056 uint32_t imm;
10057 uint32_t size = INSTR (31, 31);
10058 uint32_t op = INSTR (30, 29);
10059 uint32_t shift = INSTR (22, 21);
10060
10061 /* 32 bit can only shift by 0 or 16;
10062 anything else is an unallocated instruction. */
10063 if (size == 0 && (shift > 1))
10064 HALT_UNALLOC;
10065
10066 if (op == 1)
10067 HALT_UNALLOC;
10068
10069 imm = INSTR (20, 5);
10070
10071 if (size == 0)
10072 {
10073 if (op == 0)
10074 movn32 (cpu, imm, shift);
10075 else if (op == 2)
10076 movz32 (cpu, imm, shift);
10077 else
10078 movk32 (cpu, imm, shift);
10079 }
10080 else
10081 {
10082 if (op == 0)
10083 movn64 (cpu, imm, shift);
10084 else if (op == 2)
10085 movz64 (cpu, imm, shift);
10086 else
10087 movk64 (cpu, imm, shift);
10088 }
10089 }
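
/* A typical use (illustrative): loading the 64-bit constant
   0x1234000000005678 takes two instructions,
     MOVZ X0, #0x5678             ; movz64 (cpu, 0x5678, 0)
     MOVK X0, #0x1234, LSL #48    ; movk64 (cpu, 0x1234, 3)
   the first zeroes the other three shorts, the second patches one in.  */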
10090
10091 /* Bitfield operations.
10092 These take a pair of bit positions r and s which are in {0..31}
10093 or {0..63} depending on the instruction word size.
10094 N.B register args may not be SP. */
10095
10096 /* OK, we start with ubfm which just needs to pick
10097 some bits out of the source, zero the rest and write
10098 the result to dest. Just two logical shifts are needed. */
10099
10100 /* 32 bit bitfield move, left and right of affected zeroed
10101 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10102 static void
10103 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10104 {
10105 unsigned rd;
10106 unsigned rn = INSTR (9, 5);
10107 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10108
10109 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10110 if (r <= s)
10111 {
10112 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10113 We want only bits s:xxx:r at the bottom of the word
10114 so we LSL bit s up to bit 31 i.e. by 31 - s
10115 and then we LSR to bring bit 31 down to bit s - r
10116 i.e. by 31 + r - s. */
10117 value <<= 31 - s;
10118 value >>= 31 + r - s;
10119 }
10120 else
10121 {
10122 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10123 We want only bits s:xxx:0 starting at bit 31-(r-1)
10124 so we LSL bit s up to bit 31 i.e. by 31 - s
10125 and then we LSR to bring bit 31 down to 31-(r-1)+s
10126 i.e. by r - (s + 1). */
10127 value <<= 31 - s;
10128 value >>= r - (s + 1);
10129 }
10130
10131 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10132 rd = INSTR (4, 0);
10133 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10134 }
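
/* A worked example (illustrative): "UBFX W0, W1, #8, #8" is UBFM with
   r = 8, s = 15, so the r <= s path above computes
   value = (Wn << 16) >> 24, leaving bits 15:8 of Wn in bits 7:0.  */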
10135
10136 /* 64 bit bitfield move, left and right of affected zeroed
10137 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10138 static void
10139 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10140 {
10141 unsigned rd;
10142 unsigned rn = INSTR (9, 5);
10143 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10144
10145 if (r <= s)
10146 {
10147 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10148 We want only bits s:xxx:r at the bottom of the word.
10149 So we LSL bit s up to bit 63 i.e. by 63 - s
10150 and then we LSR to bring bit 63 down to bit s - r
10151 i.e. by 63 + r - s. */
10152 value <<= 63 - s;
10153 value >>= 63 + r - s;
10154 }
10155 else
10156 {
10157 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10158 We want only bits s:xxx:0 starting at bit 63-(r-1).
10159 So we LSL bit s up to bit 63 i.e. by 63 - s
10160 and then we LSR to bring bit 63 down to 63-(r-1)+s
10161 i.e. by r - (s + 1). */
10162 value <<= 63 - s;
10163 value >>= r - (s + 1);
10164 }
10165
10166 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10167 rd = INSTR (4, 0);
10168 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10169 }
10170
10171 /* The signed versions need to insert sign bits
10172 on the left of the inserted bit field, so we do
10173 much the same as the unsigned version except we
10174 use an arithmetic shift right -- this just means
10175 we need to operate on signed values. */
10176
10177 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10178 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10179 static void
10180 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10181 {
10182 unsigned rd;
10183 unsigned rn = INSTR (9, 5);
10184 /* As per ubfm32 but use an ASR instead of an LSR. */
10185 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10186
10187 if (r <= s)
10188 {
10189 value <<= 31 - s;
10190 value >>= 31 + r - s;
10191 }
10192 else
10193 {
10194 value <<= 31 - s;
10195 value >>= r - (s + 1);
10196 }
10197
10198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10199 rd = INSTR (4, 0);
10200 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10201 }
10202
10203 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10204 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10205 static void
10206 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10207 {
10208 unsigned rd;
10209 unsigned rn = INSTR (9, 5);
10210 /* As per ubfm but use an ASR instead of an LSR. */
10211 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10212
10213 if (r <= s)
10214 {
10215 value <<= 63 - s;
10216 value >>= 63 + r - s;
10217 }
10218 else
10219 {
10220 value <<= 63 - s;
10221 value >>= r - (s + 1);
10222 }
10223
10224 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10225 rd = INSTR (4, 0);
10226 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10227 }
10228
10229 /* Finally, these versions leave non-affected bits
10230 as is, so we need to generate the bits as per
10231 ubfm and also generate a mask to pick the
10232 bits from the original and computed values. */
10233
10234 /* 32 bit bitfield move, non-affected bits left as is.
10235 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10236 static void
10237 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10238 {
10239 unsigned rn = INSTR (9, 5);
10240 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10241 uint32_t mask = -1;
10242 unsigned rd;
10243 uint32_t value2;
10244
10245 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10246 if (r <= s)
10247 {
10248 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10249 We want only bits s:xxx:r at the bottom of the word
10250 so we LSL bit s up to bit 31 i.e. by 31 - s
10251 and then we LSR to bring bit 31 down to bit s - r
10252 i.e. by 31 + r - s. */
10253 value <<= 31 - s;
10254 value >>= 31 + r - s;
10255 /* the mask must include the same bits. */
10256 mask <<= 31 - s;
10257 mask >>= 31 + r - s;
10258 }
10259 else
10260 {
10261 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10262 We want only bits s:xxx:0 starting at bit 31-(r-1)
10263 so we LSL bit s up to bit 31 i.e. by 31 - s
10264 and then we LSR to bring bit 31 down to 31-(r-1)+s
10265 i.e. by r - (s + 1). */
10266 value <<= 31 - s;
10267 value >>= r - (s + 1);
10268 /* The mask must include the same bits. */
10269 mask <<= 31 - s;
10270 mask >>= r - (s + 1);
10271 }
10272
10273 rd = INSTR (4, 0);
10274 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10275
10276 value2 &= ~mask;
10277 value2 |= value;
10278
10279 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10280 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10282 }
10283
10284 /* 64 bit bitfield move, non-affected bits left as is.
10285 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10286 static void
10287 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10288 {
10289 unsigned rd;
10290 unsigned rn = INSTR (9, 5);
10291 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10292 uint64_t mask = 0xffffffffffffffffULL;
10293
10294 if (r <= s)
10295 {
10296 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10297 We want only bits s:xxx:r at the bottom of the word
10298 so we LSL bit s up to bit 63 i.e. by 63 - s
10299 and then we LSR to bring bit 63 down to bit s - r
10300 i.e. by 63 + r - s. */
10301 value <<= 63 - s;
10302 value >>= 63 + r - s;
10303 /* The mask must include the same bits. */
10304 mask <<= 63 - s;
10305 mask >>= 63 + r - s;
10306 }
10307 else
10308 {
10309 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10310 We want only bits s:xxx:0 starting at bit 63-(r-1)
10311 so we LSL bit s up to bit 63 i.e. by 63 - s
10312 and then we LSR to bring bit 63 down to 63-(r-1)+s
10313 i.e. by r - (s + 1). */
10314 value <<= 63 - s;
10315 value >>= r - (s + 1);
10316 /* The mask must include the same bits. */
10317 mask <<= 63 - s;
10318 mask >>= r - (s + 1);
10319 }
10320
10321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10322 rd = INSTR (4, 0);
10323 aarch64_set_reg_u64
10324 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10325 }
10326
10327 static void
10328 dexBitfieldImmediate (sim_cpu *cpu)
10329 {
10330 /* assert instr[28:23] = 100110
10331 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10332 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10333 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10334 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10335 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10336 instr[9,5] = Rn
10337 instr[4,0] = Rd */
10338
10339 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10340 uint32_t dispatch;
10341 uint32_t imms;
10342 uint32_t size = INSTR (31, 31);
10343 uint32_t N = INSTR (22, 22);
10344 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10345 /* or else we have an UNALLOC. */
10346 uint32_t immr = INSTR (21, 16);
10347
10348 if (~size & N)
10349 HALT_UNALLOC;
10350
10351 if (!size && uimm (immr, 5, 5))
10352 HALT_UNALLOC;
10353
10354 imms = INSTR (15, 10);
10355 if (!size && uimm (imms, 5, 5))
10356 HALT_UNALLOC;
10357
10358 /* Switch on combined size and op. */
10359 dispatch = INSTR (31, 29);
10360 switch (dispatch)
10361 {
10362 case 0: sbfm32 (cpu, immr, imms); return;
10363 case 1: bfm32 (cpu, immr, imms); return;
10364 case 2: ubfm32 (cpu, immr, imms); return;
10365 case 4: sbfm (cpu, immr, imms); return;
10366 case 5: bfm (cpu, immr, imms); return;
10367 case 6: ubfm (cpu, immr, imms); return;
10368 default: HALT_UNALLOC;
10369 }
10370 }
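
/* The familiar shift and extend mnemonics are aliases of these three:
   e.g. (illustrative) "LSR W0, W1, #4" is UBFM with immr = 4, imms = 31,
   and "SXTB X0, W1" is SBFM with immr = 0, imms = 7.  */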
10371
10372 static void
10373 do_EXTR_32 (sim_cpu *cpu)
10374 {
10375 /* instr[31:21] = 00010011100
10376 instr[20,16] = Rm
10377 instr[15,10] = imms : 0xxxxx for 32 bit
10378 instr[9,5] = Rn
10379 instr[4,0] = Rd */
10380 unsigned rm = INSTR (20, 16);
10381 unsigned imms = INSTR (15, 10) & 31;
10382 unsigned rn = INSTR ( 9, 5);
10383 unsigned rd = INSTR ( 4, 0);
10384 uint64_t val1;
10385 uint64_t val2;
10386
10387 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10388 val1 >>= imms;
10389 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10390 /* A zero extract amount yields Rm; the guard also avoids a shift by 32. */
10391 val2 = imms ? (val2 << (32 - imms)) : 0;
10392 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10393 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2));
10394 }
10395
10396 static void
10397 do_EXTR_64 (sim_cpu *cpu)
10398 {
10399 /* instr[31:21] = 10010011100
10400 instr[20,16] = Rm
10401 instr[15,10] = imms
10402 instr[9,5] = Rn
10403 instr[4,0] = Rd */
10404 unsigned rm = INSTR (20, 16);
10405 unsigned imms = INSTR (15, 10) & 63;
10406 unsigned rn = INSTR ( 9, 5);
10407 unsigned rd = INSTR ( 4, 0);
10408 uint64_t val;
10409
10410 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10411 val >>= imms;
10412 if (imms) /* A zero amount yields Rm; also avoids an undefined shift by 64. */
10413 val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms));
10414 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10415 }
10416
10417 static void
10418 dexExtractImmediate (sim_cpu *cpu)
10419 {
10420 /* assert instr[28:23] = 100111
10421 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10422 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10423 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit, otherwise UNALLOC
10424 instr[21] = op0 : must be 0 or UNALLOC
10425 instr[20,16] = Rm
10426 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10427 instr[9,5] = Rn
10428 instr[4,0] = Rd */
10429
10430 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10431 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10432 uint32_t dispatch;
10433 uint32_t size = INSTR (31, 31);
10434 uint32_t N = INSTR (22, 22);
10435 /* 32 bit operations must have imms[5] = 0
10436 or else we have an UNALLOC. */
10437 uint32_t imms = INSTR (15, 10);
10438
10439 if (size ^ N)
10440 HALT_UNALLOC;
10441
10442 if (!size && uimm (imms, 5, 5))
10443 HALT_UNALLOC;
10444
10445 /* Switch on combined size and op. */
10446 dispatch = INSTR (31, 29);
10447
10448 if (dispatch == 0)
10449 do_EXTR_32 (cpu);
10450
10451 else if (dispatch == 4)
10452 do_EXTR_64 (cpu);
10453
10454 else if (dispatch == 1)
10455 HALT_NYI;
10456 else
10457 HALT_UNALLOC;
10458 }
10459
10460 static void
10461 dexDPImm (sim_cpu *cpu)
10462 {
10463 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10464 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10465 bits [25,23] of a DPImm are the secondary dispatch vector. */
10466 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10467
10468 switch (group2)
10469 {
10470 case DPIMM_PCADR_000:
10471 case DPIMM_PCADR_001:
10472 dexPCRelAddressing (cpu);
10473 return;
10474
10475 case DPIMM_ADDSUB_010:
10476 case DPIMM_ADDSUB_011:
10477 dexAddSubtractImmediate (cpu);
10478 return;
10479
10480 case DPIMM_LOG_100:
10481 dexLogicalImmediate (cpu);
10482 return;
10483
10484 case DPIMM_MOV_101:
10485 dexMoveWideImmediate (cpu);
10486 return;
10487
10488 case DPIMM_BITF_110:
10489 dexBitfieldImmediate (cpu);
10490 return;
10491
10492 case DPIMM_EXTR_111:
10493 dexExtractImmediate (cpu);
10494 return;
10495
10496 default:
10497 /* Should never reach here. */
10498 HALT_NYI;
10499 }
10500 }
10501
10502 static void
10503 dexLoadUnscaledImmediate (sim_cpu *cpu)
10504 {
10505 /* instr[29,24] == 111_00
10506 instr[21] == 0
10507 instr[11,10] == 00
10508 instr[31,30] = size
10509 instr[26] = V
10510 instr[23,22] = opc
10511 instr[20,12] = simm9
10512 instr[9,5] = rn may be SP. */
10513 /* unsigned rt = INSTR (4, 0); */
10514 uint32_t V = INSTR (26, 26);
10515 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10516 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10517
10518 if (!V)
10519 {
10520 /* GReg operations. */
10521 switch (dispatch)
10522 {
10523 case 0: sturb (cpu, imm); return;
10524 case 1: ldurb32 (cpu, imm); return;
10525 case 2: ldursb64 (cpu, imm); return;
10526 case 3: ldursb32 (cpu, imm); return;
10527 case 4: sturh (cpu, imm); return;
10528 case 5: ldurh32 (cpu, imm); return;
10529 case 6: ldursh64 (cpu, imm); return;
10530 case 7: ldursh32 (cpu, imm); return;
10531 case 8: stur32 (cpu, imm); return;
10532 case 9: ldur32 (cpu, imm); return;
10533 case 10: ldursw (cpu, imm); return;
10534 case 12: stur64 (cpu, imm); return;
10535 case 13: ldur64 (cpu, imm); return;
10536
10537 case 14:
10538 /* PRFUM NYI. */
10539 HALT_NYI;
10540
10541 default:
10542 case 11:
10543 case 15:
10544 HALT_UNALLOC;
10545 }
10546 }
10547
10548 /* FReg operations. */
10549 switch (dispatch)
10550 {
10551 case 2: fsturq (cpu, imm); return;
10552 case 3: fldurq (cpu, imm); return;
10553 case 8: fsturs (cpu, imm); return;
10554 case 9: fldurs (cpu, imm); return;
10555 case 12: fsturd (cpu, imm); return;
10556 case 13: fldurd (cpu, imm); return;
10557
10558 case 0: /* STUR 8 bit FP. */
10559 case 1: /* LDUR 8 bit FP. */
10560 case 4: /* STUR 16 bit FP. */
10561 case 5: /* LDUR 16 bit FP. */
10562 HALT_NYI;
10563
10564 default:
10565 case 6:
10566 case 7:
10567 case 10:
10568 case 11:
10569 case 14:
10570 case 15:
10571 HALT_UNALLOC;
10572 }
10573 }
10574
10575 /* N.B. A preliminary note regarding all the ldrs<x>32
10576 instructions
10577
10578 The signed value loaded by these instructions is cast to unsigned
10579 before being passed to aarch64_set_reg_u64 (cpu, N, ...), i.e. before
10580 being assigned to the 64 bit element of the GReg union. This performs
10581 a 32 bit sign extension (as required) but avoids 64 bit sign
10582 extension, thus ensuring that the top half of the register word is
10583 zero. This is what the spec demands when a 32 bit load occurs. */
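/* As a concrete illustration: loading the byte 0x80 yields the int8_t
   value -128; converting it to uint32_t gives 0xFFFFFF80 (the 32 bit
   sign extension), and widening that uint32_t to the uint64_t register
   value zero-extends, leaving 0x00000000FFFFFF80 in the register.  */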
10584
10585 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10586 static void
10587 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10588 {
10589 unsigned int rn = INSTR (9, 5);
10590 unsigned int rt = INSTR (4, 0);
10591
10592 /* The target register may not be SP but the source may be.
10593 There is no scaling required for a byte load. */
10594 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10595 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10596 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10597 }
10598
10599 /* 32 bit load sign-extended byte scaled or unscaled zero-
10600 or sign-extended 32-bit register offset. */
10601 static void
10602 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10603 {
10604 unsigned int rm = INSTR (20, 16);
10605 unsigned int rn = INSTR (9, 5);
10606 unsigned int rt = INSTR (4, 0);
10607
10608 /* rn may reference SP, rm and rt must reference ZR. */
10609
10610 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10611 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10612 extension);
10613
10614 /* There is no scaling required for a byte load. */
10615 aarch64_set_reg_u64
10616 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10617 + displacement));
10618 }
10619
10620 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10621 pre- or post-writeback. */
10622 static void
10623 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10624 {
10625 uint64_t address;
10626 unsigned int rn = INSTR (9, 5);
10627 unsigned int rt = INSTR (4, 0);
10628
10629 if (rn == rt && wb != NoWriteBack)
10630 HALT_UNALLOC;
10631
10632 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10633
10634 if (wb == Pre)
10635 address += offset;
10636
10637 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10638 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10639
10640 if (wb == Post)
10641 address += offset;
10642
10643 if (wb != NoWriteBack)
10644 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10645 }
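/* All of the *_wb handlers follow this pattern: with Pre indexing the
   offset is applied before the access and the updated address is
   written back; with Post the access uses the original address and the
   offset is added only for the write back; with NoWriteBack the base
   register is left untouched.  E.g. with Rn = 0x1000 and offset = 8,
   Pre accesses 0x1008 and leaves Rn = 0x1008, while Post accesses
   0x1000 and also leaves Rn = 0x1008.  */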
10646
10647 /* 8 bit store scaled. */
10648 static void
10649 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10650 {
10651 unsigned st = INSTR (4, 0);
10652 unsigned rn = INSTR (9, 5);
10653
10654 aarch64_set_mem_u8 (cpu,
10655 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10656 aarch64_get_vec_u8 (cpu, st, 0));
10657 }
10658
10659 /* 8 bit store scaled or unscaled zero- or
10660 sign-extended 8-bit register offset. */
10661 static void
10662 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10663 {
10664 unsigned rm = INSTR (20, 16);
10665 unsigned rn = INSTR (9, 5);
10666 unsigned st = INSTR (4, 0);
10667
10668 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10669 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10670 extension);
10671 uint64_t displacement = scaling == Scaled ? extended : 0;
10672
10673 aarch64_set_mem_u8
10674 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10675 }
10676
10677 /* 16 bit store scaled. */
10678 static void
10679 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10680 {
10681 unsigned st = INSTR (4, 0);
10682 unsigned rn = INSTR (9, 5);
10683
10684 aarch64_set_mem_u16
10685 (cpu,
10686 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10687 aarch64_get_vec_u16 (cpu, st, 0));
10688 }
10689
10690 /* 16 bit store scaled or unscaled zero-
10691 or sign-extended 16-bit register offset. */
10692 static void
10693 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10694 {
10695 unsigned rm = INSTR (20, 16);
10696 unsigned rn = INSTR (9, 5);
10697 unsigned st = INSTR (4, 0);
10698
10699 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10700 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10701 extension);
10702 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10703
10704 aarch64_set_mem_u16
10705 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10706 }
10707
10708 /* 32 bit store scaled unsigned 12 bit. */
10709 static void
10710 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10711 {
10712 unsigned st = INSTR (4, 0);
10713 unsigned rn = INSTR (9, 5);
10714
10715 aarch64_set_mem_u32
10716 (cpu,
10717 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10718 aarch64_get_vec_u32 (cpu, st, 0));
10719 }
10720
10721 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10722 static void
10723 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10724 {
10725 unsigned rn = INSTR (9, 5);
10726 unsigned st = INSTR (4, 0);
10727
10728 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10729
10730 if (wb != Post)
10731 address += offset;
10732
10733 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10734
10735 if (wb == Post)
10736 address += offset;
10737
10738 if (wb != NoWriteBack)
10739 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10740 }
10741
10742 /* 32 bit store scaled or unscaled zero-
10743 or sign-extended 32-bit register offset. */
10744 static void
10745 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10746 {
10747 unsigned rm = INSTR (20, 16);
10748 unsigned rn = INSTR (9, 5);
10749 unsigned st = INSTR (4, 0);
10750
10751 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10752 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10753 extension);
10754 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10755
10756 aarch64_set_mem_u32
10757 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10758 }
10759
10760 /* 64 bit store scaled unsigned 12 bit. */
10761 static void
10762 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10763 {
10764 unsigned st = INSTR (4, 0);
10765 unsigned rn = INSTR (9, 5);
10766
10767 aarch64_set_mem_u64
10768 (cpu,
10769 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10770 aarch64_get_vec_u64 (cpu, st, 0));
10771 }
10772
10773 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10774 static void
10775 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10776 {
10777 unsigned rn = INSTR (9, 5);
10778 unsigned st = INSTR (4, 0);
10779
10780 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10781
10782 if (wb != Post)
10783 address += offset;
10784
10785 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10786
10787 if (wb == Post)
10788 address += offset;
10789
10790 if (wb != NoWriteBack)
10791 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10792 }
10793
10794 /* 64 bit store scaled or unscaled zero-
10795 or sign-extended 32-bit register offset. */
10796 static void
10797 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10798 {
10799 unsigned rm = INSTR (20, 16);
10800 unsigned rn = INSTR (9, 5);
10801 unsigned st = INSTR (4, 0);
10802
10803 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10804 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10805 extension);
10806 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10807
10808 aarch64_set_mem_u64
10809 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10810 }
10811
10812 /* 128 bit store scaled unsigned 12 bit. */
10813 static void
10814 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10815 {
10816 FRegister a;
10817 unsigned st = INSTR (4, 0);
10818 unsigned rn = INSTR (9, 5);
10819 uint64_t addr;
10820
10821 aarch64_get_FP_long_double (cpu, st, & a);
10822
10823 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10824 aarch64_set_mem_long_double (cpu, addr, a);
10825 }
10826
10827 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10828 static void
10829 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10830 {
10831 FRegister a;
10832 unsigned rn = INSTR (9, 5);
10833 unsigned st = INSTR (4, 0);
10834 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10835
10836 if (wb != Post)
10837 address += offset;
10838
10839 aarch64_get_FP_long_double (cpu, st, & a);
10840 aarch64_set_mem_long_double (cpu, address, a);
10841
10842 if (wb == Post)
10843 address += offset;
10844
10845 if (wb != NoWriteBack)
10846 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10847 }
10848
10849 /* 128 bit store scaled or unscaled zero-
10850 or sign-extended 32-bit register offset. */
10851 static void
10852 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10853 {
10854 unsigned rm = INSTR (20, 16);
10855 unsigned rn = INSTR (9, 5);
10856 unsigned st = INSTR (4, 0);
10857
10858 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10859 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10860 extension);
10861 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10862
10863 FRegister a;
10864
10865 aarch64_get_FP_long_double (cpu, st, & a);
10866 aarch64_set_mem_long_double (cpu, address + displacement, a);
10867 }
10868
10869 static void
10870 dexLoadImmediatePrePost (sim_cpu *cpu)
10871 {
10872 /* instr[31,30] = size
10873 instr[29,27] = 111
10874 instr[26] = V
10875 instr[25,24] = 00
10876 instr[23,22] = opc
10877 instr[21] = 0
10878 instr[20,12] = simm9
10879 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10880 instr[10] = 0
10881 instr[9,5] = Rn may be SP.
10882 instr[4,0] = Rt */
10883
10884 uint32_t V = INSTR (26, 26);
10885 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10886 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10887 WriteBack wb = INSTR (11, 11);
10888
10889 if (!V)
10890 {
10891 /* GReg operations. */
10892 switch (dispatch)
10893 {
10894 case 0: strb_wb (cpu, imm, wb); return;
10895 case 1: ldrb32_wb (cpu, imm, wb); return;
10896 case 2: ldrsb_wb (cpu, imm, wb); return;
10897 case 3: ldrsb32_wb (cpu, imm, wb); return;
10898 case 4: strh_wb (cpu, imm, wb); return;
10899 case 5: ldrh32_wb (cpu, imm, wb); return;
10900 case 6: ldrsh64_wb (cpu, imm, wb); return;
10901 case 7: ldrsh32_wb (cpu, imm, wb); return;
10902 case 8: str32_wb (cpu, imm, wb); return;
10903 case 9: ldr32_wb (cpu, imm, wb); return;
10904 case 10: ldrsw_wb (cpu, imm, wb); return;
10905 case 12: str_wb (cpu, imm, wb); return;
10906 case 13: ldr_wb (cpu, imm, wb); return;
10907
10908 default:
10909 case 11:
10910 case 14:
10911 case 15:
10912 HALT_UNALLOC;
10913 }
10914 }
10915
10916 /* FReg operations. */
10917 switch (dispatch)
10918 {
10919 case 2: fstrq_wb (cpu, imm, wb); return;
10920 case 3: fldrq_wb (cpu, imm, wb); return;
10921 case 8: fstrs_wb (cpu, imm, wb); return;
10922 case 9: fldrs_wb (cpu, imm, wb); return;
10923 case 12: fstrd_wb (cpu, imm, wb); return;
10924 case 13: fldrd_wb (cpu, imm, wb); return;
10925
10926 case 0: /* STR 8 bit FP. */
10927 case 1: /* LDR 8 bit FP. */
10928 case 4: /* STR 16 bit FP. */
10929 case 5: /* LDR 16 bit FP. */
10930 HALT_NYI;
10931
10932 default:
10933 case 6:
10934 case 7:
10935 case 10:
10936 case 11:
10937 case 14:
10938 case 15:
10939 HALT_UNALLOC;
10940 }
10941 }
10942
10943 static void
10944 dexLoadRegisterOffset (sim_cpu *cpu)
10945 {
10946 /* instr[31,30] = size
10947 instr[29,27] = 111
10948 instr[26] = V
10949 instr[25,24] = 00
10950 instr[23,22] = opc
10951 instr[21] = 1
10952 instr[20,16] = rm
10953 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10954 110 ==> SXTW, 111 ==> SXTX,
10955 otherwise ==> RESERVED
10956 instr[12] = scaled
10957 instr[11,10] = 10
10958 instr[9,5] = rn
10959 instr[4,0] = rt. */
10960
10961 uint32_t V = INSTR (26, 26);
10962 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10963 Scaling scale = INSTR (12, 12);
10964 Extension extensionType = INSTR (15, 13);
10965
10966 /* Check for illegal extension types. */
10967 if (uimm (extensionType, 1, 1) == 0)
10968 HALT_UNALLOC;
10969
10970 if (extensionType == UXTX || extensionType == SXTX)
10971 extensionType = NoExtension;
10972
10973 if (!V)
10974 {
10975 /* GReg operations. */
10976 switch (dispatch)
10977 {
10978 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10979 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10980 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10981 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10982 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10983 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10984 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10985 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10986 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10987 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10988 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10989 case 12: str_scale_ext (cpu, scale, extensionType); return;
10990 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10991 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10992
10993 default:
10994 case 11:
10995 case 15:
10996 HALT_UNALLOC;
10997 }
10998 }
10999
11000 /* FReg operations. */
11001 switch (dispatch)
11002 {
11003 case 1: /* LDR 8 bit FP. */
11004 HALT_NYI;
11005 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
11006 case 5: /* LDR 16 bit FP. */
11007 HALT_NYI;
11008 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
11009 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
11010
11011 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11012 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11013 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11014 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11015 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11016
11017 default:
11018 case 6:
11019 case 7:
11020 case 10:
11021 case 11:
11022 case 14:
11023 case 15:
11024 HALT_UNALLOC;
11025 }
11026 }
11027
11028 static void
11029 dexLoadUnsignedImmediate (sim_cpu *cpu)
11030 {
11031 /* instr[29,24] == 111_01
11032 instr[31,30] = size
11033 instr[26] = V
11034 instr[23,22] = opc
11035 instr[21,10] = uimm12 : unsigned immediate offset
11036 instr[9,5] = rn may be SP.
11037 instr[4,0] = rt. */
11038
11039 uint32_t V = INSTR (26,26);
11040 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11041 uint32_t imm = INSTR (21, 10);
11042
11043 if (!V)
11044 {
11045 /* GReg operations. */
11046 switch (dispatch)
11047 {
11048 case 0: strb_abs (cpu, imm); return;
11049 case 1: ldrb32_abs (cpu, imm); return;
11050 case 2: ldrsb_abs (cpu, imm); return;
11051 case 3: ldrsb32_abs (cpu, imm); return;
11052 case 4: strh_abs (cpu, imm); return;
11053 case 5: ldrh32_abs (cpu, imm); return;
11054 case 6: ldrsh_abs (cpu, imm); return;
11055 case 7: ldrsh32_abs (cpu, imm); return;
11056 case 8: str32_abs (cpu, imm); return;
11057 case 9: ldr32_abs (cpu, imm); return;
11058 case 10: ldrsw_abs (cpu, imm); return;
11059 case 12: str_abs (cpu, imm); return;
11060 case 13: ldr_abs (cpu, imm); return;
11061 case 14: prfm_abs (cpu, imm); return;
11062
11063 default:
11064 case 11:
11065 case 15:
11066 HALT_UNALLOC;
11067 }
11068 }
11069
11070 /* FReg operations. */
11071 switch (dispatch)
11072 {
11073 case 0: fstrb_abs (cpu, imm); return;
11074 case 4: fstrh_abs (cpu, imm); return;
11075 case 8: fstrs_abs (cpu, imm); return;
11076 case 12: fstrd_abs (cpu, imm); return;
11077 case 2: fstrq_abs (cpu, imm); return;
11078
11079 case 1: fldrb_abs (cpu, imm); return;
11080 case 5: fldrh_abs (cpu, imm); return;
11081 case 9: fldrs_abs (cpu, imm); return;
11082 case 13: fldrd_abs (cpu, imm); return;
11083 case 3: fldrq_abs (cpu, imm); return;
11084
11085 default:
11086 case 6:
11087 case 7:
11088 case 10:
11089 case 11:
11090 case 14:
11091 case 15:
11092 HALT_UNALLOC;
11093 }
11094 }
11095
11096 static void
11097 dexLoadExclusive (sim_cpu *cpu)
11098 {
11099 /* assert instr[29:24] = 001000;
11100 instr[31,30] = size
11101 instr[23] = 0 if exclusive
11102 instr[22] = L : 1 if load, 0 if store
11103 instr[21] = 1 if pair
11104 instr[20,16] = Rs
11105 instr[15] = o0 : 1 if ordered
11106 instr[14,10] = Rt2
11107 instr[9,5] = Rn
11108 instr[4,0] = Rt. */
11109
11110 switch (INSTR (22, 21))
11111 {
11112 case 2: ldxr (cpu); return;
11113 case 0: stxr (cpu); return;
11114 default: HALT_NYI;
11115 }
11116 }
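/* INSTR (22, 21) combines L (load) with the pair bit, so only the
   non-pair forms are handled here: 2 (L = 1) is LDXR and 0 (L = 0) is
   STXR; the pair variants fall through to HALT_NYI.  */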
11117
11118 static void
11119 dexLoadOther (sim_cpu *cpu)
11120 {
11121 uint32_t dispatch;
11122
11123 /* instr[29,25] = 111_0
11124 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11125 instr[21:11,10] is the secondary dispatch. */
11126 if (INSTR (24, 24))
11127 {
11128 dexLoadUnsignedImmediate (cpu);
11129 return;
11130 }
11131
11132 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11133 switch (dispatch)
11134 {
11135 case 0: dexLoadUnscaledImmediate (cpu); return;
11136 case 1: dexLoadImmediatePrePost (cpu); return;
11137 case 3: dexLoadImmediatePrePost (cpu); return;
11138 case 6: dexLoadRegisterOffset (cpu); return;
11139
11140 default:
11141 case 2:
11142 case 4:
11143 case 5:
11144 case 7:
11145 HALT_NYI;
11146 }
11147 }
11148
11149 static void
11150 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11151 {
11152 unsigned rn = INSTR (14, 10);
11153 unsigned rd = INSTR (9, 5);
11154 unsigned rm = INSTR (4, 0);
11155 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11156
11157 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11158 HALT_UNALLOC; /* Constrained unpredictable in the architecture. */
11159
11160 offset <<= 2;
11161
11162 if (wb != Post)
11163 address += offset;
11164
11165 aarch64_set_mem_u32 (cpu, address,
11166 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11167 aarch64_set_mem_u32 (cpu, address + 4,
11168 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11169
11170 if (wb == Post)
11171 address += offset;
11172
11173 if (wb != NoWriteBack)
11174 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11175 }
11176
11177 static void
11178 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11179 {
11180 unsigned rn = INSTR (14, 10);
11181 unsigned rd = INSTR (9, 5);
11182 unsigned rm = INSTR (4, 0);
11183 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11184
11185 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11186 HALT_UNALLOC; /* Constrained unpredictable in the architecture. */
11187
11188 offset <<= 3;
11189
11190 if (wb != Post)
11191 address += offset;
11192
11193 aarch64_set_mem_u64 (cpu, address,
11194 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11195 aarch64_set_mem_u64 (cpu, address + 8,
11196 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11197
11198 if (wb == Post)
11199 address += offset;
11200
11201 if (wb != NoWriteBack)
11202 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11203 }
11204
11205 static void
11206 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11207 {
11208 unsigned rn = INSTR (14, 10);
11209 unsigned rd = INSTR (9, 5);
11210 unsigned rm = INSTR (4, 0);
11211 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11212
11213 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11214 if (rn == rm)
11215 HALT_UNALLOC;
11216
11217 offset <<= 2;
11218
11219 if (wb != Post)
11220 address += offset;
11221
11222 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11223 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11224
11225 if (wb == Post)
11226 address += offset;
11227
11228 if (wb != NoWriteBack)
11229 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11230 }
11231
11232 static void
11233 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11234 {
11235 unsigned rn = INSTR (14, 10);
11236 unsigned rd = INSTR (9, 5);
11237 unsigned rm = INSTR (4, 0);
11238 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11239
11240 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11241 if (rn == rm)
11242 HALT_UNALLOC;
11243
11244 offset <<= 2;
11245
11246 if (wb != Post)
11247 address += offset;
11248
11249 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11250 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11251
11252 if (wb == Post)
11253 address += offset;
11254
11255 if (wb != NoWriteBack)
11256 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11257 }
11258
11259 static void
11260 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11261 {
11262 unsigned rn = INSTR (14, 10);
11263 unsigned rd = INSTR (9, 5);
11264 unsigned rm = INSTR (4, 0);
11265 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11266
11267 /* Rt == Rt2 is constrained unpredictable; treat it as UNALLOC. */
11268 if (rn == rm)
11269 HALT_UNALLOC;
11270
11271 offset <<= 3;
11272
11273 if (wb != Post)
11274 address += offset;
11275
11276 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11277 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11278
11279 if (wb == Post)
11280 address += offset;
11281
11282 if (wb != NoWriteBack)
11283 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11284 }
11285
11286 static void
11287 dex_load_store_pair_gr (sim_cpu *cpu)
11288 {
11289 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11290 instr[29,25] = instruction encoding: 101_0
11291 instr[26] = V : 1 if fp 0 if gp
11292 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11293 instr[22] = load/store (1=> load)
11294 instr[21,15] = signed, scaled, offset
11295 instr[14,10] = Rn
11296 instr[ 9, 5] = Rd
11297 instr[ 4, 0] = Rm. */
11298
11299 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11300 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11301
11302 switch (dispatch)
11303 {
11304 case 2: store_pair_u32 (cpu, offset, Post); return;
11305 case 3: load_pair_u32 (cpu, offset, Post); return;
11306 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11307 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11308 case 6: store_pair_u32 (cpu, offset, Pre); return;
11309 case 7: load_pair_u32 (cpu, offset, Pre); return;
11310
11311 case 11: load_pair_s32 (cpu, offset, Post); return;
11312 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11313 case 15: load_pair_s32 (cpu, offset, Pre); return;
11314
11315 case 18: store_pair_u64 (cpu, offset, Post); return;
11316 case 19: load_pair_u64 (cpu, offset, Post); return;
11317 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11318 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11319 case 22: store_pair_u64 (cpu, offset, Pre); return;
11320 case 23: load_pair_u64 (cpu, offset, Pre); return;
11321
11322 default:
11323 HALT_UNALLOC;
11324 }
11325 }
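/* For example, "stp x1, x2, [sp, #-16]!" has size = 10, addressing
   mode = 11 (pre) and L = 0, so dispatch = (2 << 3) | 6 = 22 and
   store_pair_u64 is called with Pre write back; its simm7 field of -2
   is scaled by 8 to give the byte offset -16.  */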
11326
11327 static void
11328 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11329 {
11330 unsigned rn = INSTR (14, 10);
11331 unsigned rd = INSTR (9, 5);
11332 unsigned rm = INSTR (4, 0);
11333 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11334
11335 offset <<= 2;
11336
11337 if (wb != Post)
11338 address += offset;
11339
11340 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11341 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11342
11343 if (wb == Post)
11344 address += offset;
11345
11346 if (wb != NoWriteBack)
11347 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11348 }
11349
11350 static void
11351 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11352 {
11353 unsigned rn = INSTR (14, 10);
11354 unsigned rd = INSTR (9, 5);
11355 unsigned rm = INSTR (4, 0);
11356 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11357
11358 offset <<= 3;
11359
11360 if (wb != Post)
11361 address += offset;
11362
11363 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11364 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11365
11366 if (wb == Post)
11367 address += offset;
11368
11369 if (wb != NoWriteBack)
11370 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11371 }
11372
11373 static void
11374 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11375 {
11376 FRegister a;
11377 unsigned rn = INSTR (14, 10);
11378 unsigned rd = INSTR (9, 5);
11379 unsigned rm = INSTR (4, 0);
11380 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11381
11382 offset <<= 4;
11383
11384 if (wb != Post)
11385 address += offset;
11386
11387 aarch64_get_FP_long_double (cpu, rm, & a);
11388 aarch64_set_mem_long_double (cpu, address, a);
11389 aarch64_get_FP_long_double (cpu, rn, & a);
11390 aarch64_set_mem_long_double (cpu, address + 16, a);
11391
11392 if (wb == Post)
11393 address += offset;
11394
11395 if (wb != NoWriteBack)
11396 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11397 }
11398
11399 static void
11400 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11401 {
11402 unsigned rn = INSTR (14, 10);
11403 unsigned rd = INSTR (9, 5);
11404 unsigned rm = INSTR (4, 0);
11405 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11406
11407 if (rm == rn)
11408 HALT_UNALLOC;
11409
11410 offset <<= 2;
11411
11412 if (wb != Post)
11413 address += offset;
11414
11415 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11416 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11417
11418 if (wb == Post)
11419 address += offset;
11420
11421 if (wb != NoWriteBack)
11422 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11423 }
11424
11425 static void
11426 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11427 {
11428 unsigned rn = INSTR (14, 10);
11429 unsigned rd = INSTR (9, 5);
11430 unsigned rm = INSTR (4, 0);
11431 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11432
11433 if (rm == rn)
11434 HALT_UNALLOC;
11435
11436 offset <<= 3;
11437
11438 if (wb != Post)
11439 address += offset;
11440
11441 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11442 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11443
11444 if (wb == Post)
11445 address += offset;
11446
11447 if (wb != NoWriteBack)
11448 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11449 }
11450
11451 static void
11452 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11453 {
11454 FRegister a;
11455 unsigned rn = INSTR (14, 10);
11456 unsigned rd = INSTR (9, 5);
11457 unsigned rm = INSTR (4, 0);
11458 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11459
11460 if (rm == rn)
11461 HALT_UNALLOC;
11462
11463 offset <<= 4;
11464
11465 if (wb != Post)
11466 address += offset;
11467
11468 aarch64_get_mem_long_double (cpu, address, & a);
11469 aarch64_set_FP_long_double (cpu, rm, a);
11470 aarch64_get_mem_long_double (cpu, address + 16, & a);
11471 aarch64_set_FP_long_double (cpu, rn, a);
11472
11473 if (wb == Post)
11474 address += offset;
11475
11476 if (wb != NoWriteBack)
11477 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11478 }
11479
11480 static void
11481 dex_load_store_pair_fp (sim_cpu *cpu)
11482 {
11483 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11484 instr[29,25] = instruction encoding
11485 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11486 instr[22] = load/store (1=> load)
11487 instr[21,15] = signed, scaled, offset
11488 instr[14,10] = Rn
11489 instr[ 9, 5] = Rd
11490 instr[ 4, 0] = Rm */
11491
11492 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11493 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11494
11495 switch (dispatch)
11496 {
11497 case 2: store_pair_float (cpu, offset, Post); return;
11498 case 3: load_pair_float (cpu, offset, Post); return;
11499 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11500 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11501 case 6: store_pair_float (cpu, offset, Pre); return;
11502 case 7: load_pair_float (cpu, offset, Pre); return;
11503
11504 case 10: store_pair_double (cpu, offset, Post); return;
11505 case 11: load_pair_double (cpu, offset, Post); return;
11506 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11507 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11508 case 14: store_pair_double (cpu, offset, Pre); return;
11509 case 15: load_pair_double (cpu, offset, Pre); return;
11510
11511 case 18: store_pair_long_double (cpu, offset, Post); return;
11512 case 19: load_pair_long_double (cpu, offset, Post); return;
11513 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11514 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11515 case 22: store_pair_long_double (cpu, offset, Pre); return;
11516 case 23: load_pair_long_double (cpu, offset, Pre); return;
11517
11518 default:
11519 HALT_UNALLOC;
11520 }
11521 }
11522
11523 static inline unsigned
11524 vec_reg (unsigned v, unsigned o)
11525 {
11526 return (v + o) & 0x1F; /* Register numbers wrap modulo 32. */
11527 }
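/* There are 32 vector registers, so consecutive register lists wrap
   modulo 32, e.g. vec_reg (31, 2) == 1.  */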
11528
11529 /* Load multiple N-element structures to M consecutive registers. */
11530 static void
11531 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11532 {
11533 int all = INSTR (30, 30);
11534 unsigned size = INSTR (11, 10);
11535 unsigned vd = INSTR (4, 0);
11536 unsigned rpt = (N == M) ? 1 : M;
11537 unsigned selem = N;
11538 unsigned i, j, k;
11539
11540 switch (size)
11541 {
11542 case 0: /* 8-bit operations. */
11543 for (i = 0; i < rpt; i++)
11544 for (j = 0; j < (8 + (8 * all)); j++)
11545 for (k = 0; k < selem; k++)
11546 {
11547 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11548 aarch64_get_mem_u8 (cpu, address));
11549 address += 1;
11550 }
11551 return;
11552
11553 case 1: /* 16-bit operations. */
11554 for (i = 0; i < rpt; i++)
11555 for (j = 0; j < (4 + (4 * all)); j++)
11556 for (k = 0; k < selem; k++)
11557 {
11558 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11559 aarch64_get_mem_u16 (cpu, address));
11560 address += 2;
11561 }
11562 return;
11563
11564 case 2: /* 32-bit operations. */
11565 for (i = 0; i < rpt; i++)
11566 for (j = 0; j < (2 + (2 * all)); j++)
11567 for (k = 0; k < selem; k++)
11568 {
11569 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11570 aarch64_get_mem_u32 (cpu, address));
11571 address += 4;
11572 }
11573 return;
11574
11575 case 3: /* 64-bit operations. */
11576 for (i = 0; i < rpt; i++)
11577 for (j = 0; j < (1 + all); j++)
11578 for (k = 0; k < selem; k++)
11579 {
11580 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11581 aarch64_get_mem_u64 (cpu, address));
11582 address += 8;
11583 }
11584 return;
11585 }
11586 }
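/* For example, LD2 {v0.4s, v1.4s} has N = M = 2, so rpt = 1 and
   selem = 2: the j loop walks the four 32-bit lanes while the k loop
   alternates between v0 and v1, de-interleaving memory so that lane j
   of v0 receives word 2*j and lane j of v1 receives word 2*j + 1.  */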
11587
11588 /* Load multiple 4-element structures into four consecutive registers. */
11589 static void
11590 LD4 (sim_cpu *cpu, uint64_t address)
11591 {
11592 vec_load (cpu, address, 4, 4);
11593 }
11594
11595 /* Load multiple 3-element structures into three consecutive registers. */
11596 static void
11597 LD3 (sim_cpu *cpu, uint64_t address)
11598 {
11599 vec_load (cpu, address, 3, 3);
11600 }
11601
11602 /* Load multiple 2-element structures into two consecutive registers. */
11603 static void
11604 LD2 (sim_cpu *cpu, uint64_t address)
11605 {
11606 vec_load (cpu, address, 2, 2);
11607 }
11608
11609 /* Load multiple 1-element structures into one register. */
11610 static void
11611 LD1_1 (sim_cpu *cpu, uint64_t address)
11612 {
11613 vec_load (cpu, address, 1, 1);
11614 }
11615
11616 /* Load multiple 1-element structures into two registers. */
11617 static void
11618 LD1_2 (sim_cpu *cpu, uint64_t address)
11619 {
11620 vec_load (cpu, address, 1, 2);
11621 }
11622
11623 /* Load multiple 1-element structures into three registers. */
11624 static void
11625 LD1_3 (sim_cpu *cpu, uint64_t address)
11626 {
11627 vec_load (cpu, address, 1, 3);
11628 }
11629
11630 /* Load multiple 1-element structures into four registers. */
11631 static void
11632 LD1_4 (sim_cpu *cpu, uint64_t address)
11633 {
11634 vec_load (cpu, address, 1, 4);
11635 }
11636
11637 /* Store multiple N-element structures from M consecutive registers. */
11638 static void
11639 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11640 {
11641 int all = INSTR (30, 30);
11642 unsigned size = INSTR (11, 10);
11643 unsigned vd = INSTR (4, 0);
11644 unsigned rpt = (N == M) ? 1 : M;
11645 unsigned selem = N;
11646 unsigned i, j, k;
11647
11648 switch (size)
11649 {
11650 case 0: /* 8-bit operations. */
11651 for (i = 0; i < rpt; i++)
11652 for (j = 0; j < (8 + (8 * all)); j++)
11653 for (k = 0; k < selem; k++)
11654 {
11655 aarch64_set_mem_u8
11656 (cpu, address,
11657 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11658 address += 1;
11659 }
11660 return;
11661
11662 case 1: /* 16-bit operations. */
11663 for (i = 0; i < rpt; i++)
11664 for (j = 0; j < (4 + (4 * all)); j++)
11665 for (k = 0; k < selem; k++)
11666 {
11667 aarch64_set_mem_u16
11668 (cpu, address,
11669 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11670 address += 2;
11671 }
11672 return;
11673
11674 case 2: /* 32-bit operations. */
11675 for (i = 0; i < rpt; i++)
11676 for (j = 0; j < (2 + (2 * all)); j++)
11677 for (k = 0; k < selem; k++)
11678 {
11679 aarch64_set_mem_u32
11680 (cpu, address,
11681 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11682 address += 4;
11683 }
11684 return;
11685
11686 case 3: /* 64-bit operations. */
11687 for (i = 0; i < rpt; i++)
11688 for (j = 0; j < (1 + all); j++)
11689 for (k = 0; k < selem; k++)
11690 {
11691 aarch64_set_mem_u64
11692 (cpu, address,
11693 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11694 address += 8;
11695 }
11696 return;
11697 }
11698 }
11699
11700 /* Store multiple 4-element structure from four consecutive registers. */
11701 static void
11702 ST4 (sim_cpu *cpu, uint64_t address)
11703 {
11704 vec_store (cpu, address, 4, 4);
11705 }
11706
11707 /* Store multiple 3-element structures from three consecutive registers. */
11708 static void
11709 ST3 (sim_cpu *cpu, uint64_t address)
11710 {
11711 vec_store (cpu, address, 3, 3);
11712 }
11713
11714 /* Store multiple 2-element structures from two consecutive registers. */
11715 static void
11716 ST2 (sim_cpu *cpu, uint64_t address)
11717 {
11718 vec_store (cpu, address, 2, 2);
11719 }
11720
11721 /* Store multiple 1-element structures from one register. */
11722 static void
11723 ST1_1 (sim_cpu *cpu, uint64_t address)
11724 {
11725 vec_store (cpu, address, 1, 1);
11726 }
11727
11728 /* Store multiple 1-element structures from two registers. */
11729 static void
11730 ST1_2 (sim_cpu *cpu, uint64_t address)
11731 {
11732 vec_store (cpu, address, 1, 2);
11733 }
11734
11735 /* Store multiple 1-element structures from three registers. */
11736 static void
11737 ST1_3 (sim_cpu *cpu, uint64_t address)
11738 {
11739 vec_store (cpu, address, 1, 3);
11740 }
11741
11742 /* Store multiple 1-element structures from four registers. */
11743 static void
11744 ST1_4 (sim_cpu *cpu, uint64_t address)
11745 {
11746 vec_store (cpu, address, 1, 4);
11747 }
11748
11749 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11750 do \
11751 { \
11752 switch (INSTR (15, 14)) \
11753 { \
11754 case 0: \
11755 lane = (full << 3) | (s << 2) | size; \
11756 size = 0; \
11757 break; \
11758 \
11759 case 1: \
11760 if ((size & 1) == 1) \
11761 HALT_UNALLOC; \
11762 lane = (full << 2) | (s << 1) | (size >> 1); \
11763 size = 1; \
11764 break; \
11765 \
11766 case 2: \
11767 if ((size & 2) == 2) \
11768 HALT_UNALLOC; \
11769 \
11770 if ((size & 1) == 0) \
11771 { \
11772 lane = (full << 1) | s; \
11773 size = 2; \
11774 } \
11775 else \
11776 { \
11777 if (s) \
11778 HALT_UNALLOC; \
11779 lane = full; \
11780 size = 3; \
11781 } \
11782 break; \
11783 \
11784 default: \
11785 HALT_UNALLOC; \
11786 } \
11787 } \
11788 while (0)
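/* Worked example: for a byte access (INSTR (15, 14) == 0) with
   full = 1, s = 1 and size = 3 the macro yields lane = 0b1111 = 15 and
   size = 0, i.e. byte lane 15 of a 128-bit register; for a halfword
   access (INSTR (15, 14) == 1) with full = 1, s = 0 and size = 2 it
   yields lane = 0b101 = 5 and size = 1.  */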
11789
11790 /* Load single structure into one lane of N registers. */
11791 static void
11792 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11793 {
11794 /* instr[31] = 0
11795 instr[30] = element selector 0=>half, 1=>all elements
11796 instr[29,24] = 00 1101
11797 instr[23] = 0=>simple, 1=>post
11798 instr[22] = 1
11799 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11800 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11801 11111 (immediate post inc)
11802 instr[15,13] = opcode
11803 instr[12] = S, used for lane number
11804 instr[11,10] = size, also used for lane number
11805 instr[9,5] = address
11806 instr[4,0] = Vd */
11807
11808 unsigned full = INSTR (30, 30);
11809 unsigned vd = INSTR (4, 0);
11810 unsigned size = INSTR (11, 10);
11811 unsigned s = INSTR (12, 12);
11812 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11813 int lane = 0;
11814 int i;
11815
11816 NYI_assert (29, 24, 0x0D);
11817 NYI_assert (22, 22, 1);
11818
11819 /* Compute the lane number first (using size), and then compute size. */
11820 LDn_STn_SINGLE_LANE_AND_SIZE ();
11821
11822 for (i = 0; i < nregs; i++)
11823 switch (size)
11824 {
11825 case 0:
11826 {
11827 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11828 aarch64_set_vec_u8 (cpu, vd + i, lane, val);
11829 break;
11830 }
11831
11832 case 1:
11833 {
11834 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11835 aarch64_set_vec_u16 (cpu, vd + i, lane, val);
11836 break;
11837 }
11838
11839 case 2:
11840 {
11841 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11842 aarch64_set_vec_u32 (cpu, vd + i, lane, val);
11843 break;
11844 }
11845
11846 case 3:
11847 {
11848 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11849 aarch64_set_vec_u64 (cpu, vd + i, lane, val);
11850 break;
11851 }
11852 }
11853 }
11854
11855 /* Store single structure from one lane from N registers. */
11856 static void
11857 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11858 {
11859 /* instr[31] = 0
11860 instr[30] = element selector 0=>half, 1=>all elements
11861 instr[29,24] = 00 1101
11862 instr[23] = 0=>simple, 1=>post
11863 instr[22] = 0
11864 instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
11865 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11866 11111 (immediate post inc)
11867 instr[15,13] = opcode
11868 instr[12] = S, used for lane number
11869 instr[11,10] = size, also used for lane number
11870 instr[9,5] = address
11871 instr[4,0] = Vd */
11872
11873 unsigned full = INSTR (30, 30);
11874 unsigned vd = INSTR (4, 0);
11875 unsigned size = INSTR (11, 10);
11876 unsigned s = INSTR (12, 12);
11877 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11878 int lane = 0;
11879 int i;
11880
11881 NYI_assert (29, 24, 0x0D);
11882 NYI_assert (22, 22, 0);
11883
11884 /* Compute the lane number first (using size), and then compute size. */
11885 LDn_STn_SINGLE_LANE_AND_SIZE ();
11886
11887 for (i = 0; i < nregs; i++)
11888 switch (size)
11889 {
11890 case 0:
11891 {
11892 uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
11893 aarch64_set_mem_u8 (cpu, address + i, val);
11894 break;
11895 }
11896
11897 case 1:
11898 {
11899 uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
11900 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11901 break;
11902 }
11903
11904 case 2:
11905 {
11906 uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
11907 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11908 break;
11909 }
11910
11911 case 3:
11912 {
11913 uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
11914 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11915 break;
11916 }
11917 }
11918 }
11919
11920 /* Load single structure into all lanes of N registers. */
11921 static void
11922 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11923 {
11924 /* instr[31] = 0
11925 instr[30] = element selector 0=>half, 1=>all elements
11926 instr[29,24] = 00 1101
11927 instr[23] = 0=>simple, 1=>post
11928 instr[22] = 1
11929 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11930 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11931 11111 (immediate post inc)
11932 instr[15,14] = 11
11933 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11934 instr[12] = 0
11935 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11936 10=> word(s), 11=> double(d)
11937 instr[9,5] = address
11938 instr[4,0] = Vd */
11939
11940 unsigned full = INSTR (30, 30);
11941 unsigned vd = INSTR (4, 0);
11942 unsigned size = INSTR (11, 10);
11943 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11944 int i, n;
11945
11946 NYI_assert (29, 24, 0x0D);
11947 NYI_assert (22, 22, 1);
11948 NYI_assert (15, 14, 3);
11949 NYI_assert (12, 12, 0);
11950
11951 for (n = 0; n < nregs; n++)
11952 switch (size)
11953 {
11954 case 0:
11955 {
11956 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11957 for (i = 0; i < (full ? 16 : 8); i++)
11958 aarch64_set_vec_u8 (cpu, vd + n, i, val);
11959 break;
11960 }
11961
11962 case 1:
11963 {
11964 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11965 for (i = 0; i < (full ? 8 : 4); i++)
11966 aarch64_set_vec_u16 (cpu, vd + n, i, val);
11967 break;
11968 }
11969
11970 case 2:
11971 {
11972 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11973 for (i = 0; i < (full ? 4 : 2); i++)
11974 aarch64_set_vec_u32 (cpu, vd + n, i, val);
11975 break;
11976 }
11977
11978 case 3:
11979 {
11980 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11981 for (i = 0; i < (full ? 2 : 1); i++)
11982 aarch64_set_vec_u64 (cpu, vd + n, i, val);
11983 break;
11984 }
11985
11986 default:
11987 HALT_UNALLOC;
11988 }
11989 }
11990
11991 static void
11992 do_vec_load_store (sim_cpu *cpu)
11993 {
11994 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11995
11996 instr[31] = 0
11997 instr[30] = element selector 0=>half, 1=>all elements
11998 instr[29,25] = 00110
11999 instr[24] = 0=>multiple struct, 1=>single struct
12000 instr[23] = 0=>simple, 1=>post
12001 instr[22] = 0=>store, 1=>load
12002 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
12003 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
12004 11111 (immediate post inc)
12005 instr[15,12] = elements and destinations. eg for load:
12006 0000=>LD4 => load multiple 4-element to
12007 four consecutive registers
12008 0100=>LD3 => load multiple 3-element to
12009 three consecutive registers
12010 1000=>LD2 => load multiple 2-element to
12011 two consecutive registers
12012 0010=>LD1 => load multiple 1-element to
12013 four consecutive registers
12014 0110=>LD1 => load multiple 1-element to
12015 three consecutive registers
12016 1010=>LD1 => load multiple 1-element to
12017 two consecutive registers
12018 0111=>LD1 => load multiple 1-element to
12019 one register
12020 1100=>LD1R,LD2R
12021 1110=>LD3R,LD4R
12022 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12023 10=> word(s), 11=> double(d)
12024 instr[9,5] = Vn, can be SP
12025 instr[4,0] = Vd */
12026
12027 int single;
12028 int post;
12029 int load;
12030 unsigned vn;
12031 uint64_t address;
12032 int type;
12033
12034 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12035 HALT_NYI;
12036
12037 single = INSTR (24, 24);
12038 post = INSTR (23, 23);
12039 load = INSTR (22, 22);
12040 type = INSTR (15, 12);
12041 vn = INSTR (9, 5);
12042 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12043
12044 if (! single && INSTR (21, 21) != 0)
12045 HALT_UNALLOC;
12046
12047 if (post)
12048 {
12049 unsigned vm = INSTR (20, 16);
12050
12051 if (vm == R31)
12052 {
12053 unsigned sizeof_operation;
12054
12055 if (single)
12056 {
12057 if ((type >= 0) && (type <= 11))
12058 {
12059 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12060 switch (INSTR (15, 14))
12061 {
12062 case 0:
12063 sizeof_operation = nregs * 1;
12064 break;
12065 case 1:
12066 sizeof_operation = nregs * 2;
12067 break;
12068 case 2:
12069 if (INSTR (10, 10) == 0)
12070 sizeof_operation = nregs * 4;
12071 else
12072 sizeof_operation = nregs * 8;
12073 break;
12074 default:
12075 HALT_UNALLOC;
12076 }
12077 }
12078 else if (type == 0xC)
12079 {
12080 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12081 sizeof_operation <<= INSTR (11, 10);
12082 }
12083 else if (type == 0xE)
12084 {
12085 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12086 sizeof_operation <<= INSTR (11, 10);
12087 }
12088 else
12089 HALT_UNALLOC;
12090 }
12091 else
12092 {
12093 switch (type)
12094 {
12095 case 0: sizeof_operation = 32; break;
12096 case 4: sizeof_operation = 24; break;
12097 case 8: sizeof_operation = 16; break;
12098
12099 case 7:
12100 /* One register, immediate offset variant. */
12101 sizeof_operation = 8;
12102 break;
12103
12104 case 10:
12105 /* Two registers, immediate offset variant. */
12106 sizeof_operation = 16;
12107 break;
12108
12109 case 6:
12110 /* Three registers, immediate offset variant. */
12111 sizeof_operation = 24;
12112 break;
12113
12114 case 2:
12115 /* Four registers, immediate offset variant. */
12116 sizeof_operation = 32;
12117 break;
12118
12119 default:
12120 HALT_UNALLOC;
12121 }
12122
12123 if (INSTR (30, 30))
12124 sizeof_operation *= 2;
12125 }
12126
12127 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12128 }
12129 else
12130 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12131 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12132 }
12133 else
12134 {
12135 NYI_assert (20, 16, 0);
12136 }
12137
12138 if (single)
12139 {
12140 if (load)
12141 {
12142 if ((type >= 0) && (type <= 11))
12143 do_vec_LDn_single (cpu, address);
12144 else if ((type == 0xC) || (type == 0xE))
12145 do_vec_LDnR (cpu, address);
12146 else
12147 HALT_UNALLOC;
12148 return;
12149 }
12150
12151 /* Stores. */
12152 if ((type >= 0) && (type <= 11))
12153 {
12154 do_vec_STn_single (cpu, address);
12155 return;
12156 }
12157
12158 HALT_UNALLOC;
12159 }
12160
12161 if (load)
12162 {
12163 switch (type)
12164 {
12165 case 0: LD4 (cpu, address); return;
12166 case 4: LD3 (cpu, address); return;
12167 case 8: LD2 (cpu, address); return;
12168 case 2: LD1_4 (cpu, address); return;
12169 case 6: LD1_3 (cpu, address); return;
12170 case 10: LD1_2 (cpu, address); return;
12171 case 7: LD1_1 (cpu, address); return;
12172
12173 default:
12174 HALT_UNALLOC;
12175 }
12176 }
12177
12178 /* Stores. */
12179 switch (type)
12180 {
12181 case 0: ST4 (cpu, address); return;
12182 case 4: ST3 (cpu, address); return;
12183 case 8: ST2 (cpu, address); return;
12184 case 2: ST1_4 (cpu, address); return;
12185 case 6: ST1_3 (cpu, address); return;
12186 case 10: ST1_2 (cpu, address); return;
12187 case 7: ST1_1 (cpu, address); return;
12188 default:
12189 HALT_UNALLOC;
12190 }
12191 }
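/* Post-increment example: an immediate post-indexed LD4 of four
   128-bit registers (type == 0, instr[30] == 1) advances the base by
   32 * 2 = 64 bytes, matching "ld4 {v0.16b-v3.16b}, [x0], #64".  */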
12192
12193 static void
12194 dexLdSt (sim_cpu *cpu)
12195 {
12196 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12197 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12198 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12199 bits [29,28:26] of a LS are the secondary dispatch vector. */
12200 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12201
12202 switch (group2)
12203 {
12204 case LS_EXCL_000:
12205 dexLoadExclusive (cpu); return;
12206
12207 case LS_LIT_010:
12208 case LS_LIT_011:
12209 dexLoadLiteral (cpu); return;
12210
12211 case LS_OTHER_110:
12212 case LS_OTHER_111:
12213 dexLoadOther (cpu); return;
12214
12215 case LS_ADVSIMD_001:
12216 do_vec_load_store (cpu); return;
12217
12218 case LS_PAIR_100:
12219 dex_load_store_pair_gr (cpu); return;
12220
12221 case LS_PAIR_101:
12222 dex_load_store_pair_fp (cpu); return;
12223
12224 default:
12225 /* Should never reach here. */
12226 HALT_NYI;
12227 }
12228 }
12229
12230 /* Specific decode and execute for group Data Processing Register. */
12231
12232 static void
12233 dexLogicalShiftedRegister (sim_cpu *cpu)
12234 {
12235 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12236 instr[30,29] = op
12237 instr[28:24] = 01010
12238 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12239 instr[21] = N
12240 instr[20,16] = Rm
12241 instr[15,10] = count : must be 0xxxxx for 32 bit
12242 instr[9,5] = Rn
12243 instr[4,0] = Rd */
12244
12245 uint32_t size = INSTR (31, 31);
12246 Shift shiftType = INSTR (23, 22);
12247 uint32_t count = INSTR (15, 10);
12248
12249 /* 32 bit operations must have count[5] = 0,
12250 or else we have an UNALLOC. */
12251 if (size == 0 && uimm (count, 5, 5))
12252 HALT_UNALLOC;
12253
12254 /* Dispatch on size:op:N. */
12255 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12256 {
12257 case 0: and32_shift (cpu, shiftType, count); return;
12258 case 1: bic32_shift (cpu, shiftType, count); return;
12259 case 2: orr32_shift (cpu, shiftType, count); return;
12260 case 3: orn32_shift (cpu, shiftType, count); return;
12261 case 4: eor32_shift (cpu, shiftType, count); return;
12262 case 5: eon32_shift (cpu, shiftType, count); return;
12263 case 6: ands32_shift (cpu, shiftType, count); return;
12264 case 7: bics32_shift (cpu, shiftType, count); return;
12265 case 8: and64_shift (cpu, shiftType, count); return;
12266 case 9: bic64_shift (cpu, shiftType, count); return;
12267 case 10: orr64_shift (cpu, shiftType, count); return;
12268 case 11: orn64_shift (cpu, shiftType, count); return;
12269 case 12: eor64_shift (cpu, shiftType, count); return;
12270 case 13: eon64_shift (cpu, shiftType, count); return;
12271 case 14: ands64_shift (cpu, shiftType, count); return;
12272 case 15: bics64_shift (cpu, shiftType, count); return;
12273 }
12274 }
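/* The dispatch value above is size:op:N, so e.g. a 64-bit ANDS
   (size = 1, op = 11, N = 0) selects case 14, while a 32-bit BIC
   (size = 0, op = 00, N = 1) selects case 1.  */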
12275
12276 /* 32 bit conditional select. */
12277 static void
12278 csel32 (sim_cpu *cpu, CondCode cc)
12279 {
12280 unsigned rm = INSTR (20, 16);
12281 unsigned rn = INSTR (9, 5);
12282 unsigned rd = INSTR (4, 0);
12283
12284 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12285 testConditionCode (cpu, cc)
12286 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12287 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12288 }
12289
12290 /* 64 bit conditional select. */
12291 static void
12292 csel64 (sim_cpu *cpu, CondCode cc)
12293 {
12294 unsigned rm = INSTR (20, 16);
12295 unsigned rn = INSTR (9, 5);
12296 unsigned rd = INSTR (4, 0);
12297
12298 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12299 testConditionCode (cpu, cc)
12300 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12301 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12302 }
12303
12304 /* 32 bit conditional increment. */
12305 static void
12306 csinc32 (sim_cpu *cpu, CondCode cc)
12307 {
12308 unsigned rm = INSTR (20, 16);
12309 unsigned rn = INSTR (9, 5);
12310 unsigned rd = INSTR (4, 0);
12311
12312 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12313 testConditionCode (cpu, cc)
12314 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12315 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12316 }
12317
12318 /* 64 bit conditional increment. */
12319 static void
12320 csinc64 (sim_cpu *cpu, CondCode cc)
12321 {
12322 unsigned rm = INSTR (20, 16);
12323 unsigned rn = INSTR (9, 5);
12324 unsigned rd = INSTR (4, 0);
12325
12326 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12327 testConditionCode (cpu, cc)
12328 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12329 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12330 }
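/* Note that CSINC also implements the CINC alias (Rn == Rm with the
   condition inverted) and the CSET alias (Rn == Rm == ZR with the
   condition inverted).  */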
12331
12332 /* 32 bit conditional invert. */
12333 static void
12334 csinv32 (sim_cpu *cpu, CondCode cc)
12335 {
12336 unsigned rm = INSTR (20, 16);
12337 unsigned rn = INSTR (9, 5);
12338 unsigned rd = INSTR (4, 0);
12339
12340 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12341 testConditionCode (cpu, cc)
12342 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12343 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12344 }
12345
12346 /* 64 bit conditional invert. */
12347 static void
12348 csinv64 (sim_cpu *cpu, CondCode cc)
12349 {
12350 unsigned rm = INSTR (20, 16);
12351 unsigned rn = INSTR (9, 5);
12352 unsigned rd = INSTR (4, 0);
12353
12354 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12355 testConditionCode (cpu, cc)
12356 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12357 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12358 }
12359
12360 /* 32 bit conditional negate. */
12361 static void
12362 csneg32 (sim_cpu *cpu, CondCode cc)
12363 {
12364 unsigned rm = INSTR (20, 16);
12365 unsigned rn = INSTR (9, 5);
12366 unsigned rd = INSTR (4, 0);
12367
12368 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12369 testConditionCode (cpu, cc)
12370 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12371 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12372 }
12373
12374 /* 64 bit conditional negate. */
12375 static void
12376 csneg64 (sim_cpu *cpu, CondCode cc)
12377 {
12378 unsigned rm = INSTR (20, 16);
12379 unsigned rn = INSTR (9, 5);
12380 unsigned rd = INSTR (4, 0);
12381
12382 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12383 testConditionCode (cpu, cc)
12384 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12385 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12386 }
12387
12388 static void
12389 dexCondSelect (sim_cpu *cpu)
12390 {
12391 /* instr[28,21] = 11010100
12392 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12393 instr[30:11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12394 100 ==> CSINV, 101 ==> CSNEG,
12395 _1_ ==> UNALLOC
12396 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12397 instr[15,12] = cond
12398 instr[20,16] = rm, instr[9,5] = rn, instr[4,0] = rd */
12399
12400 CondCode cc = INSTR (15, 12);
12401 uint32_t S = INSTR (29, 29);
12402 uint32_t op2 = INSTR (11, 10);
12403
12404 if (S == 1)
12405 HALT_UNALLOC;
12406
12407 if (op2 & 0x2)
12408 HALT_UNALLOC;
12409
12410 switch ((INSTR (31, 30) << 1) | op2)
12411 {
12412 case 0: csel32 (cpu, cc); return;
12413 case 1: csinc32 (cpu, cc); return;
12414 case 2: csinv32 (cpu, cc); return;
12415 case 3: csneg32 (cpu, cc); return;
12416 case 4: csel64 (cpu, cc); return;
12417 case 5: csinc64 (cpu, cc); return;
12418 case 6: csinv64 (cpu, cc); return;
12419 case 7: csneg64 (cpu, cc); return;
12420 }
12421 }
12422
12423 /* Some helpers for counting leading 1 or 0 bits. */
12424
12425 /* Counts the number of leading bits which are the same
12426 in a 32 bit value; the result is in the range 1 to 32. */
12427 static uint32_t
12428 leading32 (uint32_t value)
12429 {
12430 int32_t mask = 0xffff0000;
12431 uint32_t count = 16; /* Number of bits set in mask. */
12432 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12433 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12434
12435 while (lo + 1 < hi)
12436 {
12437 int32_t test = (value & mask);
12438
12439 if (test == 0 || test == mask)
12440 {
12441 lo = count;
12442 count = (lo + hi) / 2;
12443 mask >>= (count - lo);
12444 }
12445 else
12446 {
12447 hi = count;
12448 count = (lo + hi) / 2;
12449 mask <<= hi - count;
12450 }
12451 }
12452
12453 if (lo != hi)
12454 {
12455 int32_t test;
12456
12457 mask >>= 1;
12458 test = (value & mask);
12459
12460 if (test == 0 || test == mask)
12461 count = hi;
12462 else
12463 count = lo;
12464 }
12465
12466 return count;
12467 }
12468
12469 /* Counts the number of leading bits which are the same
12470 in a 64 bit value; the result is in the range 1 to 64. */
12471 static uint64_t
12472 leading64 (uint64_t value)
12473 {
12474 int64_t mask = 0xffffffff00000000LL;
12475 uint64_t count = 32; /* Number of bits set in mask. */
12476 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12477 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12478
12479 while (lo + 1 < hi)
12480 {
12481 int64_t test = (value & mask);
12482
12483 if (test == 0 || test == mask)
12484 {
12485 lo = count;
12486 count = (lo + hi) / 2;
12487 mask >>= (count - lo);
12488 }
12489 else
12490 {
12491 hi = count;
12492 count = (lo + hi) / 2;
12493 mask <<= hi - count;
12494 }
12495 }
12496
12497 if (lo != hi)
12498 {
12499 int64_t test;
12500
12501 mask >>= 1;
12502 test = (value & mask);
12503
12504 if (test == 0 || test == mask)
12505 count = hi;
12506 else
12507 count = lo;
12508 }
12509
12510 return count;
12511 }
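
/* For reference, the binary searches above compute the same result as
   this straightforward linear scan -- a minimal illustrative sketch of
   the 32 bit case, kept out of the build with #if 0. */
#if 0
static uint32_t
leading32_ref (uint32_t value)
{
  uint32_t top = value >> 31; /* The bit every leading bit must match. */
  uint32_t count = 1; /* The top bit always matches itself. */
  int i;

  for (i = 30; i >= 0 && ((value >> i) & 1) == top; i--)
    count++;

  return count;
}
#endif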
12512
12513 /* Bit operations. */
12514 /* N.B. register args may not be SP. */
12515
12516 /* 32 bit count leading sign bits. */
12517 static void
12518 cls32 (sim_cpu *cpu)
12519 {
12520 unsigned rn = INSTR (9, 5);
12521 unsigned rd = INSTR (4, 0);
12522
12523 /* N.B. the result needs to exclude the leading bit. */
12524 aarch64_set_reg_u64
12525 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12526 }
12527
12528 /* 64 bit count leading sign bits. */
12529 static void
12530 cls64 (sim_cpu *cpu)
12531 {
12532 unsigned rn = INSTR (9, 5);
12533 unsigned rd = INSTR (4, 0);
12534
12535 /* N.B. the result needs to exclude the leading bit. */
12536 aarch64_set_reg_u64
12537 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12538 }
12539
12540 /* 32 bit count leading zero bits. */
12541 static void
12542 clz32 (sim_cpu *cpu)
12543 {
12544 unsigned rn = INSTR (9, 5);
12545 unsigned rd = INSTR (4, 0);
12546 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12547
12548 /* if the sign (top) bit is set then the count is 0. */
12549 if (pick32 (value, 31, 31))
12550 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12551 else
12552 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12553 }
12554
12555 /* 64 bit count leading zero bits. */
12556 static void
12557 clz64 (sim_cpu *cpu)
12558 {
12559 unsigned rn = INSTR (9, 5);
12560 unsigned rd = INSTR (4, 0);
12561 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12562
12563 /* if the sign (top) bit is set then the count is 0. */
12564 if (pick64 (value, 63, 63))
12565 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12566 else
12567 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12568 }
12569
12570 /* 32 bit reverse bits. */
12571 static void
12572 rbit32 (sim_cpu *cpu)
12573 {
12574 unsigned rn = INSTR (9, 5);
12575 unsigned rd = INSTR (4, 0);
12576 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12577 uint32_t result = 0;
12578 int i;
12579
12580 for (i = 0; i < 32; i++)
12581 {
12582 result <<= 1;
12583 result |= (value & 1);
12584 value >>= 1;
12585 }
12586 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12587 }
12588
12589 /* 64 bit reverse bits. */
12590 static void
12591 rbit64 (sim_cpu *cpu)
12592 {
12593 unsigned rn = INSTR (9, 5);
12594 unsigned rd = INSTR (4, 0);
12595 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12596 uint64_t result = 0;
12597 int i;
12598
12599 for (i = 0; i < 64; i++)
12600 {
12601 result <<= 1;
12602 result |= (value & 1UL);
12603 value >>= 1;
12604 }
12605 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12606 }
12607
12608 /* 32 bit reverse bytes. */
12609 static void
12610 rev32 (sim_cpu *cpu)
12611 {
12612 unsigned rn = INSTR (9, 5);
12613 unsigned rd = INSTR (4, 0);
12614 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12615 uint32_t result = 0;
12616 int i;
12617
12618 for (i = 0; i < 4; i++)
12619 {
12620 result <<= 8;
12621 result |= (value & 0xff);
12622 value >>= 8;
12623 }
12624 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12625 }
12626
12627 /* 64 bit reverse bytes. */
12628 static void
12629 rev64 (sim_cpu *cpu)
12630 {
12631 unsigned rn = INSTR (9, 5);
12632 unsigned rd = INSTR (4, 0);
12633 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12634 uint64_t result = 0;
12635 int i;
12636
12637 for (i = 0; i < 8; i++)
12638 {
12639 result <<= 8;
12640 result |= (value & 0xffULL);
12641 value >>= 8;
12642 }
12643 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12644 }
12645
12646 /* 32 bit reverse shorts. */
12647 /* N.B. this reverses the order of the bytes in each half word. */
12648 static void
12649 revh32 (sim_cpu *cpu)
12650 {
12651 unsigned rn = INSTR (9, 5);
12652 unsigned rd = INSTR (4, 0);
12653 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12654 uint32_t result = 0;
12655 int i;
12656
12657 for (i = 0; i < 2; i++)
12658 {
12659 result <<= 8;
12660 result |= (value & 0x00ff00ff);
12661 value >>= 8;
12662 }
12663 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12664 }
12665
12666 /* 64 bit reverse shorts. */
12667 /* N.B. this reverses the order of the bytes in each half word. */
12668 static void
12669 revh64 (sim_cpu *cpu)
12670 {
12671 unsigned rn = INSTR (9, 5);
12672 unsigned rd = INSTR (4, 0);
12673 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12674 uint64_t result = 0;
12675 int i;
12676
12677 for (i = 0; i < 2; i++)
12678 {
12679 result <<= 8;
12680 result |= (value & 0x00ff00ff00ff00ffULL);
12681 value >>= 8;
12682 }
12683 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12684 }
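
/* Worked example for the two loops above (illustration only): with
   value = 0xAABBCCDD the first pass leaves result = 0x00BB00DD, the
   second shifts that up a byte and ORs in 0x00AA00CC, giving
   0xBBAADDCC -- the two bytes of each half word have been swapped. */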
12685
12686 static void
12687 dexDataProc1Source (sim_cpu *cpu)
12688 {
12689 /* instr[30] = 1
12690 instr[28,21] = 11010110
12691 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12692 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12693 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12694 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12695 000010 ==> REV (REV32 when 64 bit),
12696 000011 ==> REV (64 bit only, ow UNALLOC),
12697 000100 ==> CLZ, 000101 ==> CLS, ow ==> UNALLOC
12698 instr[9,5] = rn : may not be SP
12699 instr[4,0] = rd : may not be SP. */
12700
12701 uint32_t S = INSTR (29, 29);
12702 uint32_t opcode2 = INSTR (20, 16);
12703 uint32_t opcode = INSTR (15, 10);
12704 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12705
12706 if (S == 1)
12707 HALT_UNALLOC;
12708
12709 if (opcode2 != 0)
12710 HALT_UNALLOC;
12711
12712 if (opcode & 0x38)
12713 HALT_UNALLOC;
12714
12715 switch (dispatch)
12716 {
12717 case 0: rbit32 (cpu); return;
12718 case 1: revh32 (cpu); return;
12719 case 2: rev32 (cpu); return;
12720 case 4: clz32 (cpu); return;
12721 case 5: cls32 (cpu); return;
12722 case 8: rbit64 (cpu); return;
12723 case 9: revh64 (cpu); return;
12724 case 10: rev32 (cpu); return;
12725 case 11: rev64 (cpu); return;
12726 case 12: clz64 (cpu); return;
12727 case 13: cls64 (cpu); return;
12728 default: HALT_UNALLOC;
12729 }
12730 }
12731
12732 /* Variable shift.
12733 Shifts by count supplied in register.
12734 N.B. register args may not be SP.
12735 These all use the shifted auxiliary function for
12736 simplicity and clarity. Writing the actual shift
12737 inline would avoid a branch and so be faster but
12738 would also necessitate getting signs right. */
12739
12740 /* 32 bit arithmetic shift right. */
12741 static void
12742 asrv32 (sim_cpu *cpu)
12743 {
12744 unsigned rm = INSTR (20, 16);
12745 unsigned rn = INSTR (9, 5);
12746 unsigned rd = INSTR (4, 0);
12747
12748 aarch64_set_reg_u64
12749 (cpu, rd, NO_SP,
12750 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12751 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12752 }
12753
12754 /* 64 bit arithmetic shift right. */
12755 static void
12756 asrv64 (sim_cpu *cpu)
12757 {
12758 unsigned rm = INSTR (20, 16);
12759 unsigned rn = INSTR (9, 5);
12760 unsigned rd = INSTR (4, 0);
12761
12762 aarch64_set_reg_u64
12763 (cpu, rd, NO_SP,
12764 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12765 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12766 }
12767
12768 /* 32 bit logical shift left. */
12769 static void
12770 lslv32 (sim_cpu *cpu)
12771 {
12772 unsigned rm = INSTR (20, 16);
12773 unsigned rn = INSTR (9, 5);
12774 unsigned rd = INSTR (4, 0);
12775
12776 aarch64_set_reg_u64
12777 (cpu, rd, NO_SP,
12778 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12779 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12780 }
12781
12782 /* 64 bit logical shift left. */
12783 static void
12784 lslv64 (sim_cpu *cpu)
12785 {
12786 unsigned rm = INSTR (20, 16);
12787 unsigned rn = INSTR (9, 5);
12788 unsigned rd = INSTR (4, 0);
12789
12790 aarch64_set_reg_u64
12791 (cpu, rd, NO_SP,
12792 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12793 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12794 }
12795
12796 /* 32 bit logical shift right. */
12797 static void
12798 lsrv32 (sim_cpu *cpu)
12799 {
12800 unsigned rm = INSTR (20, 16);
12801 unsigned rn = INSTR (9, 5);
12802 unsigned rd = INSTR (4, 0);
12803
12804 aarch64_set_reg_u64
12805 (cpu, rd, NO_SP,
12806 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12807 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12808 }
12809
12810 /* 64 bit logical shift right. */
12811 static void
12812 lsrv64 (sim_cpu *cpu)
12813 {
12814 unsigned rm = INSTR (20, 16);
12815 unsigned rn = INSTR (9, 5);
12816 unsigned rd = INSTR (4, 0);
12817
12818 aarch64_set_reg_u64
12819 (cpu, rd, NO_SP,
12820 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12821 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12822 }
12823
12824 /* 32 bit rotate right. */
12825 static void
12826 rorv32 (sim_cpu *cpu)
12827 {
12828 unsigned rm = INSTR (20, 16);
12829 unsigned rn = INSTR (9, 5);
12830 unsigned rd = INSTR (4, 0);
12831
12832 aarch64_set_reg_u64
12833 (cpu, rd, NO_SP,
12834 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12835 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12836 }
12837
12838 /* 64 bit rotate right. */
12839 static void
12840 rorv64 (sim_cpu *cpu)
12841 {
12842 unsigned rm = INSTR (20, 16);
12843 unsigned rn = INSTR (9, 5);
12844 unsigned rd = INSTR (4, 0);
12845
12846 aarch64_set_reg_u64
12847 (cpu, rd, NO_SP,
12848 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12849 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12850 }
12851
12852
12853 /* Divide. */
12854
12855 /* 32 bit signed divide. */
12856 static void
12857 sdiv32 (sim_cpu *cpu)
12858 {
12859 unsigned rm = INSTR (20, 16);
12860 unsigned rn = INSTR (9, 5);
12861 unsigned rd = INSTR (4, 0);
12862 /* N.B. the pseudo-code does the divide using 64 bit data. */
12863 /* TODO : check that this rounds towards zero as required. */
12864 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12865 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12866
12867 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12868 divisor ? ((int32_t) (dividend / divisor)) : 0);
12869 }
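
/* N.B. doing the divide with 64 bit operands also covers the one case
   that is undefined for a native 32 bit divide in C: INT32_MIN / -1.
   In 64 bits it yields +2^31, which the truncation to int32_t folds
   back to 0x80000000, matching the architected SDIV result. */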
12870
12871 /* 64 bit signed divide. */
12872 static void
12873 sdiv64 (sim_cpu *cpu)
12874 {
12875 unsigned rm = INSTR (20, 16);
12876 unsigned rn = INSTR (9, 5);
12877 unsigned rd = INSTR (4, 0);
12878
12879 /* TODO : check that this rounds towards zero as required. */
12880 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12881
12882 aarch64_set_reg_s64
12883 (cpu, rd, NO_SP,
12884 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12885 }
12886
12887 /* 32 bit unsigned divide. */
12888 static void
12889 udiv32 (sim_cpu *cpu)
12890 {
12891 unsigned rm = INSTR (20, 16);
12892 unsigned rn = INSTR (9, 5);
12893 unsigned rd = INSTR (4, 0);
12894
12895 /* N.B. the pseudo-code does the divide using 64 bit data. */
12896 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12897 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12898
12899 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12900 divisor ? (uint32_t) (dividend / divisor) : 0);
12901 }
12902
12903 /* 64 bit unsigned divide. */
12904 static void
12905 udiv64 (sim_cpu *cpu)
12906 {
12907 unsigned rm = INSTR (20, 16);
12908 unsigned rn = INSTR (9, 5);
12909 unsigned rd = INSTR (4, 0);
12910
12911 /* TODO : check that this rounds towards zero as required. */
12912 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12913
12914 aarch64_set_reg_u64
12915 (cpu, rd, NO_SP,
12916 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12917 }
12918
12919 static void
12920 dexDataProc2Source (sim_cpu *cpu)
12921 {
12922 /* assert instr[30] == 0
12923 instr[28,21] == 11010110
12924 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12925 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12926 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12927 001000 ==> LSLV, 001001 ==> LSRV
12928 001010 ==> ASRV, 001011 ==> RORV
12929 ow ==> UNALLOC. */
12930
12931 uint32_t dispatch;
12932 uint32_t S = INSTR (29, 29);
12933 uint32_t opcode = INSTR (15, 10);
12934
12935 if (S == 1)
12936 HALT_UNALLOC;
12937
12938 if (opcode & 0x34)
12939 HALT_UNALLOC;
12940
12941 dispatch = ( (INSTR (31, 31) << 3)
12942 | (uimm (opcode, 3, 3) << 2)
12943 | uimm (opcode, 1, 0));
12944 switch (dispatch)
12945 {
12946 case 2: udiv32 (cpu); return;
12947 case 3: sdiv32 (cpu); return;
12948 case 4: lslv32 (cpu); return;
12949 case 5: lsrv32 (cpu); return;
12950 case 6: asrv32 (cpu); return;
12951 case 7: rorv32 (cpu); return;
12952 case 10: udiv64 (cpu); return;
12953 case 11: sdiv64 (cpu); return;
12954 case 12: lslv64 (cpu); return;
12955 case 13: lsrv64 (cpu); return;
12956 case 14: asrv64 (cpu); return;
12957 case 15: rorv64 (cpu); return;
12958 default: HALT_UNALLOC;
12959 }
12960 }
12961
12962
12963 /* Multiply. */
12964
12965 /* 32 bit multiply and add. */
12966 static void
12967 madd32 (sim_cpu *cpu)
12968 {
12969 unsigned rm = INSTR (20, 16);
12970 unsigned ra = INSTR (14, 10);
12971 unsigned rn = INSTR (9, 5);
12972 unsigned rd = INSTR (4, 0);
12973
12974 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12975 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12976 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12977 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12978 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12979 }
12980
12981 /* 64 bit multiply and add. */
12982 static void
12983 madd64 (sim_cpu *cpu)
12984 {
12985 unsigned rm = INSTR (20, 16);
12986 unsigned ra = INSTR (14, 10);
12987 unsigned rn = INSTR (9, 5);
12988 unsigned rd = INSTR (4, 0);
12989
12990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12992 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12993 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12994 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12995 }
12996
12997 /* 32 bit multiply and sub. */
12998 static void
12999 msub32 (sim_cpu *cpu)
13000 {
13001 unsigned rm = INSTR (20, 16);
13002 unsigned ra = INSTR (14, 10);
13003 unsigned rn = INSTR (9, 5);
13004 unsigned rd = INSTR (4, 0);
13005
13006 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13007 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13008 aarch64_get_reg_u32 (cpu, ra, NO_SP)
13009 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
13010 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
13011 }
13012
13013 /* 64 bit multiply and sub. */
13014 static void
13015 msub64 (sim_cpu *cpu)
13016 {
13017 unsigned rm = INSTR (20, 16);
13018 unsigned ra = INSTR (14, 10);
13019 unsigned rn = INSTR (9, 5);
13020 unsigned rd = INSTR (4, 0);
13021
13022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13024 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13025 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13026 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13027 }
13028
13029 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13030 static void
13031 smaddl (sim_cpu *cpu)
13032 {
13033 unsigned rm = INSTR (20, 16);
13034 unsigned ra = INSTR (14, 10);
13035 unsigned rn = INSTR (9, 5);
13036 unsigned rd = INSTR (4, 0);
13037
13038 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13039 obtain a 64 bit product. */
13040 aarch64_set_reg_s64
13041 (cpu, rd, NO_SP,
13042 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13043 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13044 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13045 }
13046
13047 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13048 static void
13049 smsubl (sim_cpu *cpu)
13050 {
13051 unsigned rm = INSTR (20, 16);
13052 unsigned ra = INSTR (14, 10);
13053 unsigned rn = INSTR (9, 5);
13054 unsigned rd = INSTR (4, 0);
13055
13056 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13057 obtain a 64 bit product. */
13058 aarch64_set_reg_s64
13059 (cpu, rd, NO_SP,
13060 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13061 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13062 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13063 }
13064
13065 /* Integer Multiply/Divide. */
13066
13067 /* First some macros and a helper function. */
13068 /* Macros to test or access elements of 64 bit words. */
13069
13070 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13071 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13072 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13073 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13074 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13075 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13076
13077 /* Offset of sign bit in 64 bit signed integer. */
13078 #define SIGN_SHIFT_U64 63
13079 /* The sign bit itself -- also identifies the minimum negative int value. */
13080 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
13081 /* Return true if a 64 bit signed int presented as an unsigned int is the
13082 most negative value. */
13083 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13084 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13085 int has its sign bit set. */
13086 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13087 /* Return 1L or -1L according to whether a 64 bit signed int presented as
13088 an unsigned int has its sign bit set or not. */
13089 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13090 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13091 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13092
13093 /* Multiply two 64 bit ints and return
13094 the hi 64 bits of the 128 bit product. */
13095
13096 static uint64_t
13097 mul64hi (uint64_t value1, uint64_t value2)
13098 {
13099 uint64_t resultmid1;
13100 uint64_t result;
13101 uint64_t value1_lo = lowWordToU64 (value1);
13102 uint64_t value1_hi = highWordToU64 (value1);
13103 uint64_t value2_lo = lowWordToU64 (value2);
13104 uint64_t value2_hi = highWordToU64 (value2);
13105
13106 /* Cross-multiply and collect results. */
13107 uint64_t xproductlo = value1_lo * value2_lo;
13108 uint64_t xproductmid1 = value1_lo * value2_hi;
13109 uint64_t xproductmid2 = value1_hi * value2_lo;
13110 uint64_t xproducthi = value1_hi * value2_hi;
13111 uint64_t carry = 0;
13112 /* Start accumulating 64 bit results. */
13113 /* Drop bottom half of lowest cross-product. */
13114 uint64_t resultmid = xproductlo >> 32;
13115 /* Add in middle products. */
13116 resultmid = resultmid + xproductmid1;
13117
13118 /* Check for overflow. */
13119 if (resultmid < xproductmid1)
13120 /* Carry over 1 into top cross-product. */
13121 carry++;
13122
13123 resultmid1 = resultmid + xproductmid2;
13124
13125 /* Check for overflow. */
13126 if (resultmid1 < xproductmid2)
13127 /* Carry over 1 into top cross-product. */
13128 carry++;
13129
13130 /* Drop lowest 32 bits of middle cross-product. */
13131 result = resultmid1 >> 32;
13132 /* Move carry bit to just above middle cross-product highest bit. */
13133 carry = carry << 32;
13134
13135 /* Add in the top cross-product and any carry. */
13136 result += xproducthi + carry;
13137
13138 return result;
13139 }
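
/* A minimal cross-check sketch for mul64hi, assuming a compiler that
   provides the unsigned __int128 extension (GCC/Clang); it is not part
   of the simulator build, hence the #if 0. */
#if 0
#include <assert.h>

static void
mul64hi_check (uint64_t a, uint64_t b)
{
  /* The cross-product sum computed above must equal the top half of
     the native 128 bit product. */
  unsigned __int128 wide = (unsigned __int128) a * b;

  assert (mul64hi (a, b) == (uint64_t) (wide >> 64));
}
#endif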
13140
13141 /* Signed multiply high -- source, source2 : 64 bit,
13142 dest <-- high 64 bits of the 128 bit result. */
13143 static void
13144 smulh (sim_cpu *cpu)
13145 {
13146 uint64_t uresult;
13147 int64_t result;
13148 unsigned rm = INSTR (20, 16);
13149 unsigned rn = INSTR (9, 5);
13150 unsigned rd = INSTR (4, 0);
13151 GReg ra = INSTR (14, 10);
13152 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13153 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13154 uint64_t uvalue1;
13155 uint64_t uvalue2;
13156 int negate = 0;
13157
13158 if (ra != R31)
13159 HALT_UNALLOC;
13160
13161 /* Convert to unsigned and use the unsigned mul64hi routine,
13162 then fix the sign up afterwards. */
13163 if (value1 < 0)
13164 {
13165 negate = !negate;
13166 uvalue1 = -value1;
13167 }
13168 else
13169 {
13170 uvalue1 = value1;
13171 }
13172
13173 if (value2 < 0)
13174 {
13175 negate = !negate;
13176 uvalue2 = -value2;
13177 }
13178 else
13179 {
13180 uvalue2 = value2;
13181 }
13182
13183 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13184
13185 uresult = mul64hi (uvalue1, uvalue2);
13186 result = uresult;
13187
13188 if (negate)
13189 {
13190 /* Multiply the 128-bit result by -1: the high part is inverted,
13191 with 1 (the carry in) added only if the low part is 0. */
13192 result = ~result;
13193 if ((uvalue1 * uvalue2) == 0)
13194 result += 1;
13195 }
13196
13197 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13198 }
13199
13200 /* Unsigned multiply add long -- source, source2 :
13201 32 bit, source3 : 64 bit. */
13202 static void
13203 umaddl (sim_cpu *cpu)
13204 {
13205 unsigned rm = INSTR (20, 16);
13206 unsigned ra = INSTR (14, 10);
13207 unsigned rn = INSTR (9, 5);
13208 unsigned rd = INSTR (4, 0);
13209
13210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13211 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13212 obtain a 64 bit product. */
13213 aarch64_set_reg_u64
13214 (cpu, rd, NO_SP,
13215 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13216 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13217 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13218 }
13219
13220 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13221 static void
13222 umsubl (sim_cpu *cpu)
13223 {
13224 unsigned rm = INSTR (20, 16);
13225 unsigned ra = INSTR (14, 10);
13226 unsigned rn = INSTR (9, 5);
13227 unsigned rd = INSTR (4, 0);
13228
13229 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13230 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13231 obtain a 64 bit product. */
13232 aarch64_set_reg_u64
13233 (cpu, rd, NO_SP,
13234 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13235 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13236 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13237 }
13238
13239 /* Unsigned multiply high -- source, source2 : 64 bit,
13240 dest <-- high 64 bits of the 128 bit result. */
13241 static void
13242 umulh (sim_cpu *cpu)
13243 {
13244 unsigned rm = INSTR (20, 16);
13245 unsigned rn = INSTR (9, 5);
13246 unsigned rd = INSTR (4, 0);
13247 GReg ra = INSTR (14, 10);
13248
13249 if (ra != R31)
13250 HALT_UNALLOC;
13251
13252 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13253 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13254 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13255 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13256 }
13257
13258 static void
13259 dexDataProc3Source (sim_cpu *cpu)
13260 {
13261 /* assert instr[28,24] == 11011. */
13262 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13263 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13264 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13265 instr[15] = o0 : 0/1 ==> ok
13266 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13267 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13268 0100 ==> SMULH, (64 bit only)
13269 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13270 1100 ==> UMULH (64 bit only)
13271 ow ==> UNALLOC. */
13272
13273 uint32_t dispatch;
13274 uint32_t size = INSTR (31, 31);
13275 uint32_t op54 = INSTR (30, 29);
13276 uint32_t op31 = INSTR (23, 21);
13277 uint32_t o0 = INSTR (15, 15);
13278
13279 if (op54 != 0)
13280 HALT_UNALLOC;
13281
13282 if (size == 0)
13283 {
13284 if (op31 != 0)
13285 HALT_UNALLOC;
13286
13287 if (o0 == 0)
13288 madd32 (cpu);
13289 else
13290 msub32 (cpu);
13291 return;
13292 }
13293
13294 dispatch = (op31 << 1) | o0;
13295
13296 switch (dispatch)
13297 {
13298 case 0: madd64 (cpu); return;
13299 case 1: msub64 (cpu); return;
13300 case 2: smaddl (cpu); return;
13301 case 3: smsubl (cpu); return;
13302 case 4: smulh (cpu); return;
13303 case 10: umaddl (cpu); return;
13304 case 11: umsubl (cpu); return;
13305 case 12: umulh (cpu); return;
13306 default: HALT_UNALLOC;
13307 }
13308 }
13309
13310 static void
13311 dexDPReg (sim_cpu *cpu)
13312 {
13313 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13314 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13315 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13316 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13317
13318 switch (group2)
13319 {
13320 case DPREG_LOG_000:
13321 case DPREG_LOG_001:
13322 dexLogicalShiftedRegister (cpu); return;
13323
13324 case DPREG_ADDSHF_010:
13325 dexAddSubtractShiftedRegister (cpu); return;
13326
13327 case DPREG_ADDEXT_011:
13328 dexAddSubtractExtendedRegister (cpu); return;
13329
13330 case DPREG_ADDCOND_100:
13331 {
13332 /* This set bundles a variety of different operations. */
13333 /* Check for: */
13334 /* 1) add/sub w carry. */
13335 uint32_t mask1 = 0x1FE00000U;
13336 uint32_t val1 = 0x1A000000U;
13337 /* 2) cond compare register/immediate. */
13338 uint32_t mask2 = 0x1FE00000U;
13339 uint32_t val2 = 0x1A400000U;
13340 /* 3) cond select. */
13341 uint32_t mask3 = 0x1FE00000U;
13342 uint32_t val3 = 0x1A800000U;
13343 /* 4) data proc 1/2 source. */
13344 uint32_t mask4 = 0x1FE00000U;
13345 uint32_t val4 = 0x1AC00000U;
13346
13347 if ((aarch64_get_instr (cpu) & mask1) == val1)
13348 dexAddSubtractWithCarry (cpu);
13349
13350 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13351 CondCompare (cpu);
13352
13353 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13354 dexCondSelect (cpu);
13355
13356 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13357 {
13358 /* Bit 30 is clear for data proc 2 source
13359 and set for data proc 1 source. */
13360 if (aarch64_get_instr (cpu) & (1U << 30))
13361 dexDataProc1Source (cpu);
13362 else
13363 dexDataProc2Source (cpu);
13364 }
13365
13366 else
13367 /* Should not reach here. */
13368 HALT_NYI;
13369
13370 return;
13371 }
13372
13373 case DPREG_3SRC_110:
13374 dexDataProc3Source (cpu); return;
13375
13376 case DPREG_UNALLOC_101:
13377 HALT_UNALLOC;
13378
13379 case DPREG_3SRC_111:
13380 dexDataProc3Source (cpu); return;
13381
13382 default:
13383 /* Should never reach here. */
13384 HALT_NYI;
13385 }
13386 }
13387
13388 /* Unconditional Branch immediate.
13389 Offset is a PC-relative byte offset in the range +/- 128MiB.
13390 The decode routine is expected to have already scaled the raw
13391 word offset held in the instruction up to a byte offset. */
13392
13393 /* Unconditional branch. */
13394 static void
13395 buc (sim_cpu *cpu, int32_t offset)
13396 {
13397 aarch64_set_next_PC_by_offset (cpu, offset);
13398 }
13399
13400 static unsigned stack_depth = 0;
13401
13402 /* Unconditional branch and link -- writes return PC to LR. */
13403 static void
13404 bl (sim_cpu *cpu, int32_t offset)
13405 {
13406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13407 aarch64_save_LR (cpu);
13408 aarch64_set_next_PC_by_offset (cpu, offset);
13409
13410 if (TRACE_BRANCH_P (cpu))
13411 {
13412 ++ stack_depth;
13413 TRACE_BRANCH (cpu,
13414 " %*scall %" PRIx64 " [%s]"
13415 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13416 stack_depth, " ", aarch64_get_next_PC (cpu),
13417 aarch64_get_func (CPU_STATE (cpu),
13418 aarch64_get_next_PC (cpu)),
13419 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13420 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13421 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13422 );
13423 }
13424 }
13425
13426 /* Unconditional Branch register.
13427 Branch/return address is in source register. */
13428
13429 /* Unconditional branch. */
13430 static void
13431 br (sim_cpu *cpu)
13432 {
13433 unsigned rn = INSTR (9, 5);
13434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13435 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13436 }
13437
13438 /* Unconditional branch and link -- writes return PC to LR. */
13439 static void
13440 blr (sim_cpu *cpu)
13441 {
13442 /* Ensure we read the destination before we write LR. */
13443 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13444
13445 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13446 aarch64_save_LR (cpu);
13447 aarch64_set_next_PC (cpu, target);
13448
13449 if (TRACE_BRANCH_P (cpu))
13450 {
13451 ++ stack_depth;
13452 TRACE_BRANCH (cpu,
13453 " %*scall %" PRIx64 " [%s]"
13454 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13455 stack_depth, " ", aarch64_get_next_PC (cpu),
13456 aarch64_get_func (CPU_STATE (cpu),
13457 aarch64_get_next_PC (cpu)),
13458 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13459 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13460 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13461 );
13462 }
13463 }
13464
13465 /* Return -- the assembler defaults the source register to LR. This is
13466 functionally equivalent to br but, presumably, unlike br it side
13467 effects the branch predictor. */
13468 static void
13469 ret (sim_cpu *cpu)
13470 {
13471 unsigned rn = INSTR (9, 5);
13472 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13473
13474 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13475 if (TRACE_BRANCH_P (cpu))
13476 {
13477 TRACE_BRANCH (cpu,
13478 " %*sreturn [result: %" PRIx64 "]",
13479 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13480 -- stack_depth;
13481 }
13482 }
13483
13484 /* NOP -- we implement this and call it from the decode in case we
13485 want to intercept it later. */
13486
13487 static void
13488 nop (sim_cpu *cpu)
13489 {
13490 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13491 }
13492
13493 /* Data synchronization barrier. */
13494
13495 static void
13496 dsb (sim_cpu *cpu)
13497 {
13498 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13499 }
13500
13501 /* Data memory barrier. */
13502
13503 static void
13504 dmb (sim_cpu *cpu)
13505 {
13506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13507 }
13508
13509 /* Instruction synchronization barrier. */
13510
13511 static void
13512 isb (sim_cpu *cpu)
13513 {
13514 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13515 }
13516
13517 static void
13518 dexBranchImmediate (sim_cpu *cpu)
13519 {
13520 /* assert instr[30,26] == 00101
13521 instr[31] ==> 0 == B, 1 == BL
13522 instr[25,0] == imm26 branch offset counted in words. */
13523
13524 uint32_t top = INSTR (31, 31);
13525 /* We have a 26 bit signed word offset which we need to pass to the
13526 execute routine as a signed byte offset. */
13527 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13528
13529 if (top)
13530 bl (cpu, offset);
13531 else
13532 buc (cpu, offset);
13533 }
13534
13535 /* Control Flow. */
13536
13537 /* Conditional branch
13538
13539 Offset is a PC-relative byte offset in the range +/- 1MiB. pos is
13540 a bit position in the range 0 .. 63.
13541
13542 cc is a CondCode enum value as pulled out of the decode
13543
13544 N.B. any offset register (source) can only be Xn or Wn. */
13545
13546 static void
13547 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13548 {
13549 /* The test returns TRUE if CC is met. */
13550 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13551 if (testConditionCode (cpu, cc))
13552 aarch64_set_next_PC_by_offset (cpu, offset);
13553 }
13554
13555 /* 32 bit branch on register non-zero. */
13556 static void
13557 cbnz32 (sim_cpu *cpu, int32_t offset)
13558 {
13559 unsigned rt = INSTR (4, 0);
13560
13561 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13562 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13563 aarch64_set_next_PC_by_offset (cpu, offset);
13564 }
13565
13566 /* 64 bit branch on register non-zero. */
13567 static void
13568 cbnz (sim_cpu *cpu, int32_t offset)
13569 {
13570 unsigned rt = INSTR (4, 0);
13571
13572 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13573 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13574 aarch64_set_next_PC_by_offset (cpu, offset);
13575 }
13576
13577 /* 32 bit branch on register zero. */
13578 static void
13579 cbz32 (sim_cpu *cpu, int32_t offset)
13580 {
13581 unsigned rt = INSTR (4, 0);
13582
13583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13584 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13585 aarch64_set_next_PC_by_offset (cpu, offset);
13586 }
13587
13588 /* 64 bit branch on register zero. */
13589 static void
13590 cbz (sim_cpu *cpu, int32_t offset)
13591 {
13592 unsigned rt = INSTR (4, 0);
13593
13594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13595 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13596 aarch64_set_next_PC_by_offset (cpu, offset);
13597 }
13598
13599 /* Branch on register bit test non-zero -- one size fits all. */
13600 static void
13601 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13602 {
13603 unsigned rt = INSTR (4, 0);
13604
13605 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13606 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13607 aarch64_set_next_PC_by_offset (cpu, offset);
13608 }
13609
13610 /* Branch on register bit test zero -- one size fits all. */
13611 static void
13612 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13613 {
13614 unsigned rt = INSTR (4, 0);
13615
13616 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13617 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13618 aarch64_set_next_PC_by_offset (cpu, offset);
13619 }
13620
13621 static void
13622 dexCompareBranchImmediate (sim_cpu *cpu)
13623 {
13624 /* instr[30,25] = 01 1010
13625 instr[31] = size : 0 ==> 32, 1 ==> 64
13626 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13627 instr[23,5] = simm19 branch offset counted in words
13628 instr[4,0] = rt */
13629
13630 uint32_t size = INSTR (31, 31);
13631 uint32_t op = INSTR (24, 24);
13632 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13633
13634 if (size == 0)
13635 {
13636 if (op == 0)
13637 cbz32 (cpu, offset);
13638 else
13639 cbnz32 (cpu, offset);
13640 }
13641 else
13642 {
13643 if (op == 0)
13644 cbz (cpu, offset);
13645 else
13646 cbnz (cpu, offset);
13647 }
13648 }
13649
13650 static void
13651 dexTestBranchImmediate (sim_cpu *cpu)
13652 {
13653 /* instr[31] = b5 : bit 5 of test bit idx
13654 instr[30,25] = 01 1011
13655 instr[24] = op : 0 ==> TBZ, 1 == TBNZ
13656 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13657 instr[18,5] = simm14 : signed offset counted in words
13658 instr[4,0] = uimm5 */
13659
13660 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13661 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13662
13663 NYI_assert (30, 25, 0x1b);
13664
13665 if (INSTR (24, 24) == 0)
13666 tbz (cpu, pos, offset);
13667 else
13668 tbnz (cpu, pos, offset);
13669 }
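
/* For example (illustration only), TBZ X5, #41, label splits the bit
   index 41 = 0b101001 into b5 = 1 (instr[31]) and b40 = 0b01001
   (instr[23,19]); the code above reassembles these into pos = 41. */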
13670
13671 static void
13672 dexCondBranchImmediate (sim_cpu *cpu)
13673 {
13674 /* instr[31,25] = 010 1010
13675 instr[24] = op1 : op1:op0 must be 00 for B.cond, ow ==> UNALLOC
13676 instr[23,5] = simm19 : signed offset counted in words
13677 instr[4] = op0
13678 instr[3,0] = cond */
13679
13680 int32_t offset;
13681 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13682
13683 NYI_assert (31, 25, 0x2a);
13684
13685 if (op != 0)
13686 HALT_UNALLOC;
13687
13688 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13689
13690 bcc (cpu, offset, INSTR (3, 0));
13691 }
13692
13693 static void
13694 dexBranchRegister (sim_cpu *cpu)
13695 {
13696 /* instr[31,25] = 110 1011
13697 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13698 instr[20,16] = op2 : must be 11111
13699 instr[15,10] = op3 : must be 000000
13700 instr[4,0] = op4 : must be 00000. */
13701
13702 uint32_t op = INSTR (24, 21);
13703 uint32_t op2 = INSTR (20, 16);
13704 uint32_t op3 = INSTR (15, 10);
13705 uint32_t op4 = INSTR (4, 0);
13706
13707 NYI_assert (31, 25, 0x6b);
13708
13709 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13710 HALT_UNALLOC;
13711
13712 if (op == 0)
13713 br (cpu);
13714
13715 else if (op == 1)
13716 blr (cpu);
13717
13718 else if (op == 2)
13719 ret (cpu);
13720
13721 else
13722 {
13723 /* ERET and DRPS accept 0b11111 only for rn = instr [9,5]; */
13724 /* anything else is unallocated. */
13725 uint32_t rn = INSTR (9, 5);
13726
13727 if (rn != 0x1f)
13728 HALT_UNALLOC;
13729
13730 if (op == 4 || op == 5)
13731 HALT_NYI;
13732
13733 HALT_UNALLOC;
13734 }
13735 }
13736
13737 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13738 but this may not be available. So instead we define the values we need
13739 here. */
13740 #define AngelSVC_Reason_Open 0x01
13741 #define AngelSVC_Reason_Close 0x02
13742 #define AngelSVC_Reason_Write 0x05
13743 #define AngelSVC_Reason_Read 0x06
13744 #define AngelSVC_Reason_IsTTY 0x09
13745 #define AngelSVC_Reason_Seek 0x0A
13746 #define AngelSVC_Reason_FLen 0x0C
13747 #define AngelSVC_Reason_Remove 0x0E
13748 #define AngelSVC_Reason_Rename 0x0F
13749 #define AngelSVC_Reason_Clock 0x10
13750 #define AngelSVC_Reason_Time 0x11
13751 #define AngelSVC_Reason_System 0x12
13752 #define AngelSVC_Reason_Errno 0x13
13753 #define AngelSVC_Reason_GetCmdLine 0x15
13754 #define AngelSVC_Reason_HeapInfo 0x16
13755 #define AngelSVC_Reason_ReportException 0x18
13756 #define AngelSVC_Reason_Elapsed 0x30
13757
13758
13759 static void
13760 handle_halt (sim_cpu *cpu, uint32_t val)
13761 {
13762 uint64_t result = 0;
13763
13764 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13765 if (val != 0xf000)
13766 {
13767 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13768 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13769 sim_stopped, SIM_SIGTRAP);
13770 }
13771
13772 /* We have encountered an Angel SVC call. See if we can process it. */
13773 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13774 {
13775 case AngelSVC_Reason_HeapInfo:
13776 {
13777 /* Get the values. */
13778 uint64_t stack_top = aarch64_get_stack_start (cpu);
13779 uint64_t heap_base = aarch64_get_heap_start (cpu);
13780
13781 /* Get the pointer. */
13782 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13783 ptr = aarch64_get_mem_u64 (cpu, ptr);
13784
13785 /* Fill in the memory block. */
13786 /* Start addr of heap. */
13787 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13788 /* End addr of heap. */
13789 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13790 /* Lowest stack addr. */
13791 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13792 /* Initial stack addr. */
13793 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13794
13795 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13796 }
13797 break;
13798
13799 case AngelSVC_Reason_Open:
13800 {
13801 /* Get the pointer. */
13802 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13803 /* FIXME: For now we just assume that we will only be asked
13804 to open the standard file descriptors. */
13805 static int fd = 0;
13806 result = fd ++;
13807
13808 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13809 }
13810 break;
13811
13812 case AngelSVC_Reason_Close:
13813 {
13814 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13815 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13816 result = 0;
13817 }
13818 break;
13819
13820 case AngelSVC_Reason_Errno:
13821 result = 0;
13822 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13823 break;
13824
13825 case AngelSVC_Reason_Clock:
13826 result =
13827 #ifdef CLOCKS_PER_SEC
13828 (CLOCKS_PER_SEC >= 100)
13829 ? (clock () / (CLOCKS_PER_SEC / 100))
13830 : ((clock () * 100) / CLOCKS_PER_SEC)
13831 #else
13832 /* Presume unix... clock() returns microseconds. */
13833 (clock () / 10000)
13834 #endif
13835 ;
13836 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13837 break;
13838
13839 case AngelSVC_Reason_GetCmdLine:
13840 {
13841 /* Get the pointer. */
13842 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13843 ptr = aarch64_get_mem_u64 (cpu, ptr);
13844
13845 /* FIXME: No command line for now. */
13846 aarch64_set_mem_u64 (cpu, ptr, 0);
13847 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13848 }
13849 break;
13850
13851 case AngelSVC_Reason_IsTTY:
13852 result = 1;
13853 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13854 break;
13855
13856 case AngelSVC_Reason_Write:
13857 {
13858 /* Get the pointer. */
13859 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13860 /* Get the write control block. */
13861 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13862 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13863 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13864
13865 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13866 PRIx64 " on descriptor %" PRIx64,
13867 len, buf, fd);
13868
13869 if (len > 1280)
13870 {
13871 TRACE_SYSCALL (cpu,
13872 " AngelSVC: Write: Suspiciously long write: %ld",
13873 (long) len);
13874 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13875 sim_stopped, SIM_SIGBUS);
13876 }
13877 else if (fd == 1)
13878 {
13879 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13880 }
13881 else if (fd == 2)
13882 {
13883 TRACE (cpu, 0, "\n");
13884 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13885 (int) len, aarch64_get_mem_ptr (cpu, buf));
13886 TRACE (cpu, 0, "\n");
13887 }
13888 else
13889 {
13890 TRACE_SYSCALL (cpu,
13891 " AngelSVC: Write: Unexpected file handle: %d",
13892 (int) fd);
13893 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13894 sim_stopped, SIM_SIGABRT);
13895 }
13896 }
13897 break;
13898
13899 case AngelSVC_Reason_ReportException:
13900 {
13901 /* Get the pointer. */
13902 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13903 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13904 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13905 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13906
13907 TRACE_SYSCALL (cpu,
13908 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13909 type, state);
13910
13911 if (type == 0x20026)
13912 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13913 sim_exited, state);
13914 else
13915 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13916 sim_stopped, SIM_SIGINT);
13917 }
13918 break;
13919
13920 case AngelSVC_Reason_Read:
13921 case AngelSVC_Reason_FLen:
13922 case AngelSVC_Reason_Seek:
13923 case AngelSVC_Reason_Remove:
13924 case AngelSVC_Reason_Time:
13925 case AngelSVC_Reason_System:
13926 case AngelSVC_Reason_Rename:
13927 case AngelSVC_Reason_Elapsed:
13928 default:
13929 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13930 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13931 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13932 sim_stopped, SIM_SIGTRAP);
13933 }
13934
13935 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13936 }
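
/* A sketch of the guest-side convention served above, for illustration
   only: the Angel reason code goes in x0, a pointer to a parameter
   block in x1, and the guest executes HLT #0xf000. The block layout
   matches the reads performed in the Write case. The function below is
   hypothetical guest code, not part of the simulator build. */
#if 0
static uint64_t
angel_write (uint64_t fd, const void *buf, uint64_t len)
{
  uint64_t block[3] = { fd, (uint64_t) buf, len };
  register uint64_t x0 __asm__ ("x0") = 0x05; /* AngelSVC_Reason_Write. */
  register uint64_t x1 __asm__ ("x1") = (uint64_t) block;

  __asm__ volatile ("hlt #0xf000" : "+r" (x0) : "r" (x1) : "memory");
  return x0; /* The result is handed back in x0. */
}
#endif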
13937
13938 static void
13939 dexExcpnGen (sim_cpu *cpu)
13940 {
13941 /* instr[31:24] = 11010100
13942 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13943 010 ==> HLT, 101 ==> DBG GEN EXCPN
13944 instr[20,5] = imm16
13945 instr[4,2] = opc2 : 000 ==> OK, ow ==> UNALLOC
13946 instr[1,0] = LL : discriminates opc */
13947
13948 uint32_t opc = INSTR (23, 21);
13949 uint32_t imm16 = INSTR (20, 5);
13950 uint32_t opc2 = INSTR (4, 2);
13951 uint32_t LL;
13952
13953 NYI_assert (31, 24, 0xd4);
13954
13955 if (opc2 != 0)
13956 HALT_UNALLOC;
13957
13958 LL = INSTR (1, 0);
13959
13960 /* We only implement HLT and BRK for now. */
13961 if (opc == 1 && LL == 0)
13962 {
13963 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13964 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13965 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13966 }
13967
13968 if (opc == 2 && LL == 0)
13969 handle_halt (cpu, imm16);
13970
13971 else if (opc == 0 || opc == 5)
13972 HALT_NYI;
13973
13974 else
13975 HALT_UNALLOC;
13976 }
13977
13978 /* Stub for accessing system registers. */
13979
13980 static uint64_t
13981 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13982 unsigned crm, unsigned op2)
13983 {
13984 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13985 /* DCZID_EL0 - the Data Cache Zero ID register.
13986 We do not support DC ZVA at the moment, so
13987 we return a value with the disable bit set.
13988 We implement support for the DCZID register since
13989 it is used by the C library's memset function. */
13990 return ((uint64_t) 1) << 4;
13991
13992 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13993 /* Cache Type Register. */
13994 return 0x80008000UL;
13995
13996 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13997 /* TPIDR_EL0 - thread pointer id. */
13998 return aarch64_get_thread_id (cpu);
13999
14000 if (op1 == 3 && crm == 4 && op2 == 0)
14001 return aarch64_get_FPCR (cpu);
14002
14003 if (op1 == 3 && crm == 4 && op2 == 1)
14004 return aarch64_get_FPSR (cpu);
14005
14006 else if (op1 == 3 && crm == 2 && op2 == 0)
14007 return aarch64_get_CPSR (cpu);
14008
14009 HALT_NYI;
14010 }
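
/* For example, the C library's memset probes the cache geometry with
   "mrs x0, dczid_el0"; per the ARMv8 ARM that register encodes as
   op0 = 3, op1 = 3, crn = 0, crm = 0, op2 = 7, so it is served by the
   first test above. */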
14011
14012 static void
14013 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14014 unsigned crm, unsigned op2, uint64_t val)
14015 {
14016 if (op1 == 3 && crm == 4 && op2 == 0)
14017 aarch64_set_FPCR (cpu, val);
14018
14019 else if (op1 == 3 && crm == 4 && op2 == 1)
14020 aarch64_set_FPSR (cpu, val);
14021
14022 else if (op1 == 3 && crm == 2 && op2 == 0)
14023 aarch64_set_CPSR (cpu, val);
14024
14025 else
14026 HALT_NYI;
14027 }
14028
14029 static void
14030 do_mrs (sim_cpu *cpu)
14031 {
14032 /* instr[31:20] = 1101 0101 0011
14033 instr[19] = op0
14034 instr[18,16] = op1
14035 instr[15,12] = CRn
14036 instr[11,8] = CRm
14037 instr[7,5] = op2
14038 instr[4,0] = Rt */
14039 unsigned sys_op0 = INSTR (19, 19) + 2;
14040 unsigned sys_op1 = INSTR (18, 16);
14041 unsigned sys_crn = INSTR (15, 12);
14042 unsigned sys_crm = INSTR (11, 8);
14043 unsigned sys_op2 = INSTR (7, 5);
14044 unsigned rt = INSTR (4, 0);
14045
14046 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14047 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14048 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14049 }
14050
14051 static void
14052 do_MSR_immediate (sim_cpu *cpu)
14053 {
14054 /* instr[31:19] = 1101 0101 0000 0
14055 instr[18,16] = op1
14056 instr[15,12] = 0100
14057 instr[11,8] = CRm
14058 instr[7,5] = op2
14059 instr[4,0] = 1 1111 */
14060
14061 unsigned op1 = INSTR (18, 16);
14062 /* unsigned crm = INSTR (11, 8); */
14063 unsigned op2 = INSTR (7, 5);
14064
14065 NYI_assert (31, 19, 0x1AA0);
14066 NYI_assert (15, 12, 0x4);
14067 NYI_assert (4, 0, 0x1F);
14068
14069 if (op1 == 0)
14070 {
14071 if (op2 == 5)
14072 HALT_NYI; /* set SPSel. */
14073 else
14074 HALT_UNALLOC;
14075 }
14076 else if (op1 == 3)
14077 {
14078 if (op2 == 6)
14079 HALT_NYI; /* set DAIFset. */
14080 else if (op2 == 7)
14081 HALT_NYI; /* set DAIFclr. */
14082 else
14083 HALT_UNALLOC;
14084 }
14085 else
14086 HALT_UNALLOC;
14087 }
14088
14089 static void
14090 do_MSR_reg (sim_cpu *cpu)
14091 {
14092 /* instr[31:20] = 1101 0101 0001
14093 instr[19] = op0
14094 instr[18,16] = op1
14095 instr[15,12] = CRn
14096 instr[11,8] = CRm
14097 instr[7,5] = op2
14098 instr[4,0] = Rt */
14099
14100 unsigned sys_op0 = INSTR (19, 19) + 2;
14101 unsigned sys_op1 = INSTR (18, 16);
14102 unsigned sys_crn = INSTR (15, 12);
14103 unsigned sys_crm = INSTR (11, 8);
14104 unsigned sys_op2 = INSTR (7, 5);
14105 unsigned rt = INSTR (4, 0);
14106
14107 NYI_assert (31, 20, 0xD51);
14108
14109 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14110 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14111 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14112 }
14113
14114 static void
14115 do_SYS (sim_cpu *cpu)
14116 {
14117 /* instr[31,19] = 1101 0101 0000 1
14118 instr[18,16] = op1
14119 instr[15,12] = CRn
14120 instr[11,8] = CRm
14121 instr[7,5] = op2
14122 instr[4,0] = Rt */
14123 NYI_assert (31, 19, 0x1AA1);
14124
14125 /* FIXME: For now we just silently accept system ops. */
14126 }
14127
14128 static void
14129 dexSystem (sim_cpu *cpu)
14130 {
14131 /* instr[31:22] = 1101 01010 0
14132 instr[21] = L
14133 instr[20,19] = op0
14134 instr[18,16] = op1
14135 instr[15,12] = CRn
14136 instr[11,8] = CRm
14137 instr[7,5] = op2
14138 instr[4,0] = uimm5 */
14139
14140 /* We are interested in HINT, DSB, DMB and ISB
14141
14142 Hint #0 encodes NOOP (this is the only hint we care about)
14143 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14144 CRm != 0000, or op2 == 000, or op2 > 101
14145
14146 DSB, DMB and ISB are data synchronization barrier, data memory
14147 barrier and instruction synchronization barrier, respectively, where
14148
14149 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14150 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14151 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14152 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14153 10 ==> InnerShareable, 11 ==> FullSystem
14154 types : 01 ==> Reads, 10 ==> Writes,
14155 11 ==> All, 00 ==> All (domain == FullSystem). */
14156
14157 unsigned rt = INSTR (4, 0);
14158
14159 NYI_assert (31, 22, 0x354);
14160
14161 switch (INSTR (21, 12))
14162 {
14163 case 0x032:
14164 if (rt == 0x1F)
14165 {
14166 /* NOP has CRm != 0000 OR
14167 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14168 uint32_t crm = INSTR (11, 8);
14169 uint32_t op2 = INSTR (7, 5);
14170
14171 if (crm != 0 || (op2 == 0 || op2 > 5))
14172 {
14173 /* Actually call nop method so we can reimplement it later. */
14174 nop (cpu);
14175 return;
14176 }
14177 }
14178 HALT_NYI;
14179
14180 case 0x033:
14181 {
14182 uint32_t op2 = INSTR (7, 5);
14183
14184 switch (op2)
14185 {
14186 case 2: HALT_NYI;
14187 case 4: dsb (cpu); return;
14188 case 5: dmb (cpu); return;
14189 case 6: isb (cpu); return;
14190 default: HALT_UNALLOC;
14191 }
14192 }
14193
14194 case 0x3B0:
14195 case 0x3B4:
14196 case 0x3BD:
14197 do_mrs (cpu);
14198 return;
14199
14200 case 0x0B7:
14201 do_SYS (cpu); /* DC is an alias of SYS. */
14202 return;
14203
14204 default:
14205 if (INSTR (21, 20) == 0x1)
14206 do_MSR_reg (cpu);
14207 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14208 do_MSR_immediate (cpu);
14209 else
14210 HALT_NYI;
14211 return;
14212 }
14213 }
14214
14215 static void
14216 dexBr (sim_cpu *cpu)
14217 {
14218 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14219 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14220 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14221 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14222
14223 switch (group2)
14224 {
14225 case BR_IMM_000:
14226 return dexBranchImmediate (cpu);
14227
14228 case BR_IMMCMP_001:
14229 /* Compare has bit 25 clear while test has it set. */
14230 if (!INSTR (25, 25))
14231 dexCompareBranchImmediate (cpu);
14232 else
14233 dexTestBranchImmediate (cpu);
14234 return;
14235
14236 case BR_IMMCOND_010:
14237 /* This is a conditional branch if bit 25 is clear otherwise
14238 unallocated. */
14239 if (!INSTR (25, 25))
14240 dexCondBranchImmediate (cpu);
14241 else
14242 HALT_UNALLOC;
14243 return;
14244
14245 case BR_UNALLOC_011:
14246 HALT_UNALLOC;
14247
14248 case BR_IMM_100:
14249 dexBranchImmediate (cpu);
14250 return;
14251
14252 case BR_IMMCMP_101:
14253 /* Compare has bit 25 clear while test has it set. */
14254 if (!INSTR (25, 25))
14255 dexCompareBranchImmediate (cpu);
14256 else
14257 dexTestBranchImmediate (cpu);
14258 return;
14259
14260 case BR_REG_110:
14261 /* Unconditional branch reg has bit 25 set. */
14262 if (INSTR (25, 25))
14263 dexBranchRegister (cpu);
14264
14265 /* This includes both Excpn Gen, System and unalloc operations.
14266 We need to decode the Excpn Gen operation BRK so we can plant
14267 debugger entry points.
14268 Excpn Gen operations have instr [24] = 0.
14269 we need to decode at least one of the System operations NOP
14270 which is an alias for HINT #0.
14271 System operations have instr [24,22] = 100. */
14272 else if (INSTR (24, 24) == 0)
14273 dexExcpnGen (cpu);
14274
14275 else if (INSTR (24, 22) == 4)
14276 dexSystem (cpu);
14277
14278 else
14279 HALT_UNALLOC;
14280
14281 return;
14282
14283 case BR_UNALLOC_111:
14284 HALT_UNALLOC;
14285
14286 default:
14287 /* Should never reach here. */
14288 HALT_NYI;
14289 }
14290 }
14291
14292 static void
14293 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14294 {
14295 /* We need to check if gdb wants to break in here. */
14296 /* checkBreak (cpu); */
14297
14298 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14299
14300 switch (group)
14301 {
14302 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14303 case GROUP_LDST_0100: dexLdSt (cpu); break;
14304 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14305 case GROUP_LDST_0110: dexLdSt (cpu); break;
14306 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14307 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14308 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14309 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14310 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14311 case GROUP_LDST_1100: dexLdSt (cpu); break;
14312 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14313 case GROUP_LDST_1110: dexLdSt (cpu); break;
14314 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14315
14316 case GROUP_UNALLOC_0001:
14317 case GROUP_UNALLOC_0010:
14318 case GROUP_UNALLOC_0011:
14319 HALT_UNALLOC;
14320
14321 default:
14322 /* Should never reach here. */
14323 HALT_NYI;
14324 }
14325 }
14326
14327 static bfd_boolean
14328 aarch64_step (sim_cpu *cpu)
14329 {
14330 uint64_t pc = aarch64_get_PC (cpu);
14331
14332 if (pc == TOP_LEVEL_RETURN_PC)
14333 return FALSE;
14334
14335 aarch64_set_next_PC (cpu, pc + 4);
14336
14337 /* Code is always little-endian. */
14338 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14339 & aarch64_get_instr (cpu), pc, 4);
14340 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14341
14342 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14343 aarch64_get_instr (cpu));
14344 TRACE_DISASM (cpu, pc);
14345
14346 aarch64_decode_and_execute (cpu, pc);
14347
14348 return TRUE;
14349 }
14350
14351 void
14352 aarch64_run (SIM_DESC sd)
14353 {
14354 sim_cpu *cpu = STATE_CPU (sd, 0);
14355
14356 while (aarch64_step (cpu))
14357 {
14358 aarch64_update_PC (cpu);
14359
14360 if (sim_events_tick (sd))
14361 sim_events_process (sd);
14362 }
14363
14364 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14365 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14366 }
14367
14368 void
14369 aarch64_init (sim_cpu *cpu, uint64_t pc)
14370 {
14371 uint64_t sp = aarch64_get_stack_start (cpu);
14372
14373 /* Install SP, FP and PC and set LR to -20
14374 so we can detect a top-level return. */
14375 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14376 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14377 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14378 aarch64_set_next_PC (cpu, pc);
14379 aarch64_update_PC (cpu);
14380 aarch64_init_LIT_table ();
14381 }