[thirdparty/qemu.git] tcg/i386/tcg-target.inc.c - tcg: Improve the alignment check infrastructure
1 /*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "tcg-be-ldst.h"
26
27 #ifdef CONFIG_DEBUG_TCG
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
32 #else
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
34 #endif
35 };
36 #endif
37
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
40 TCG_REG_RBP,
41 TCG_REG_RBX,
42 TCG_REG_R12,
43 TCG_REG_R13,
44 TCG_REG_R14,
45 TCG_REG_R15,
46 TCG_REG_R10,
47 TCG_REG_R11,
48 TCG_REG_R9,
49 TCG_REG_R8,
50 TCG_REG_RCX,
51 TCG_REG_RDX,
52 TCG_REG_RSI,
53 TCG_REG_RDI,
54 TCG_REG_RAX,
55 #else
56 TCG_REG_EBX,
57 TCG_REG_ESI,
58 TCG_REG_EDI,
59 TCG_REG_EBP,
60 TCG_REG_ECX,
61 TCG_REG_EDX,
62 TCG_REG_EAX,
63 #endif
64 };
65
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
68 #if defined(_WIN64)
69 TCG_REG_RCX,
70 TCG_REG_RDX,
71 #else
72 TCG_REG_RDI,
73 TCG_REG_RSI,
74 TCG_REG_RDX,
75 TCG_REG_RCX,
76 #endif
77 TCG_REG_R8,
78 TCG_REG_R9,
79 #else
80 /* 32-bit mode uses a stack-based calling convention (GCC default). */
81 #endif
82 };
83
84 static const int tcg_target_call_oarg_regs[] = {
85 TCG_REG_EAX,
86 #if TCG_TARGET_REG_BITS == 32
87 TCG_REG_EDX
88 #endif
89 };
90
91 /* Constants we accept. */
92 #define TCG_CT_CONST_S32 0x100
93 #define TCG_CT_CONST_U32 0x200
94 #define TCG_CT_CONST_I32 0x400
95
96 /* Registers used with L constraint, which are the first argument
97 registers on x86_64, and two random call clobbered registers on
98 i386. */
99 #if TCG_TARGET_REG_BITS == 64
100 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
101 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
102 #else
103 # define TCG_REG_L0 TCG_REG_EAX
104 # define TCG_REG_L1 TCG_REG_EDX
105 #endif
106
107 /* The host compiler should supply <cpuid.h> to enable runtime feature
108 detection, as we're not going to go so far as our own inline assembly.
109 If not available, default values will be assumed. */
110 #if defined(CONFIG_CPUID_H)
111 #include <cpuid.h>
112 #endif
113
114 /* For 32-bit, we are going to attempt to determine at runtime whether cmov
115 is available. */
116 #if TCG_TARGET_REG_BITS == 64
117 # define have_cmov 1
118 #elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
119 static bool have_cmov;
120 #else
121 # define have_cmov 0
122 #endif
123
124 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
125 going to attempt to determine at runtime whether movbe is available. */
126 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
127 static bool have_movbe;
128 #else
129 # define have_movbe 0
130 #endif
131
132 /* We need this symbol in tcg-target.h, and we can't properly conditionalize
133 it there. Therefore we always define the variable. */
134 bool have_bmi1;
135
136 #if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
137 static bool have_bmi2;
138 #else
139 # define have_bmi2 0
140 #endif
141
142 static tcg_insn_unit *tb_ret_addr;
143
144 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
145 intptr_t value, intptr_t addend)
146 {
147 value += addend;
148 switch(type) {
149 case R_386_PC32:
150 value -= (uintptr_t)code_ptr;
151 if (value != (int32_t)value) {
152 tcg_abort();
153 }
154 tcg_patch32(code_ptr, value);
155 break;
156 case R_386_PC8:
157 value -= (uintptr_t)code_ptr;
158 if (value != (int8_t)value) {
159 tcg_abort();
160 }
161 tcg_patch8(code_ptr, value);
162 break;
163 default:
164 tcg_abort();
165 }
166 }
167
168 /* parse target specific constraints */
169 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
170 {
171 const char *ct_str;
172
173 ct_str = *pct_str;
174 switch(ct_str[0]) {
175 case 'a':
176 ct->ct |= TCG_CT_REG;
177 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
178 break;
179 case 'b':
180 ct->ct |= TCG_CT_REG;
181 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
182 break;
183 case 'c':
184 case_c:
185 ct->ct |= TCG_CT_REG;
186 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
187 break;
188 case 'd':
189 ct->ct |= TCG_CT_REG;
190 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
191 break;
192 case 'S':
193 ct->ct |= TCG_CT_REG;
194 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
195 break;
196 case 'D':
197 ct->ct |= TCG_CT_REG;
198 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
199 break;
200 case 'q':
201 ct->ct |= TCG_CT_REG;
202 if (TCG_TARGET_REG_BITS == 64) {
203 tcg_regset_set32(ct->u.regs, 0, 0xffff);
204 } else {
205 tcg_regset_set32(ct->u.regs, 0, 0xf);
206 }
207 break;
208 case 'Q':
209 ct->ct |= TCG_CT_REG;
210 tcg_regset_set32(ct->u.regs, 0, 0xf);
211 break;
212 case 'r':
213 case_r:
214 ct->ct |= TCG_CT_REG;
215 if (TCG_TARGET_REG_BITS == 64) {
216 tcg_regset_set32(ct->u.regs, 0, 0xffff);
217 } else {
218 tcg_regset_set32(ct->u.regs, 0, 0xff);
219 }
220 break;
221 case 'C':
222 /* With SHRX et al, we need not use ECX as shift count register. */
223 if (have_bmi2) {
224 goto case_r;
225 } else {
226 goto case_c;
227 }
228
229 /* qemu_ld/st address constraint */
230 case 'L':
231 ct->ct |= TCG_CT_REG;
232 if (TCG_TARGET_REG_BITS == 64) {
233 tcg_regset_set32(ct->u.regs, 0, 0xffff);
234 } else {
235 tcg_regset_set32(ct->u.regs, 0, 0xff);
236 }
237 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
238 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
239 break;
240
241 case 'e':
242 ct->ct |= TCG_CT_CONST_S32;
243 break;
244 case 'Z':
245 ct->ct |= TCG_CT_CONST_U32;
246 break;
247 case 'I':
248 ct->ct |= TCG_CT_CONST_I32;
249 break;
250
251 default:
252 return -1;
253 }
254 ct_str++;
255 *pct_str = ct_str;
256 return 0;
257 }
258
259 /* test if a constant matches the constraint */
260 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
261 const TCGArgConstraint *arg_ct)
262 {
263 int ct = arg_ct->ct;
264 if (ct & TCG_CT_CONST) {
265 return 1;
266 }
267 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
268 return 1;
269 }
270 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
271 return 1;
272 }
273 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
274 return 1;
275 }
276 return 0;
277 }
278
279 #if TCG_TARGET_REG_BITS == 64
280 # define LOWREGMASK(x) ((x) & 7)
281 #else
282 # define LOWREGMASK(x) (x)
283 #endif
284
285 #define P_EXT 0x100 /* 0x0f opcode prefix */
286 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
287 #define P_DATA16 0x400 /* 0x66 opcode prefix */
288 #if TCG_TARGET_REG_BITS == 64
289 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
290 # define P_REXW 0x1000 /* Set REX.W = 1 */
291 # define P_REXB_R 0x2000 /* REG field as byte register */
292 # define P_REXB_RM 0x4000 /* R/M field as byte register */
293 # define P_GS 0x8000 /* gs segment override */
294 #else
295 # define P_ADDR32 0
296 # define P_REXW 0
297 # define P_REXB_R 0
298 # define P_REXB_RM 0
299 # define P_GS 0
300 #endif
301 #define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
302 #define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
303
304 #define OPC_ARITH_EvIz (0x81)
305 #define OPC_ARITH_EvIb (0x83)
306 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
307 #define OPC_ANDN (0xf2 | P_EXT38)
308 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
309 #define OPC_BSWAP (0xc8 | P_EXT)
310 #define OPC_CALL_Jz (0xe8)
311 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
312 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
313 #define OPC_DEC_r32 (0x48)
314 #define OPC_IMUL_GvEv (0xaf | P_EXT)
315 #define OPC_IMUL_GvEvIb (0x6b)
316 #define OPC_IMUL_GvEvIz (0x69)
317 #define OPC_INC_r32 (0x40)
318 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
319 #define OPC_JCC_short (0x70) /* ... plus condition code */
320 #define OPC_JMP_long (0xe9)
321 #define OPC_JMP_short (0xeb)
322 #define OPC_LEA (0x8d)
323 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
324 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
325 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
326 #define OPC_MOVB_EvIz (0xc6)
327 #define OPC_MOVL_EvIz (0xc7)
328 #define OPC_MOVL_Iv (0xb8)
329 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
330 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
331 #define OPC_MOVSBL (0xbe | P_EXT)
332 #define OPC_MOVSWL (0xbf | P_EXT)
333 #define OPC_MOVSLQ (0x63 | P_REXW)
334 #define OPC_MOVZBL (0xb6 | P_EXT)
335 #define OPC_MOVZWL (0xb7 | P_EXT)
336 #define OPC_POP_r32 (0x58)
337 #define OPC_PUSH_r32 (0x50)
338 #define OPC_PUSH_Iv (0x68)
339 #define OPC_PUSH_Ib (0x6a)
340 #define OPC_RET (0xc3)
341 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
342 #define OPC_SHIFT_1 (0xd1)
343 #define OPC_SHIFT_Ib (0xc1)
344 #define OPC_SHIFT_cl (0xd3)
345 #define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
346 #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
347 #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
348 #define OPC_TESTL (0x85)
349 #define OPC_XCHG_ax_r32 (0x90)
350
351 #define OPC_GRP3_Ev (0xf7)
352 #define OPC_GRP5 (0xff)
353
354 /* Group 1 opcode extensions for 0x80-0x83.
355 These are also used as modifiers for OPC_ARITH. */
356 #define ARITH_ADD 0
357 #define ARITH_OR 1
358 #define ARITH_ADC 2
359 #define ARITH_SBB 3
360 #define ARITH_AND 4
361 #define ARITH_SUB 5
362 #define ARITH_XOR 6
363 #define ARITH_CMP 7
364
365 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
366 #define SHIFT_ROL 0
367 #define SHIFT_ROR 1
368 #define SHIFT_SHL 4
369 #define SHIFT_SHR 5
370 #define SHIFT_SAR 7
371
372 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
373 #define EXT3_NOT 2
374 #define EXT3_NEG 3
375 #define EXT3_MUL 4
376 #define EXT3_IMUL 5
377 #define EXT3_DIV 6
378 #define EXT3_IDIV 7
379
380 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
381 #define EXT5_INC_Ev 0
382 #define EXT5_DEC_Ev 1
383 #define EXT5_CALLN_Ev 2
384 #define EXT5_JMPN_Ev 4
385
386 /* Condition codes to be added to OPC_JCC_{long,short}. */
387 #define JCC_JMP (-1)
388 #define JCC_JO 0x0
389 #define JCC_JNO 0x1
390 #define JCC_JB 0x2
391 #define JCC_JAE 0x3
392 #define JCC_JE 0x4
393 #define JCC_JNE 0x5
394 #define JCC_JBE 0x6
395 #define JCC_JA 0x7
396 #define JCC_JS 0x8
397 #define JCC_JNS 0x9
398 #define JCC_JP 0xa
399 #define JCC_JNP 0xb
400 #define JCC_JL 0xc
401 #define JCC_JGE 0xd
402 #define JCC_JLE 0xe
403 #define JCC_JG 0xf
404
405 static const uint8_t tcg_cond_to_jcc[] = {
406 [TCG_COND_EQ] = JCC_JE,
407 [TCG_COND_NE] = JCC_JNE,
408 [TCG_COND_LT] = JCC_JL,
409 [TCG_COND_GE] = JCC_JGE,
410 [TCG_COND_LE] = JCC_JLE,
411 [TCG_COND_GT] = JCC_JG,
412 [TCG_COND_LTU] = JCC_JB,
413 [TCG_COND_GEU] = JCC_JAE,
414 [TCG_COND_LEU] = JCC_JBE,
415 [TCG_COND_GTU] = JCC_JA,
416 };
417
418 #if TCG_TARGET_REG_BITS == 64
419 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
420 {
421 int rex;
422
423 if (opc & P_GS) {
424 tcg_out8(s, 0x65);
425 }
426 if (opc & P_DATA16) {
427 /* We should never be asking for both 16 and 64-bit operation. */
428 tcg_debug_assert((opc & P_REXW) == 0);
429 tcg_out8(s, 0x66);
430 }
431 if (opc & P_ADDR32) {
432 tcg_out8(s, 0x67);
433 }
434
435 rex = 0;
436 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
437 rex |= (r & 8) >> 1; /* REX.R */
438 rex |= (x & 8) >> 2; /* REX.X */
439 rex |= (rm & 8) >> 3; /* REX.B */
440
441 /* P_REXB_{R,RM} indicates that the given register is the low byte.
442 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
443 as otherwise the encoding indicates %[abcd]h. Note that the values
444 that are ORed in merely indicate that the REX byte must be present;
445 those bits get discarded in output. */
446 rex |= opc & (r >= 4 ? P_REXB_R : 0);
447 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
448
449 if (rex) {
450 tcg_out8(s, (uint8_t)(rex | 0x40));
451 }
452
453 if (opc & (P_EXT | P_EXT38)) {
454 tcg_out8(s, 0x0f);
455 if (opc & P_EXT38) {
456 tcg_out8(s, 0x38);
457 }
458 }
459
460 tcg_out8(s, opc);
461 }
462 #else
463 static void tcg_out_opc(TCGContext *s, int opc)
464 {
465 if (opc & P_DATA16) {
466 tcg_out8(s, 0x66);
467 }
468 if (opc & (P_EXT | P_EXT38)) {
469 tcg_out8(s, 0x0f);
470 if (opc & P_EXT38) {
471 tcg_out8(s, 0x38);
472 }
473 }
474 tcg_out8(s, opc);
475 }
476 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
477 the 32-bit compilation paths. This method works with all versions of gcc,
478 whereas relying on optimization may not be able to exclude them. */
479 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
480 #endif
481
482 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
483 {
484 tcg_out_opc(s, opc, r, rm, 0);
485 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
486 }
487
488 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
489 {
490 int tmp;
491
492 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
493 /* Three byte VEX prefix. */
494 tcg_out8(s, 0xc4);
495
496 /* VEX.m-mmmm */
497 if (opc & P_EXT38) {
498 tmp = 2;
499 } else if (opc & P_EXT) {
500 tmp = 1;
501 } else {
502 tcg_abort();
503 }
504 tmp |= 0x40; /* VEX.X */
505 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
506 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
507 tcg_out8(s, tmp);
508
509 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
510 } else {
511 /* Two byte VEX prefix. */
512 tcg_out8(s, 0xc5);
513
514 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
515 }
516 /* VEX.pp */
517 if (opc & P_DATA16) {
518 tmp |= 1; /* 0x66 */
519 } else if (opc & P_SIMDF3) {
520 tmp |= 2; /* 0xf3 */
521 } else if (opc & P_SIMDF2) {
522 tmp |= 3; /* 0xf2 */
523 }
524 tmp |= (~v & 15) << 3; /* VEX.vvvv */
525 tcg_out8(s, tmp);
526 tcg_out8(s, opc);
527 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
528 }
529
530 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
531 Either RM or INDEX may be omitted, indicated by a negative value. In 64-bit
532 mode for absolute addresses, ~RM is the size of the immediate operand
533 that will follow the instruction. */
534
535 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
536 int index, int shift, intptr_t offset)
537 {
538 int mod, len;
539
540 if (index < 0 && rm < 0) {
541 if (TCG_TARGET_REG_BITS == 64) {
542 /* Try for a rip-relative addressing mode. This has replaced
543 the 32-bit-mode absolute addressing encoding. */
544 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
545 intptr_t disp = offset - pc;
546 if (disp == (int32_t)disp) {
547 tcg_out_opc(s, opc, r, 0, 0);
548 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
549 tcg_out32(s, disp);
550 return;
551 }
552
553 /* Try for an absolute address encoding. This requires the
554 use of the MODRM+SIB encoding and is therefore larger than
555 rip-relative addressing. */
556 if (offset == (int32_t)offset) {
557 tcg_out_opc(s, opc, r, 0, 0);
558 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
559 tcg_out8(s, (4 << 3) | 5);
560 tcg_out32(s, offset);
561 return;
562 }
563
564 /* ??? The memory isn't directly addressable. */
565 tcg_abort();
566 } else {
567 /* Absolute address. */
568 tcg_out_opc(s, opc, r, 0, 0);
569 tcg_out8(s, (r << 3) | 5);
570 tcg_out32(s, offset);
571 return;
572 }
573 }
574
575 /* Find the length of the immediate addend. Note that the encoding
576 that would be used for (%ebp) indicates absolute addressing. */
577 if (rm < 0) {
578 mod = 0, len = 4, rm = 5;
579 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
580 mod = 0, len = 0;
581 } else if (offset == (int8_t)offset) {
582 mod = 0x40, len = 1;
583 } else {
584 mod = 0x80, len = 4;
585 }
586
587 /* Use a single byte MODRM format if possible. Note that the encoding
588 that would be used for %esp is the escape to the two byte form. */
589 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
590 /* Single byte MODRM format. */
591 tcg_out_opc(s, opc, r, rm, 0);
592 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
593 } else {
594 /* Two byte MODRM+SIB format. */
595
596 /* Note that the encoding that would place %esp into the index
597 field indicates no index register. In 64-bit mode, the REX.X
598 bit counts, so %r12 can be used as the index. */
599 if (index < 0) {
600 index = 4;
601 } else {
602 tcg_debug_assert(index != TCG_REG_ESP);
603 }
604
605 tcg_out_opc(s, opc, r, rm, index);
606 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
607 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
608 }
609
610 if (len == 1) {
611 tcg_out8(s, offset);
612 } else if (len == 4) {
613 tcg_out32(s, offset);
614 }
615 }
616
617 /* A simplification of the above with no index or shift. */
618 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
619 int rm, intptr_t offset)
620 {
621 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
622 }
623
624 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
625 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
626 {
627 /* Propagate an opcode prefix, such as P_REXW. */
628 int ext = subop & ~0x7;
629 subop &= 0x7;
630
631 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
632 }
633
634 static inline void tcg_out_mov(TCGContext *s, TCGType type,
635 TCGReg ret, TCGReg arg)
636 {
637 if (arg != ret) {
638 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
639 tcg_out_modrm(s, opc, ret, arg);
640 }
641 }
642
643 static void tcg_out_movi(TCGContext *s, TCGType type,
644 TCGReg ret, tcg_target_long arg)
645 {
646 tcg_target_long diff;
647
648 if (arg == 0) {
649 tgen_arithr(s, ARITH_XOR, ret, ret);
650 return;
651 }
652 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
653 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
654 tcg_out32(s, arg);
655 return;
656 }
657 if (arg == (int32_t)arg) {
658 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
659 tcg_out32(s, arg);
660 return;
661 }
662
663 /* Try a 7 byte pc-relative lea before the 10 byte movq. */
664 diff = arg - ((uintptr_t)s->code_ptr + 7);
665 if (diff == (int32_t)diff) {
666 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
667 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
668 tcg_out32(s, diff);
669 return;
670 }
671
672 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
673 tcg_out64(s, arg);
674 }
675
676 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
677 {
678 if (val == (int8_t)val) {
679 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
680 tcg_out8(s, val);
681 } else if (val == (int32_t)val) {
682 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
683 tcg_out32(s, val);
684 } else {
685 tcg_abort();
686 }
687 }
688
689 static inline void tcg_out_push(TCGContext *s, int reg)
690 {
691 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
692 }
693
694 static inline void tcg_out_pop(TCGContext *s, int reg)
695 {
696 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
697 }
698
699 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
700 TCGReg arg1, intptr_t arg2)
701 {
702 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
703 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
704 }
705
706 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
707 TCGReg arg1, intptr_t arg2)
708 {
709 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
710 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
711 }
712
713 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
714 TCGReg base, intptr_t ofs)
715 {
716 int rexw = 0;
717 if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
718 if (val != (int32_t)val) {
719 return false;
720 }
721 rexw = P_REXW;
722 }
723 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
724 tcg_out32(s, val);
725 return true;
726 }
727
728 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
729 {
730 /* Propagate an opcode prefix, such as P_DATA16. */
731 int ext = subopc & ~0x7;
732 subopc &= 0x7;
733
734 if (count == 1) {
735 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
736 } else {
737 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
738 tcg_out8(s, count);
739 }
740 }
741
742 static inline void tcg_out_bswap32(TCGContext *s, int reg)
743 {
744 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
745 }
746
747 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
748 {
749 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
750 }
751
752 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
753 {
754 /* movzbl */
755 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
756 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
757 }
758
759 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
760 {
761 /* movsbl */
762 tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
763 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
764 }
765
766 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
767 {
768 /* movzwl */
769 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
770 }
771
772 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
773 {
774 /* movsw[lq] */
775 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
776 }
777
778 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
779 {
780 /* 32-bit mov zero extends. */
781 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
782 }
783
784 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
785 {
786 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
787 }
788
789 static inline void tcg_out_bswap64(TCGContext *s, int reg)
790 {
791 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
792 }
793
794 static void tgen_arithi(TCGContext *s, int c, int r0,
795 tcg_target_long val, int cf)
796 {
797 int rexw = 0;
798
799 if (TCG_TARGET_REG_BITS == 64) {
800 rexw = c & -8;
801 c &= 7;
802 }
803
804 /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
805 partial flags update stalls on Pentium4 and are not recommended
806 by current Intel optimization manuals. */
807 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
808 int is_inc = (c == ARITH_ADD) ^ (val < 0);
809 if (TCG_TARGET_REG_BITS == 64) {
810 /* The single-byte increment encodings are re-tasked as the
811 REX prefixes. Use the MODRM encoding. */
812 tcg_out_modrm(s, OPC_GRP5 + rexw,
813 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
814 } else {
815 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
816 }
817 return;
818 }
819
820 if (c == ARITH_AND) {
821 if (TCG_TARGET_REG_BITS == 64) {
822 if (val == 0xffffffffu) {
823 tcg_out_ext32u(s, r0, r0);
824 return;
825 }
826 if (val == (uint32_t)val) {
827 /* AND with no high bits set can use a 32-bit operation. */
828 rexw = 0;
829 }
830 }
831 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
832 tcg_out_ext8u(s, r0, r0);
833 return;
834 }
835 if (val == 0xffffu) {
836 tcg_out_ext16u(s, r0, r0);
837 return;
838 }
839 }
840
841 if (val == (int8_t)val) {
842 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
843 tcg_out8(s, val);
844 return;
845 }
846 if (rexw == 0 || val == (int32_t)val) {
847 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
848 tcg_out32(s, val);
849 return;
850 }
851
852 tcg_abort();
853 }
854
855 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
856 {
857 if (val != 0) {
858 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
859 }
860 }
861
862 /* Use SMALL != 0 to force a short forward branch. */
863 static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
864 {
865 int32_t val, val1;
866
867 if (l->has_value) {
868 val = tcg_pcrel_diff(s, l->u.value_ptr);
869 val1 = val - 2;
870 if ((int8_t)val1 == val1) {
871 if (opc == -1) {
872 tcg_out8(s, OPC_JMP_short);
873 } else {
874 tcg_out8(s, OPC_JCC_short + opc);
875 }
876 tcg_out8(s, val1);
877 } else {
878 if (small) {
879 tcg_abort();
880 }
881 if (opc == -1) {
882 tcg_out8(s, OPC_JMP_long);
883 tcg_out32(s, val - 5);
884 } else {
885 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
886 tcg_out32(s, val - 6);
887 }
888 }
889 } else if (small) {
890 if (opc == -1) {
891 tcg_out8(s, OPC_JMP_short);
892 } else {
893 tcg_out8(s, OPC_JCC_short + opc);
894 }
895 tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
896 s->code_ptr += 1;
897 } else {
898 if (opc == -1) {
899 tcg_out8(s, OPC_JMP_long);
900 } else {
901 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
902 }
903 tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
904 s->code_ptr += 4;
905 }
906 }
907
908 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
909 int const_arg2, int rexw)
910 {
911 if (const_arg2) {
912 if (arg2 == 0) {
913 /* test r, r */
914 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
915 } else {
916 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
917 }
918 } else {
919 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
920 }
921 }
922
923 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
924 TCGArg arg1, TCGArg arg2, int const_arg2,
925 TCGLabel *label, int small)
926 {
927 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
928 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
929 }
930
931 #if TCG_TARGET_REG_BITS == 64
932 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
933 TCGArg arg1, TCGArg arg2, int const_arg2,
934 TCGLabel *label, int small)
935 {
936 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
937 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
938 }
939 #else
940 /* XXX: we implement it at the target level to avoid having to
941 handle temporaries that live across basic blocks. */
942 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
943 const int *const_args, int small)
944 {
945 TCGLabel *label_next = gen_new_label();
946 TCGLabel *label_this = arg_label(args[5]);
947
948 switch(args[4]) {
949 case TCG_COND_EQ:
950 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
951 label_next, 1);
952 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
953 label_this, small);
954 break;
955 case TCG_COND_NE:
956 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
957 label_this, small);
958 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
959 label_this, small);
960 break;
961 case TCG_COND_LT:
962 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
963 label_this, small);
964 tcg_out_jxx(s, JCC_JNE, label_next, 1);
965 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
966 label_this, small);
967 break;
968 case TCG_COND_LE:
969 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
970 label_this, small);
971 tcg_out_jxx(s, JCC_JNE, label_next, 1);
972 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
973 label_this, small);
974 break;
975 case TCG_COND_GT:
976 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
977 label_this, small);
978 tcg_out_jxx(s, JCC_JNE, label_next, 1);
979 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
980 label_this, small);
981 break;
982 case TCG_COND_GE:
983 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
984 label_this, small);
985 tcg_out_jxx(s, JCC_JNE, label_next, 1);
986 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
987 label_this, small);
988 break;
989 case TCG_COND_LTU:
990 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
991 label_this, small);
992 tcg_out_jxx(s, JCC_JNE, label_next, 1);
993 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
994 label_this, small);
995 break;
996 case TCG_COND_LEU:
997 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
998 label_this, small);
999 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1000 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
1001 label_this, small);
1002 break;
1003 case TCG_COND_GTU:
1004 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1005 label_this, small);
1006 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1007 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
1008 label_this, small);
1009 break;
1010 case TCG_COND_GEU:
1011 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
1012 label_this, small);
1013 tcg_out_jxx(s, JCC_JNE, label_next, 1);
1014 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
1015 label_this, small);
1016 break;
1017 default:
1018 tcg_abort();
1019 }
1020 tcg_out_label(s, label_next, s->code_ptr);
1021 }
1022 #endif
1023
1024 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1025 TCGArg arg1, TCGArg arg2, int const_arg2)
1026 {
1027 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
1028 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1029 tcg_out_ext8u(s, dest, dest);
1030 }
1031
1032 #if TCG_TARGET_REG_BITS == 64
1033 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1034 TCGArg arg1, TCGArg arg2, int const_arg2)
1035 {
1036 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1037 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1038 tcg_out_ext8u(s, dest, dest);
1039 }
1040 #else
1041 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1042 const int *const_args)
1043 {
1044 TCGArg new_args[6];
1045 TCGLabel *label_true, *label_over;
1046
1047 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1048
1049 if (args[0] == args[1] || args[0] == args[2]
1050 || (!const_args[3] && args[0] == args[3])
1051 || (!const_args[4] && args[0] == args[4])) {
1052 /* When the destination overlaps with one of the argument
1053 registers, don't do anything tricky. */
1054 label_true = gen_new_label();
1055 label_over = gen_new_label();
1056
1057 new_args[5] = label_arg(label_true);
1058 tcg_out_brcond2(s, new_args, const_args+1, 1);
1059
1060 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1061 tcg_out_jxx(s, JCC_JMP, label_over, 1);
1062 tcg_out_label(s, label_true, s->code_ptr);
1063
1064 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
1065 tcg_out_label(s, label_over, s->code_ptr);
1066 } else {
1067 /* When the destination does not overlap one of the arguments,
1068 clear the destination first, jump if cond false, and emit an
1069 increment in the true case. This results in smaller code. */
1070
1071 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1072
1073 label_over = gen_new_label();
1074 new_args[4] = tcg_invert_cond(new_args[4]);
1075 new_args[5] = label_arg(label_over);
1076 tcg_out_brcond2(s, new_args, const_args+1, 1);
1077
1078 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1079 tcg_out_label(s, label_over, s->code_ptr);
1080 }
1081 }
1082 #endif
1083
1084 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1085 TCGArg c1, TCGArg c2, int const_c2,
1086 TCGArg v1)
1087 {
1088 tcg_out_cmp(s, c1, c2, const_c2, 0);
1089 if (have_cmov) {
1090 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
1091 } else {
1092 TCGLabel *over = gen_new_label();
1093 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1094 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1095 tcg_out_label(s, over, s->code_ptr);
1096 }
1097 }
1098
1099 #if TCG_TARGET_REG_BITS == 64
1100 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1101 TCGArg c1, TCGArg c2, int const_c2,
1102 TCGArg v1)
1103 {
1104 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1105 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1106 }
1107 #endif
1108
1109 static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
1110 {
1111 intptr_t disp = tcg_pcrel_diff(s, dest) - 5;
1112
1113 if (disp == (int32_t)disp) {
1114 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1115 tcg_out32(s, disp);
1116 } else {
1117 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, (uintptr_t)dest);
1118 tcg_out_modrm(s, OPC_GRP5,
1119 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1120 }
1121 }
1122
1123 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
1124 {
1125 tcg_out_branch(s, 1, dest);
1126 }
1127
1128 static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
1129 {
1130 tcg_out_branch(s, 0, dest);
1131 }
1132
1133 static void tcg_out_nopn(TCGContext *s, int n)
1134 {
1135 int i;
1136 /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
1137 * "xchg %eax,%eax", forming "xchg %ax,%ax". All cores accept the
1138 * duplicate prefix, and all of the interesting recent cores can
1139 * decode and discard the duplicates in a single cycle.
1140 */
1141 tcg_debug_assert(n >= 1);
1142 for (i = 1; i < n; ++i) {
1143 tcg_out8(s, 0x66);
1144 }
1145 tcg_out8(s, 0x90);
1146 }
1147
1148 #if defined(CONFIG_SOFTMMU)
1149 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1150 * int mmu_idx, uintptr_t ra)
1151 */
1152 static void * const qemu_ld_helpers[16] = {
1153 [MO_UB] = helper_ret_ldub_mmu,
1154 [MO_LEUW] = helper_le_lduw_mmu,
1155 [MO_LEUL] = helper_le_ldul_mmu,
1156 [MO_LEQ] = helper_le_ldq_mmu,
1157 [MO_BEUW] = helper_be_lduw_mmu,
1158 [MO_BEUL] = helper_be_ldul_mmu,
1159 [MO_BEQ] = helper_be_ldq_mmu,
1160 };
1161
1162 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1163 * uintxx_t val, int mmu_idx, uintptr_t ra)
1164 */
1165 static void * const qemu_st_helpers[16] = {
1166 [MO_UB] = helper_ret_stb_mmu,
1167 [MO_LEUW] = helper_le_stw_mmu,
1168 [MO_LEUL] = helper_le_stl_mmu,
1169 [MO_LEQ] = helper_le_stq_mmu,
1170 [MO_BEUW] = helper_be_stw_mmu,
1171 [MO_BEUL] = helper_be_stl_mmu,
1172 [MO_BEQ] = helper_be_stq_mmu,
1173 };
1174
1175 /* Perform the TLB load and compare.
1176
1177 Inputs:
1178 ADDRLO and ADDRHI contain the low and high part of the address.
1179
1180 MEM_INDEX and OPC are the memory context and memory operation of the load.
1181
1182 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1183 This should be offsetof addr_read or addr_write.
1184
1185 Outputs:
1186 LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1187 positions of the displacements of forward jumps to the TLB miss case.
1188
1189 Second argument register is loaded with the low part of the address.
1190 In the TLB hit case, it has been adjusted as indicated by the TLB
1191 and so is a host address. In the TLB miss case, it continues to
1192 hold a guest address.
1193
1194 First argument register is clobbered. */
1195
1196 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1197 int mem_index, TCGMemOp opc,
1198 tcg_insn_unit **label_ptr, int which)
1199 {
1200 const TCGReg r0 = TCG_REG_L0;
1201 const TCGReg r1 = TCG_REG_L1;
1202 TCGType ttype = TCG_TYPE_I32;
1203 TCGType tlbtype = TCG_TYPE_I32;
1204 int trexw = 0, hrexw = 0, tlbrexw = 0;
1205 int a_bits = get_alignment_bits(opc);
1206 target_ulong tlb_mask;
1207
1208 if (TCG_TARGET_REG_BITS == 64) {
1209 if (TARGET_LONG_BITS == 64) {
1210 ttype = TCG_TYPE_I64;
1211 trexw = P_REXW;
1212 }
1213 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1214 hrexw = P_REXW;
1215 if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
1216 tlbtype = TCG_TYPE_I64;
1217 tlbrexw = P_REXW;
1218 }
1219 }
1220 }
1221
1222 tcg_out_mov(s, tlbtype, r0, addrlo);
1223 if (a_bits >= 0) {
1224 /* A byte access, or an alignment check is required. */
1225 tcg_out_mov(s, ttype, r1, addrlo);
1226 tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
1227 } else {
1228 /* For an unaligned access, check that we don't cross pages by
1229 using the page address of the last byte. */
1230 tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo,
1231 (1 << (opc & MO_SIZE)) - 1);
1232 tlb_mask = TARGET_PAGE_MASK;
1233 }
1234
1235 tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
1236 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1237
1238 tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
1239 tgen_arithi(s, ARITH_AND + tlbrexw, r0,
1240 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1241
1242 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1243 offsetof(CPUArchState, tlb_table[mem_index][0])
1244 + which);
1245
1246 /* cmp 0(r0), r1 */
1247 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1248
1249 /* Prepare for both the fast path add of the tlb addend, and the slow
1250 path function argument setup. There are two cases worth noting:
1251 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1252 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1253 copies the entire guest address for the slow path, while truncation
1254 for the 32-bit host happens with the fastpath ADDL below. */
1255 tcg_out_mov(s, ttype, r1, addrlo);
1256
1257 /* jne slow_path */
1258 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1259 label_ptr[0] = s->code_ptr;
1260 s->code_ptr += 4;
1261
1262 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1263 /* cmp 4(r0), addrhi */
1264 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1265
1266 /* jne slow_path */
1267 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1268 label_ptr[1] = s->code_ptr;
1269 s->code_ptr += 4;
1270 }
1271
1272 /* TLB Hit. */
1273
1274 /* add addend(r0), r1 */
1275 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1276 offsetof(CPUTLBEntry, addend) - which);
1277 }
1278
1279 /*
1280 * Record the context of a call to the out of line helper code for the slow path
1281 * for a load or store, so that we can later generate the correct helper code
1282 */
1283 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1284 TCGReg datalo, TCGReg datahi,
1285 TCGReg addrlo, TCGReg addrhi,
1286 tcg_insn_unit *raddr,
1287 tcg_insn_unit **label_ptr)
1288 {
1289 TCGLabelQemuLdst *label = new_ldst_label(s);
1290
1291 label->is_ld = is_ld;
1292 label->oi = oi;
1293 label->datalo_reg = datalo;
1294 label->datahi_reg = datahi;
1295 label->addrlo_reg = addrlo;
1296 label->addrhi_reg = addrhi;
1297 label->raddr = raddr;
1298 label->label_ptr[0] = label_ptr[0];
1299 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1300 label->label_ptr[1] = label_ptr[1];
1301 }
1302 }
1303
1304 /*
1305 * Generate code for the slow path for a load at the end of block
1306 */
1307 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1308 {
1309 TCGMemOpIdx oi = l->oi;
1310 TCGMemOp opc = get_memop(oi);
1311 TCGReg data_reg;
1312 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1313
1314 /* resolve label address */
1315 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1316 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1317 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1318 }
1319
1320 if (TCG_TARGET_REG_BITS == 32) {
1321 int ofs = 0;
1322
1323 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1324 ofs += 4;
1325
1326 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1327 ofs += 4;
1328
1329 if (TARGET_LONG_BITS == 64) {
1330 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1331 ofs += 4;
1332 }
1333
1334 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1335 ofs += 4;
1336
1337 tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
1338 } else {
1339 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1340 /* The second argument is already loaded with addrlo. */
1341 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
1342 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1343 (uintptr_t)l->raddr);
1344 }
1345
1346 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1347
1348 data_reg = l->datalo_reg;
1349 switch (opc & MO_SSIZE) {
1350 case MO_SB:
1351 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1352 break;
1353 case MO_SW:
1354 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1355 break;
1356 #if TCG_TARGET_REG_BITS == 64
1357 case MO_SL:
1358 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1359 break;
1360 #endif
1361 case MO_UB:
1362 case MO_UW:
1363 /* Note that the helpers have zero-extended to tcg_target_long. */
1364 case MO_UL:
1365 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1366 break;
1367 case MO_Q:
1368 if (TCG_TARGET_REG_BITS == 64) {
1369 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1370 } else if (data_reg == TCG_REG_EDX) {
1371 /* xchg %edx, %eax */
1372 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1373 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1374 } else {
1375 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1376 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1377 }
1378 break;
1379 default:
1380 tcg_abort();
1381 }
1382
1383 /* Jump to the code corresponding to the next IR of qemu_ld */
1384 tcg_out_jmp(s, l->raddr);
1385 }
1386
1387 /*
1388 * Generate code for the slow path for a store at the end of block
1389 */
1390 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1391 {
1392 TCGMemOpIdx oi = l->oi;
1393 TCGMemOp opc = get_memop(oi);
1394 TCGMemOp s_bits = opc & MO_SIZE;
1395 tcg_insn_unit **label_ptr = &l->label_ptr[0];
1396 TCGReg retaddr;
1397
1398 /* resolve label address */
1399 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
1400 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1401 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
1402 }
1403
1404 if (TCG_TARGET_REG_BITS == 32) {
1405 int ofs = 0;
1406
1407 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1408 ofs += 4;
1409
1410 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1411 ofs += 4;
1412
1413 if (TARGET_LONG_BITS == 64) {
1414 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1415 ofs += 4;
1416 }
1417
1418 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1419 ofs += 4;
1420
1421 if (s_bits == MO_64) {
1422 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1423 ofs += 4;
1424 }
1425
1426 tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
1427 ofs += 4;
1428
1429 retaddr = TCG_REG_EAX;
1430 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1431 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
1432 } else {
1433 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1434 /* The second argument is already loaded with addrlo. */
1435 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1436 tcg_target_call_iarg_regs[2], l->datalo_reg);
1437 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
1438
1439 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1440 retaddr = tcg_target_call_iarg_regs[4];
1441 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1442 } else {
1443 retaddr = TCG_REG_RAX;
1444 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1445 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
1446 TCG_TARGET_CALL_STACK_OFFSET);
1447 }
1448 }
1449
1450 /* "Tail call" to the helper, with the return address back inline. */
1451 tcg_out_push(s, retaddr);
1452 tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1453 }
1454 #elif defined(__x86_64__) && defined(__linux__)
1455 # include <asm/prctl.h>
1456 # include <sys/prctl.h>
1457
1458 int arch_prctl(int code, unsigned long addr);
1459
1460 static int guest_base_flags;
1461 static inline void setup_guest_base_seg(void)
1462 {
1463 if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
1464 guest_base_flags = P_GS;
1465 }
1466 }
1467 #else
1468 # define guest_base_flags 0
1469 static inline void setup_guest_base_seg(void) { }
1470 #endif /* SOFTMMU */
1471
1472 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1473 TCGReg base, int index, intptr_t ofs,
1474 int seg, TCGMemOp memop)
1475 {
1476 const TCGMemOp real_bswap = memop & MO_BSWAP;
1477 TCGMemOp bswap = real_bswap;
1478 int movop = OPC_MOVL_GvEv;
1479
1480 if (have_movbe && real_bswap) {
1481 bswap = 0;
1482 movop = OPC_MOVBE_GyMy;
1483 }
1484
1485 switch (memop & MO_SSIZE) {
1486 case MO_UB:
1487 tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
1488 base, index, 0, ofs);
1489 break;
1490 case MO_SB:
1491 tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
1492 base, index, 0, ofs);
1493 break;
1494 case MO_UW:
1495 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1496 base, index, 0, ofs);
1497 if (real_bswap) {
1498 tcg_out_rolw_8(s, datalo);
1499 }
1500 break;
1501 case MO_SW:
1502 if (real_bswap) {
1503 if (have_movbe) {
1504 tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1505 datalo, base, index, 0, ofs);
1506 } else {
1507 tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
1508 base, index, 0, ofs);
1509 tcg_out_rolw_8(s, datalo);
1510 }
1511 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1512 } else {
1513 tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
1514 datalo, base, index, 0, ofs);
1515 }
1516 break;
1517 case MO_UL:
1518 tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
1519 if (bswap) {
1520 tcg_out_bswap32(s, datalo);
1521 }
1522 break;
1523 #if TCG_TARGET_REG_BITS == 64
1524 case MO_SL:
1525 if (real_bswap) {
1526 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1527 base, index, 0, ofs);
1528 if (bswap) {
1529 tcg_out_bswap32(s, datalo);
1530 }
1531 tcg_out_ext32s(s, datalo, datalo);
1532 } else {
1533 tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
1534 base, index, 0, ofs);
1535 }
1536 break;
1537 #endif
1538 case MO_Q:
1539 if (TCG_TARGET_REG_BITS == 64) {
1540 tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
1541 base, index, 0, ofs);
1542 if (bswap) {
1543 tcg_out_bswap64(s, datalo);
1544 }
1545 } else {
1546 if (real_bswap) {
1547 int t = datalo;
1548 datalo = datahi;
1549 datahi = t;
1550 }
1551 if (base != datalo) {
1552 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1553 base, index, 0, ofs);
1554 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1555 base, index, 0, ofs + 4);
1556 } else {
1557 tcg_out_modrm_sib_offset(s, movop + seg, datahi,
1558 base, index, 0, ofs + 4);
1559 tcg_out_modrm_sib_offset(s, movop + seg, datalo,
1560 base, index, 0, ofs);
1561 }
1562 if (bswap) {
1563 tcg_out_bswap32(s, datalo);
1564 tcg_out_bswap32(s, datahi);
1565 }
1566 }
1567 break;
1568 default:
1569 tcg_abort();
1570 }
1571 }
1572
1573 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1574 EAX. It will be useful once fixed registers globals are less
1575 common. */
1576 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1577 {
1578 TCGReg datalo, datahi, addrlo;
1579 TCGReg addrhi __attribute__((unused));
1580 TCGMemOpIdx oi;
1581 TCGMemOp opc;
1582 #if defined(CONFIG_SOFTMMU)
1583 int mem_index;
1584 tcg_insn_unit *label_ptr[2];
1585 #endif
1586
1587 datalo = *args++;
1588 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1589 addrlo = *args++;
1590 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1591 oi = *args++;
1592 opc = get_memop(oi);
1593
1594 #if defined(CONFIG_SOFTMMU)
1595 mem_index = get_mmuidx(oi);
1596
1597 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1598 label_ptr, offsetof(CPUTLBEntry, addr_read));
1599
1600 /* TLB Hit. */
1601 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);
1602
1603 /* Record the current context of a load into ldst label */
1604 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1605 s->code_ptr, label_ptr);
1606 #else
1607 {
1608 int32_t offset = guest_base;
1609 TCGReg base = addrlo;
1610 int index = -1;
1611 int seg = 0;
1612
1613 /* For a 32-bit guest, the high 32 bits may contain garbage.
1614 We can ignore them via the ADDR32 prefix if we're not using
1615 a guest base, or when using segmentation. Otherwise we
1616 need to zero-extend manually. */
1617 if (guest_base == 0 || guest_base_flags) {
1618 seg = guest_base_flags;
1619 offset = 0;
1620 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1621 seg |= P_ADDR32;
1622 }
1623 } else if (TCG_TARGET_REG_BITS == 64) {
1624 if (TARGET_LONG_BITS == 32) {
1625 tcg_out_ext32u(s, TCG_REG_L0, base);
1626 base = TCG_REG_L0;
1627 }
1628 if (offset != guest_base) {
1629 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1630 index = TCG_REG_L1;
1631 offset = 0;
1632 }
1633 }
1634
1635 tcg_out_qemu_ld_direct(s, datalo, datahi,
1636 base, index, offset, seg, opc);
1637 }
1638 #endif
1639 }
1640
1641 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1642 TCGReg base, intptr_t ofs, int seg,
1643 TCGMemOp memop)
1644 {
1645 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1646 we could perform the bswap twice to restore the original value
1647 instead of moving to the scratch. But as it is, the L constraint
1648 means that TCG_REG_L0 is definitely free here. */
1649 const TCGReg scratch = TCG_REG_L0;
1650 const TCGMemOp real_bswap = memop & MO_BSWAP;
1651 TCGMemOp bswap = real_bswap;
1652 int movop = OPC_MOVL_EvGv;
1653
1654 if (have_movbe && real_bswap) {
1655 bswap = 0;
1656 movop = OPC_MOVBE_MyGy;
1657 }
1658
1659 switch (memop & MO_SIZE) {
1660 case MO_8:
1661 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1662 Use the scratch register if necessary. */
1663 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1664 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1665 datalo = scratch;
1666 }
1667 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1668 datalo, base, ofs);
1669 break;
1670 case MO_16:
1671 if (bswap) {
1672 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1673 tcg_out_rolw_8(s, scratch);
1674 datalo = scratch;
1675 }
1676 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1677 break;
1678 case MO_32:
1679 if (bswap) {
1680 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1681 tcg_out_bswap32(s, scratch);
1682 datalo = scratch;
1683 }
1684 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1685 break;
1686 case MO_64:
1687 if (TCG_TARGET_REG_BITS == 64) {
1688 if (bswap) {
1689 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1690 tcg_out_bswap64(s, scratch);
1691 datalo = scratch;
1692 }
1693 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1694 } else if (bswap) {
1695 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1696 tcg_out_bswap32(s, scratch);
1697 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1698 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1699 tcg_out_bswap32(s, scratch);
1700 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1701 } else {
1702 if (real_bswap) {
1703 int t = datalo;
1704 datalo = datahi;
1705 datahi = t;
1706 }
1707 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1708 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1709 }
1710 break;
1711 default:
1712 tcg_abort();
1713 }
1714 }
1715
1716 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1717 {
1718 TCGReg datalo, datahi, addrlo;
1719 TCGReg addrhi __attribute__((unused));
1720 TCGMemOpIdx oi;
1721 TCGMemOp opc;
1722 #if defined(CONFIG_SOFTMMU)
1723 int mem_index;
1724 tcg_insn_unit *label_ptr[2];
1725 #endif
1726
1727 datalo = *args++;
1728 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1729 addrlo = *args++;
1730 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1731 oi = *args++;
1732 opc = get_memop(oi);
1733
1734 #if defined(CONFIG_SOFTMMU)
1735 mem_index = get_mmuidx(oi);
1736
1737 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
1738 label_ptr, offsetof(CPUTLBEntry, addr_write));
1739
1740 /* TLB Hit. */
1741 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1742
1743 /* Record the current context of a store into ldst label */
1744 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1745 s->code_ptr, label_ptr);
1746 #else
1747 {
1748 int32_t offset = guest_base;
1749 TCGReg base = addrlo;
1750 int seg = 0;
1751
1752 /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. */
1753 if (guest_base == 0 || guest_base_flags) {
1754 seg = guest_base_flags;
1755 offset = 0;
1756 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
1757 seg |= P_ADDR32;
1758 }
1759 } else if (TCG_TARGET_REG_BITS == 64) {
1760 /* ??? Note that we can't use the same SIB addressing scheme
1761 as for loads, since we require L0 free for bswap. */
1762 if (offset != guest_base) {
1763 if (TARGET_LONG_BITS == 32) {
1764 tcg_out_ext32u(s, TCG_REG_L0, base);
1765 base = TCG_REG_L0;
1766 }
1767 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
1768 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1769 base = TCG_REG_L1;
1770 offset = 0;
1771 } else if (TARGET_LONG_BITS == 32) {
1772 tcg_out_ext32u(s, TCG_REG_L1, base);
1773 base = TCG_REG_L1;
1774 }
1775 }
1776
1777 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1778 }
1779 #endif
1780 }
1781
1782 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1783 const TCGArg *args, const int *const_args)
1784 {
1785 int c, vexop, rexw = 0;
1786
1787 #if TCG_TARGET_REG_BITS == 64
1788 # define OP_32_64(x) \
1789 case glue(glue(INDEX_op_, x), _i64): \
1790 rexw = P_REXW; /* FALLTHRU */ \
1791 case glue(glue(INDEX_op_, x), _i32)
1792 #else
1793 # define OP_32_64(x) \
1794 case glue(glue(INDEX_op_, x), _i32)
1795 #endif
1796
1797 switch(opc) {
1798 case INDEX_op_exit_tb:
1799 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1800 tcg_out_jmp(s, tb_ret_addr);
1801 break;
1802 case INDEX_op_goto_tb:
1803 if (s->tb_jmp_insn_offset) {
1804 /* direct jump method */
1805 int gap;
1806 /* jump displacement must be aligned for atomic patching;
1807 * see if we need to add extra nops before the jump
1808 */
1809 gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
1810 if (gap != 1) {
1811 tcg_out_nopn(s, gap - 1);
1812 }
1813 tcg_out8(s, OPC_JMP_long); /* jmp im */
1814 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1815 tcg_out32(s, 0);
1816 } else {
1817 /* indirect jump method */
1818 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1819 (intptr_t)(s->tb_jmp_target_addr + args[0]));
1820 }
1821 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1822 break;
1823 case INDEX_op_br:
1824 tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
1825 break;
1826 OP_32_64(ld8u):
1827 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1828 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1829 break;
1830 OP_32_64(ld8s):
1831 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1832 break;
1833 OP_32_64(ld16u):
1834 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1835 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1836 break;
1837 OP_32_64(ld16s):
1838 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1839 break;
1840 #if TCG_TARGET_REG_BITS == 64
1841 case INDEX_op_ld32u_i64:
1842 #endif
1843 case INDEX_op_ld_i32:
1844 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1845 break;
1846
1847 OP_32_64(st8):
1848 if (const_args[0]) {
1849 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1850 0, args[1], args[2]);
1851 tcg_out8(s, args[0]);
1852 } else {
1853 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1854 args[0], args[1], args[2]);
1855 }
1856 break;
1857 OP_32_64(st16):
1858 if (const_args[0]) {
1859 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1860 0, args[1], args[2]);
1861 tcg_out16(s, args[0]);
1862 } else {
1863 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1864 args[0], args[1], args[2]);
1865 }
1866 break;
1867 #if TCG_TARGET_REG_BITS == 64
1868 case INDEX_op_st32_i64:
1869 #endif
1870 case INDEX_op_st_i32:
1871 if (const_args[0]) {
1872 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1873 tcg_out32(s, args[0]);
1874 } else {
1875 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1876 }
1877 break;
1878
1879 OP_32_64(add):
1880 /* For 3-operand addition, use LEA. */
1881 if (args[0] != args[1]) {
1882 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1883
1884 if (const_args[2]) {
1885 c3 = a2, a2 = -1;
1886 } else if (a0 == a2) {
1887 /* Watch out for dest = src + dest, since we've removed
1888 the matching constraint on the add. */
1889 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1890 break;
1891 }
1892
1893 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
1894 break;
1895 }
1896 c = ARITH_ADD;
1897 goto gen_arith;
1898 OP_32_64(sub):
1899 c = ARITH_SUB;
1900 goto gen_arith;
1901 OP_32_64(and):
1902 c = ARITH_AND;
1903 goto gen_arith;
1904 OP_32_64(or):
1905 c = ARITH_OR;
1906 goto gen_arith;
1907 OP_32_64(xor):
1908 c = ARITH_XOR;
1909 goto gen_arith;
1910 gen_arith:
1911 if (const_args[2]) {
1912 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1913 } else {
1914 tgen_arithr(s, c + rexw, args[0], args[2]);
1915 }
1916 break;
1917
1918 OP_32_64(andc):
1919 if (const_args[2]) {
1920 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1921 args[0], args[1]);
1922 tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1923 } else {
1924 tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1925 }
1926 break;
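/* A short note on the register path above: BMI1 ANDN computes
 * dest = ~src1 & src2, and as emitted here that works out to
 * args[0] = args[1] & ~args[2], which is exactly the andc semantics
 * without needing a matching-operand constraint.  The constant path
 * instead folds the complement into an immediate AND. */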
1927
1928 OP_32_64(mul):
1929 if (const_args[2]) {
1930 int32_t val;
1931 val = args[2];
1932 if (val == (int8_t)val) {
1933 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1934 tcg_out8(s, val);
1935 } else {
1936 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1937 tcg_out32(s, val);
1938 }
1939 } else {
1940 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1941 }
1942 break;
1943
1944 OP_32_64(div2):
1945 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1946 break;
1947 OP_32_64(divu2):
1948 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1949 break;
1950
1951 OP_32_64(shl):
1952 c = SHIFT_SHL;
1953 vexop = OPC_SHLX;
1954 goto gen_shift_maybe_vex;
1955 OP_32_64(shr):
1956 c = SHIFT_SHR;
1957 vexop = OPC_SHRX;
1958 goto gen_shift_maybe_vex;
1959 OP_32_64(sar):
1960 c = SHIFT_SAR;
1961 vexop = OPC_SARX;
1962 goto gen_shift_maybe_vex;
1963 OP_32_64(rotl):
1964 c = SHIFT_ROL;
1965 goto gen_shift;
1966 OP_32_64(rotr):
1967 c = SHIFT_ROR;
1968 goto gen_shift;
1969 gen_shift_maybe_vex:
1970 if (have_bmi2 && !const_args[2]) {
1971 tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
1972 break;
1973 }
1974 /* FALLTHRU */
1975 gen_shift:
1976 if (const_args[2]) {
1977 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1978 } else {
1979 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1980 }
1981 break;
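/* With BMI2, SHLX/SHRX/SARX accept the shift count in any register and
 * leave EFLAGS untouched, so the vex path above avoids both the ECX
 * constraint and the flag update of the classic shift-by-CL encodings.
 * The rotate cases have no equivalent here and always take the classic
 * path. */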
1982
1983 case INDEX_op_brcond_i32:
1984 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1985 arg_label(args[3]), 0);
1986 break;
1987 case INDEX_op_setcond_i32:
1988 tcg_out_setcond32(s, args[3], args[0], args[1],
1989 args[2], const_args[2]);
1990 break;
1991 case INDEX_op_movcond_i32:
1992 tcg_out_movcond32(s, args[5], args[0], args[1],
1993 args[2], const_args[2], args[3]);
1994 break;
1995
1996 OP_32_64(bswap16):
1997 tcg_out_rolw_8(s, args[0]);
1998 break;
1999 OP_32_64(bswap32):
2000 tcg_out_bswap32(s, args[0]);
2001 break;
2002
2003 OP_32_64(neg):
2004 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
2005 break;
2006 OP_32_64(not):
2007 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
2008 break;
2009
2010 OP_32_64(ext8s):
2011 tcg_out_ext8s(s, args[0], args[1], rexw);
2012 break;
2013 OP_32_64(ext16s):
2014 tcg_out_ext16s(s, args[0], args[1], rexw);
2015 break;
2016 OP_32_64(ext8u):
2017 tcg_out_ext8u(s, args[0], args[1]);
2018 break;
2019 OP_32_64(ext16u):
2020 tcg_out_ext16u(s, args[0], args[1]);
2021 break;
2022
2023 case INDEX_op_qemu_ld_i32:
2024 tcg_out_qemu_ld(s, args, 0);
2025 break;
2026 case INDEX_op_qemu_ld_i64:
2027 tcg_out_qemu_ld(s, args, 1);
2028 break;
2029 case INDEX_op_qemu_st_i32:
2030 tcg_out_qemu_st(s, args, 0);
2031 break;
2032 case INDEX_op_qemu_st_i64:
2033 tcg_out_qemu_st(s, args, 1);
2034 break;
2035
2036 OP_32_64(mulu2):
2037 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
2038 break;
2039 OP_32_64(muls2):
2040 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
2041 break;
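/* The one-operand MUL/IMUL forms used for mulu2/muls2 multiply
 * {E,R}AX by the r/m operand and leave the double-width product in
 * {E,R}DX:{E,R}AX, which is why the constraint table below pins the
 * outputs to "a"/"d" and the first multiplicand to "a". */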
2042 OP_32_64(add2):
2043 if (const_args[4]) {
2044 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
2045 } else {
2046 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
2047 }
2048 if (const_args[5]) {
2049 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
2050 } else {
2051 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
2052 }
2053 break;
2054 OP_32_64(sub2):
2055 if (const_args[4]) {
2056 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
2057 } else {
2058 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
2059 }
2060 if (const_args[5]) {
2061 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
2062 } else {
2063 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
2064 }
2065 break;
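/* add2/sub2 build a double-width operation from an ADD/ADC (or SUB/SBB)
 * pair, letting the carry or borrow propagate from the low half into the
 * high half.  The final 1 passed to tgen_arithi indicates that the carry
 * flag matters here, so shortcuts such as INC/DEC, which do not write CF,
 * cannot be used for the constant case. */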
2066
2067 #if TCG_TARGET_REG_BITS == 32
2068 case INDEX_op_brcond2_i32:
2069 tcg_out_brcond2(s, args, const_args, 0);
2070 break;
2071 case INDEX_op_setcond2_i32:
2072 tcg_out_setcond2(s, args, const_args);
2073 break;
2074 #else /* TCG_TARGET_REG_BITS == 64 */
2075 case INDEX_op_ld32s_i64:
2076 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
2077 break;
2078 case INDEX_op_ld_i64:
2079 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2080 break;
2081 case INDEX_op_st_i64:
2082 if (const_args[0]) {
2083 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
2084 0, args[1], args[2]);
2085 tcg_out32(s, args[0]);
2086 } else {
2087 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2088 }
2089 break;
2090
2091 case INDEX_op_brcond_i64:
2092 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
2093 arg_label(args[3]), 0);
2094 break;
2095 case INDEX_op_setcond_i64:
2096 tcg_out_setcond64(s, args[3], args[0], args[1],
2097 args[2], const_args[2]);
2098 break;
2099 case INDEX_op_movcond_i64:
2100 tcg_out_movcond64(s, args[5], args[0], args[1],
2101 args[2], const_args[2], args[3]);
2102 break;
2103
2104 case INDEX_op_bswap64_i64:
2105 tcg_out_bswap64(s, args[0]);
2106 break;
2107 case INDEX_op_extu_i32_i64:
2108 case INDEX_op_ext32u_i64:
2109 tcg_out_ext32u(s, args[0], args[1]);
2110 break;
2111 case INDEX_op_ext_i32_i64:
2112 case INDEX_op_ext32s_i64:
2113 tcg_out_ext32s(s, args[0], args[1]);
2114 break;
2115 #endif
2116
2117 OP_32_64(deposit):
2118 if (args[3] == 0 && args[4] == 8) {
2119 /* load bits 0..7 */
2120 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2121 args[2], args[0]);
2122 } else if (args[3] == 8 && args[4] == 8) {
2123 /* load bits 8..15 */
2124 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2125 } else if (args[3] == 0 && args[4] == 16) {
2126 /* load bits 0..15 */
2127 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2128 } else {
2129 tcg_abort();
2130 }
2131 break;
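/* These deposit special cases map directly onto x86 partial-register
 * moves: "mov %cl, %al" for bits 0..7, "mov %cl, %ah" for bits 8..15 and
 * a 16-bit mov for bits 0..15 (register names illustrative).  The
 * "args[0] + 4" trick works because, without a REX prefix, byte register
 * numbers 4..7 encode AH/CH/DH/BH, the high byte of EAX/ECX/EDX/EBX. */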
2132
2133 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2134 case INDEX_op_mov_i64:
2135 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
2136 case INDEX_op_movi_i64:
2137 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2138 default:
2139 tcg_abort();
2140 }
2141
2142 #undef OP_32_64
2143 }
2144
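/* A reader's key to the constraint letters below (summarised, not
 * exhaustive): "r" is any host register, "q" a byte-addressable register,
 * "Q" a register whose second byte is addressable (EAX, ECX, EDX, EBX),
 * "a"/"d" the fixed EAX/EDX pair, "0"/"1" an input tied to output 0/1,
 * "i" any immediate, "e" a sign-extended 32-bit immediate, and "L" a
 * register usable by the qemu_ld/st paths (excluding the L0/L1 scratch
 * registers). */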
2145 static const TCGTargetOpDef x86_op_defs[] = {
2146 { INDEX_op_exit_tb, { } },
2147 { INDEX_op_goto_tb, { } },
2148 { INDEX_op_br, { } },
2149 { INDEX_op_ld8u_i32, { "r", "r" } },
2150 { INDEX_op_ld8s_i32, { "r", "r" } },
2151 { INDEX_op_ld16u_i32, { "r", "r" } },
2152 { INDEX_op_ld16s_i32, { "r", "r" } },
2153 { INDEX_op_ld_i32, { "r", "r" } },
2154 { INDEX_op_st8_i32, { "qi", "r" } },
2155 { INDEX_op_st16_i32, { "ri", "r" } },
2156 { INDEX_op_st_i32, { "ri", "r" } },
2157
2158 { INDEX_op_add_i32, { "r", "r", "ri" } },
2159 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2160 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2161 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2162 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2163 { INDEX_op_and_i32, { "r", "0", "ri" } },
2164 { INDEX_op_or_i32, { "r", "0", "ri" } },
2165 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2166 { INDEX_op_andc_i32, { "r", "r", "ri" } },
2167
2168 { INDEX_op_shl_i32, { "r", "0", "Ci" } },
2169 { INDEX_op_shr_i32, { "r", "0", "Ci" } },
2170 { INDEX_op_sar_i32, { "r", "0", "Ci" } },
2171 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2172 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2173
2174 { INDEX_op_brcond_i32, { "r", "ri" } },
2175
2176 { INDEX_op_bswap16_i32, { "r", "0" } },
2177 { INDEX_op_bswap32_i32, { "r", "0" } },
2178
2179 { INDEX_op_neg_i32, { "r", "0" } },
2180
2181 { INDEX_op_not_i32, { "r", "0" } },
2182
2183 { INDEX_op_ext8s_i32, { "r", "q" } },
2184 { INDEX_op_ext16s_i32, { "r", "r" } },
2185 { INDEX_op_ext8u_i32, { "r", "q" } },
2186 { INDEX_op_ext16u_i32, { "r", "r" } },
2187
2188 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2189
2190 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2191 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2192
2193 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2194 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2195 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2196 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2197
2198 #if TCG_TARGET_REG_BITS == 32
2199 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2200 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2201 #else
2202 { INDEX_op_ld8u_i64, { "r", "r" } },
2203 { INDEX_op_ld8s_i64, { "r", "r" } },
2204 { INDEX_op_ld16u_i64, { "r", "r" } },
2205 { INDEX_op_ld16s_i64, { "r", "r" } },
2206 { INDEX_op_ld32u_i64, { "r", "r" } },
2207 { INDEX_op_ld32s_i64, { "r", "r" } },
2208 { INDEX_op_ld_i64, { "r", "r" } },
2209 { INDEX_op_st8_i64, { "ri", "r" } },
2210 { INDEX_op_st16_i64, { "ri", "r" } },
2211 { INDEX_op_st32_i64, { "ri", "r" } },
2212 { INDEX_op_st_i64, { "re", "r" } },
2213
2214 { INDEX_op_add_i64, { "r", "r", "re" } },
2215 { INDEX_op_mul_i64, { "r", "0", "re" } },
2216 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2217 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2218 { INDEX_op_sub_i64, { "r", "0", "re" } },
2219 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2220 { INDEX_op_or_i64, { "r", "0", "re" } },
2221 { INDEX_op_xor_i64, { "r", "0", "re" } },
2222 { INDEX_op_andc_i64, { "r", "r", "rI" } },
2223
2224 { INDEX_op_shl_i64, { "r", "0", "Ci" } },
2225 { INDEX_op_shr_i64, { "r", "0", "Ci" } },
2226 { INDEX_op_sar_i64, { "r", "0", "Ci" } },
2227 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2228 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2229
2230 { INDEX_op_brcond_i64, { "r", "re" } },
2231 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2232
2233 { INDEX_op_bswap16_i64, { "r", "0" } },
2234 { INDEX_op_bswap32_i64, { "r", "0" } },
2235 { INDEX_op_bswap64_i64, { "r", "0" } },
2236 { INDEX_op_neg_i64, { "r", "0" } },
2237 { INDEX_op_not_i64, { "r", "0" } },
2238
2239 { INDEX_op_ext8s_i64, { "r", "r" } },
2240 { INDEX_op_ext16s_i64, { "r", "r" } },
2241 { INDEX_op_ext32s_i64, { "r", "r" } },
2242 { INDEX_op_ext8u_i64, { "r", "r" } },
2243 { INDEX_op_ext16u_i64, { "r", "r" } },
2244 { INDEX_op_ext32u_i64, { "r", "r" } },
2245
2246 { INDEX_op_ext_i32_i64, { "r", "r" } },
2247 { INDEX_op_extu_i32_i64, { "r", "r" } },
2248
2249 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2250 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2251
2252 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2253 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2254 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2255 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2256 #endif
2257
2258 #if TCG_TARGET_REG_BITS == 64
2259 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2260 { INDEX_op_qemu_st_i32, { "L", "L" } },
2261 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2262 { INDEX_op_qemu_st_i64, { "L", "L" } },
2263 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2264 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2265 { INDEX_op_qemu_st_i32, { "L", "L" } },
2266 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2267 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2268 #else
2269 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2270 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2271 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2272 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2273 #endif
2274 { -1 },
2275 };
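/* The three blocks of qemu_ld/st constraints above differ only in how
 * many registers the guest address and the data value need: one each on
 * a 64-bit host, an extra register for 64-bit data on a 32-bit host, and
 * two address registers as well once the guest address itself is wider
 * than the 32-bit host registers. */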
2276
2277 static int tcg_target_callee_save_regs[] = {
2278 #if TCG_TARGET_REG_BITS == 64
2279 TCG_REG_RBP,
2280 TCG_REG_RBX,
2281 #if defined(_WIN64)
2282 TCG_REG_RDI,
2283 TCG_REG_RSI,
2284 #endif
2285 TCG_REG_R12,
2286 TCG_REG_R13,
2287 TCG_REG_R14, /* Currently used for the global env. */
2288 TCG_REG_R15,
2289 #else
2290 TCG_REG_EBP, /* Currently used for the global env. */
2291 TCG_REG_EBX,
2292 TCG_REG_ESI,
2293 TCG_REG_EDI,
2294 #endif
2295 };
2296
2297 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2298 and tcg_register_jit. */
2299
2300 #define PUSH_SIZE \
2301 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2302 * (TCG_TARGET_REG_BITS / 8))
2303
2304 #define FRAME_SIZE \
2305 ((PUSH_SIZE \
2306 + TCG_STATIC_CALL_ARGS_SIZE \
2307 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2308 + TCG_TARGET_STACK_ALIGN - 1) \
2309 & ~(TCG_TARGET_STACK_ALIGN - 1))
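/* A worked example, assuming the usual values of
 * TCG_STATIC_CALL_ARGS_SIZE (128), CPU_TEMP_BUF_NLONGS (128) and
 * TCG_TARGET_STACK_ALIGN (16): a SysV x86-64 host saves six registers,
 * so PUSH_SIZE = (1 + 6) * 8 = 56 bytes, the extra slot being the return
 * address, and FRAME_SIZE rounds 56 + 128 + 128 * 8 = 1208 up to the
 * next 16-byte multiple, 1216. */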
2310
2311 /* Generate global QEMU prologue and epilogue code */
2312 static void tcg_target_qemu_prologue(TCGContext *s)
2313 {
2314 int i, stack_addend;
2315
2316 /* TB prologue */
2317
2318 /* Reserve some stack space, also for TCG temps. */
2319 stack_addend = FRAME_SIZE - PUSH_SIZE;
2320 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2321 CPU_TEMP_BUF_NLONGS * sizeof(long));
2322
2323 /* Save all callee-saved registers. */
2324 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2325 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2326 }
2327
2328 #if TCG_TARGET_REG_BITS == 32
2329 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2330 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2331 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2332 /* jmp *tb. */
2333 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2334 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2335 + stack_addend);
2336 #else
2337 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2338 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2339 /* jmp *tb. */
2340 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2341 #endif
2342
2343 /* TB epilogue */
2344 tb_ret_addr = s->code_ptr;
2345
2346 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2347
2348 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2349 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2350 }
2351 tcg_out_opc(s, OPC_RET, 0, 0, 0);
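/* The INDEX_op_exit_tb case above loads its argument into EAX and jumps
 * to tb_ret_addr, so the RET emitted here hands that value back to the
 * caller that entered the generated code through tcg_qemu_tb_exec. */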
2352
2353 #if !defined(CONFIG_SOFTMMU)
2354 /* Try to set up a segment register to point to guest_base. */
2355 if (guest_base) {
2356 setup_guest_base_seg();
2357 }
2358 #endif
2359 }
2360
2361 static void tcg_target_init(TCGContext *s)
2362 {
2363 #ifdef CONFIG_CPUID_H
2364 unsigned a, b, c, d;
2365 int max = __get_cpuid_max(0, 0);
2366
2367 if (max >= 1) {
2368 __cpuid(1, a, b, c, d);
2369 #ifndef have_cmov
2370 /* For 32-bit, it is near-certain that we're running on hardware that
2371 supports cmov, but we still need to check at runtime. If cmov is not
2372 available, we fall back to a small forward branch. */
2373 have_cmov = (d & bit_CMOV) != 0;
2374 #endif
2375 #ifndef have_movbe
2376 /* MOVBE is only available on some CPUs (e.g. Intel Atom and Haswell),
2377 so we need to probe for it at runtime. */
2378 have_movbe = (c & bit_MOVBE) != 0;
2379 #endif
2380 }
2381
2382 if (max >= 7) {
2383 /* BMI1 is available on AMD Piledriver, Intel Haswell and later CPUs. */
2384 __cpuid_count(7, 0, a, b, c, d);
2385 #ifdef bit_BMI
2386 have_bmi1 = (b & bit_BMI) != 0;
2387 #endif
2388 #ifndef have_bmi2
2389 have_bmi2 = (b & bit_BMI2) != 0;
2390 #endif
2391 }
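/* These flags feed the fast paths above (e.g. the SHLX/SHRX/SARX and
 * ANDN encodings) and the TCG_TARGET_HAS_* feature macros, so the
 * extensions are only used when CPUID actually advertised them. */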
2392 #endif
2393
2394 if (TCG_TARGET_REG_BITS == 64) {
2395 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2396 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2397 } else {
2398 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2399 }
2400
2401 tcg_regset_clear(tcg_target_call_clobber_regs);
2402 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2403 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2404 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2405 if (TCG_TARGET_REG_BITS == 64) {
2406 #if !defined(_WIN64)
2407 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2408 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2409 #endif
2410 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2411 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2412 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2413 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2414 }
2415
2416 tcg_regset_clear(s->reserved_regs);
2417 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2418
2419 tcg_add_target_add_op_defs(x86_op_defs);
2420 }
2421
2422 typedef struct {
2423 DebugFrameHeader h;
2424 uint8_t fde_def_cfa[4];
2425 uint8_t fde_reg_ofs[14];
2426 } DebugFrame;
2427
2428 /* We're expecting a 2-byte uleb128 encoded value. */
2429 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
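/* For reference: the fde_def_cfa initialisers below encode FRAME_SIZE as
 * two uleb128 bytes, (FRAME_SIZE & 0x7f) | 0x80 followed by
 * FRAME_SIZE >> 7, so the assertion above merely guarantees the value
 * fits in those 14 bits. */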
2430
2431 #if !defined(__ELF__)
2432 /* Host machine without ELF. */
2433 #elif TCG_TARGET_REG_BITS == 64
2434 #define ELF_HOST_MACHINE EM_X86_64
2435 static const DebugFrame debug_frame = {
2436 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2437 .h.cie.id = -1,
2438 .h.cie.version = 1,
2439 .h.cie.code_align = 1,
2440 .h.cie.data_align = 0x78, /* sleb128 -8 */
2441 .h.cie.return_column = 16,
2442
2443 /* Total FDE size does not include the "len" member. */
2444 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2445
2446 .fde_def_cfa = {
2447 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2448 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2449 (FRAME_SIZE >> 7)
2450 },
2451 .fde_reg_ofs = {
2452 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2453 /* The following ordering must match tcg_target_callee_save_regs. */
2454 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2455 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2456 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2457 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2458 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2459 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
2460 }
2461 };
2462 #else
2463 #define ELF_HOST_MACHINE EM_386
2464 static const DebugFrame debug_frame = {
2465 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2466 .h.cie.id = -1,
2467 .h.cie.version = 1,
2468 .h.cie.code_align = 1,
2469 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2470 .h.cie.return_column = 8,
2471
2472 /* Total FDE size does not include the "len" member. */
2473 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2474
2475 .fde_def_cfa = {
2476 12, 4, /* DW_CFA_def_cfa %esp, ... */
2477 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2478 (FRAME_SIZE >> 7)
2479 },
2480 .fde_reg_ofs = {
2481 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2482 /* The following ordering must match tcg_target_callee_save_regs. */
2483 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2484 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2485 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2486 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2487 }
2488 };
2489 #endif
2490
2491 #if defined(ELF_HOST_MACHINE)
2492 void tcg_register_jit(void *buf, size_t buf_size)
2493 {
2494 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2495 }
2496 #endif