]> git.ipfire.org Git - thirdparty/qemu.git/blame - cpus.c
cpu: Change cpu_exec_init() arg to cpu, not env
[thirdparty/qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
d49b6836 30#include "qemu/error-report.h"
9c17d615 31#include "sysemu/sysemu.h"
022c62cb 32#include "exec/gdbstub.h"
9c17d615
PB
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
de0b36b6 35#include "qmp-commands.h"
296af7c9 36
1de7afc9 37#include "qemu/thread.h"
9c17d615
PB
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
1de7afc9
PB
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
cb365646 42#include "qemu/seqlock.h"
a4e15de9 43#include "qapi-event.h"
9cb805fd 44#include "hw/nmi.h"
0ff0fc19
JK
45
46#ifndef _WIN32
1de7afc9 47#include "qemu/compatfd.h"
0ff0fc19 48#endif
296af7c9 49
6d9cb73c
JK
50#ifdef CONFIG_LINUX
51
52#include <sys/prctl.h>
53
c0532a76
MT
54#ifndef PR_MCE_KILL
55#define PR_MCE_KILL 33
56#endif
57
6d9cb73c
JK
58#ifndef PR_MCE_KILL_SET
59#define PR_MCE_KILL_SET 1
60#endif
61
62#ifndef PR_MCE_KILL_EARLY
63#define PR_MCE_KILL_EARLY 1
64#endif
65
66#endif /* CONFIG_LINUX */
67
182735ef 68static CPUState *next_cpu;
27498bef
ST
69int64_t max_delay;
70int64_t max_advance;
296af7c9 71
321bc0b2
TC
72bool cpu_is_stopped(CPUState *cpu)
73{
74 return cpu->stopped || !runstate_is_running();
75}
76
a98ae1d8 77static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 78{
c64ca814 79 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
80 return false;
81 }
321bc0b2 82 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
83 return true;
84 }
8c2e1b00 85 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 86 kvm_halt_in_kernel()) {
ac873f1e
PM
87 return false;
88 }
89 return true;
90}
91
92static bool all_cpu_threads_idle(void)
93{
182735ef 94 CPUState *cpu;
ac873f1e 95
bdc44640 96 CPU_FOREACH(cpu) {
182735ef 97 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
98 return false;
99 }
100 }
101 return true;
102}
103
946fb27c
PB
104/***********************************************************/
105/* guest cycle counter */
106
a3270e19
PB
107/* Protected by TimersState seqlock */
108
5045e9d9 109static bool icount_sleep = true;
71468395 110static int64_t vm_clock_warp_start = -1;
946fb27c
PB
111/* Conversion factor from emulated instructions to virtual clock ticks. */
112static int icount_time_shift;
113/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
114#define MAX_ICOUNT_SHIFT 10
a3270e19 115
946fb27c
PB
116static QEMUTimer *icount_rt_timer;
117static QEMUTimer *icount_vm_timer;
118static QEMUTimer *icount_warp_timer;
946fb27c
PB
119
120typedef struct TimersState {
cb365646 121 /* Protected by BQL. */
946fb27c
PB
122 int64_t cpu_ticks_prev;
123 int64_t cpu_ticks_offset;
cb365646
LPF
124
125 /* cpu_clock_offset can be read out of BQL, so protect it with
126 * this lock.
127 */
128 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
129 int64_t cpu_clock_offset;
130 int32_t cpu_ticks_enabled;
131 int64_t dummy;
c96778bb
FK
132
133 /* Compensate for varying guest execution speed. */
134 int64_t qemu_icount_bias;
135 /* Only written by TCG thread */
136 int64_t qemu_icount;
946fb27c
PB
137} TimersState;
138
d9cd4007 139static TimersState timers_state;
946fb27c 140
2a62914b 141int64_t cpu_get_icount_raw(void)
946fb27c
PB
142{
143 int64_t icount;
4917cf44 144 CPUState *cpu = current_cpu;
946fb27c 145
c96778bb 146 icount = timers_state.qemu_icount;
4917cf44 147 if (cpu) {
99df7dce 148 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
149 fprintf(stderr, "Bad icount read\n");
150 exit(1);
946fb27c 151 }
28ecfd7a 152 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 153 }
2a62914b
PD
154 return icount;
155}
156
157/* Return the virtual CPU time, based on the instruction counter. */
158static int64_t cpu_get_icount_locked(void)
159{
160 int64_t icount = cpu_get_icount_raw();
3f031313 161 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
162}
163
17a15f1b
PB
164int64_t cpu_get_icount(void)
165{
166 int64_t icount;
167 unsigned start;
168
169 do {
170 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
171 icount = cpu_get_icount_locked();
172 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
173
174 return icount;
175}
176
3f031313
FK
177int64_t cpu_icount_to_ns(int64_t icount)
178{
179 return icount << icount_time_shift;
180}
181
946fb27c 182/* return the host CPU cycle counter and handle stop/restart */
cb365646 183/* Caller must hold the BQL */
946fb27c
PB
184int64_t cpu_get_ticks(void)
185{
5f3e3101
PB
186 int64_t ticks;
187
946fb27c
PB
188 if (use_icount) {
189 return cpu_get_icount();
190 }
5f3e3101
PB
191
192 ticks = timers_state.cpu_ticks_offset;
193 if (timers_state.cpu_ticks_enabled) {
194 ticks += cpu_get_real_ticks();
195 }
196
197 if (timers_state.cpu_ticks_prev > ticks) {
198 /* Note: non increasing ticks may happen if the host uses
199 software suspend */
200 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
201 ticks = timers_state.cpu_ticks_prev;
946fb27c 202 }
5f3e3101
PB
203
204 timers_state.cpu_ticks_prev = ticks;
205 return ticks;
946fb27c
PB
206}
207
cb365646 208static int64_t cpu_get_clock_locked(void)
946fb27c 209{
5f3e3101 210 int64_t ticks;
cb365646 211
5f3e3101
PB
212 ticks = timers_state.cpu_clock_offset;
213 if (timers_state.cpu_ticks_enabled) {
214 ticks += get_clock();
946fb27c 215 }
cb365646 216
5f3e3101 217 return ticks;
cb365646
LPF
218}
219
220/* return the host CPU monotonic timer and handle stop/restart */
221int64_t cpu_get_clock(void)
222{
223 int64_t ti;
224 unsigned start;
225
226 do {
227 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
228 ti = cpu_get_clock_locked();
229 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
230
231 return ti;
946fb27c
PB
232}
233
cb365646
LPF
234/* enable cpu_get_ticks()
235 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
236 */
946fb27c
PB
237void cpu_enable_ticks(void)
238{
cb365646
LPF
239 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
240 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
241 if (!timers_state.cpu_ticks_enabled) {
242 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
243 timers_state.cpu_clock_offset -= get_clock();
244 timers_state.cpu_ticks_enabled = 1;
245 }
cb365646 246 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
247}
248
249/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
250 * cpu_get_ticks() after that.
251 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
252 */
946fb27c
PB
253void cpu_disable_ticks(void)
254{
cb365646
LPF
255 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
256 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 257 if (timers_state.cpu_ticks_enabled) {
5f3e3101 258 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 259 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
260 timers_state.cpu_ticks_enabled = 0;
261 }
cb365646 262 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
263}
264
265/* Correlation between real and virtual time is always going to be
266 fairly approximate, so ignore small variation.
267 When the guest is idle real and virtual time will be aligned in
268 the IO wait loop. */
269#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
270
271static void icount_adjust(void)
272{
273 int64_t cur_time;
274 int64_t cur_icount;
275 int64_t delta;
a3270e19
PB
276
277 /* Protected by TimersState mutex. */
946fb27c 278 static int64_t last_delta;
468cc7cf 279
946fb27c
PB
280 /* If the VM is not running, then do nothing. */
281 if (!runstate_is_running()) {
282 return;
283 }
468cc7cf 284
17a15f1b
PB
285 seqlock_write_lock(&timers_state.vm_clock_seqlock);
286 cur_time = cpu_get_clock_locked();
287 cur_icount = cpu_get_icount_locked();
468cc7cf 288
946fb27c
PB
289 delta = cur_icount - cur_time;
290 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
291 if (delta > 0
292 && last_delta + ICOUNT_WOBBLE < delta * 2
293 && icount_time_shift > 0) {
294 /* The guest is getting too far ahead. Slow time down. */
295 icount_time_shift--;
296 }
297 if (delta < 0
298 && last_delta - ICOUNT_WOBBLE > delta * 2
299 && icount_time_shift < MAX_ICOUNT_SHIFT) {
300 /* The guest is getting too far behind. Speed time up. */
301 icount_time_shift++;
302 }
303 last_delta = delta;
c96778bb
FK
304 timers_state.qemu_icount_bias = cur_icount
305 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 306 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
307}
308
309static void icount_adjust_rt(void *opaque)
310{
40daca54 311 timer_mod(icount_rt_timer,
1979b908 312 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
313 icount_adjust();
314}
315
316static void icount_adjust_vm(void *opaque)
317{
40daca54
AB
318 timer_mod(icount_vm_timer,
319 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
320 get_ticks_per_sec() / 10);
946fb27c
PB
321 icount_adjust();
322}
323
324static int64_t qemu_icount_round(int64_t count)
325{
326 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
327}
328
329static void icount_warp_rt(void *opaque)
330{
17a15f1b
PB
331 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
332 * changes from -1 to another value, so the race here is okay.
333 */
334 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
335 return;
336 }
337
17a15f1b 338 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 339 if (runstate_is_running()) {
bf2a7ddb 340 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
341 int64_t warp_delta;
342
343 warp_delta = clock - vm_clock_warp_start;
344 if (use_icount == 2) {
946fb27c 345 /*
40daca54 346 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
347 * far ahead of real time.
348 */
17a15f1b 349 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 350 int64_t delta = clock - cur_icount;
8ed961d9 351 warp_delta = MIN(warp_delta, delta);
946fb27c 352 }
c96778bb 353 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
354 }
355 vm_clock_warp_start = -1;
17a15f1b 356 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
357
358 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
359 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
360 }
946fb27c
PB
361}
362
8156be56
PB
363void qtest_clock_warp(int64_t dest)
364{
40daca54 365 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 366 AioContext *aio_context;
8156be56 367 assert(qtest_enabled());
efef88b3 368 aio_context = qemu_get_aio_context();
8156be56 369 while (clock < dest) {
40daca54 370 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 371 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 372
17a15f1b 373 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 374 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
375 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
376
40daca54 377 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 378 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 379 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 380 }
40daca54 381 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
382}
383
40daca54 384void qemu_clock_warp(QEMUClockType type)
946fb27c 385{
ce78d18c 386 int64_t clock;
946fb27c
PB
387 int64_t deadline;
388
389 /*
390 * There are too many global variables to make the "warp" behavior
391 * applicable to other clocks. But a clock argument removes the
392 * need for if statements all over the place.
393 */
40daca54 394 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
395 return;
396 }
397
5045e9d9
VC
398 if (icount_sleep) {
399 /*
400 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
401 * This ensures that the deadline for the timer is computed correctly
402 * below.
403 * This also makes sure that the insn counter is synchronized before
404 * the CPU starts running, in case the CPU is woken by an event other
405 * than the earliest QEMU_CLOCK_VIRTUAL timer.
406 */
407 icount_warp_rt(NULL);
408 timer_del(icount_warp_timer);
409 }
ce78d18c 410 if (!all_cpu_threads_idle()) {
946fb27c
PB
411 return;
412 }
413
8156be56
PB
414 if (qtest_enabled()) {
415 /* When testing, qtest commands advance icount. */
416 return;
417 }
418
ac70aafc 419 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 420 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 421 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 422 if (deadline < 0) {
d7a0f71d
VC
423 static bool notified;
424 if (!icount_sleep && !notified) {
425 error_report("WARNING: icount sleep disabled and no active timers");
426 notified = true;
427 }
ce78d18c 428 return;
ac70aafc
AB
429 }
430
946fb27c
PB
431 if (deadline > 0) {
432 /*
40daca54 433 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
434 * sleep. Otherwise, the CPU might be waiting for a future timer
435 * interrupt to wake it up, but the interrupt never comes because
436 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 437 * QEMU_CLOCK_VIRTUAL.
946fb27c 438 */
5045e9d9
VC
439 if (!icount_sleep) {
440 /*
441 * We never let VCPUs sleep in no sleep icount mode.
442 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
443 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
444 * It is useful when we want a deterministic execution time,
445 * isolated from host latencies.
446 */
447 seqlock_write_lock(&timers_state.vm_clock_seqlock);
448 timers_state.qemu_icount_bias += deadline;
449 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
450 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
451 } else {
452 /*
453 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
454 * "real" time, (related to the time left until the next event) has
455 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
456 * This avoids that the warps are visible externally; for example,
457 * you will not be sending network packets continuously instead of
458 * every 100ms.
459 */
460 seqlock_write_lock(&timers_state.vm_clock_seqlock);
461 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
462 vm_clock_warp_start = clock;
463 }
464 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
465 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 466 }
ac70aafc 467 } else if (deadline == 0) {
40daca54 468 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
469 }
470}
471
d09eae37
FK
472static bool icount_state_needed(void *opaque)
473{
474 return use_icount;
475}
476
477/*
478 * This is a subsection for icount migration.
479 */
480static const VMStateDescription icount_vmstate_timers = {
481 .name = "timer/icount",
482 .version_id = 1,
483 .minimum_version_id = 1,
5cd8cada 484 .needed = icount_state_needed,
d09eae37
FK
485 .fields = (VMStateField[]) {
486 VMSTATE_INT64(qemu_icount_bias, TimersState),
487 VMSTATE_INT64(qemu_icount, TimersState),
488 VMSTATE_END_OF_LIST()
489 }
490};
491
946fb27c
PB
492static const VMStateDescription vmstate_timers = {
493 .name = "timer",
494 .version_id = 2,
495 .minimum_version_id = 1,
35d08458 496 .fields = (VMStateField[]) {
946fb27c
PB
497 VMSTATE_INT64(cpu_ticks_offset, TimersState),
498 VMSTATE_INT64(dummy, TimersState),
499 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
500 VMSTATE_END_OF_LIST()
d09eae37 501 },
5cd8cada
JQ
502 .subsections = (const VMStateDescription*[]) {
503 &icount_vmstate_timers,
504 NULL
946fb27c
PB
505 }
506};
507
4603ea01
PD
508void cpu_ticks_init(void)
509{
510 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
511 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
512}
513
1ad9580b 514void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 515{
1ad9580b 516 const char *option;
a8bfac37 517 char *rem_str = NULL;
1ad9580b 518
1ad9580b 519 option = qemu_opt_get(opts, "shift");
946fb27c 520 if (!option) {
a8bfac37
ST
521 if (qemu_opt_get(opts, "align") != NULL) {
522 error_setg(errp, "Please specify shift option when using align");
523 }
946fb27c
PB
524 return;
525 }
f1f4b57e
VC
526
527 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
528 if (icount_sleep) {
529 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
530 icount_warp_rt, NULL);
531 }
f1f4b57e 532
a8bfac37 533 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
534
535 if (icount_align_option && !icount_sleep) {
536 error_setg(errp, "align=on and sleep=no are incompatible");
537 }
946fb27c 538 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
539 errno = 0;
540 icount_time_shift = strtol(option, &rem_str, 0);
541 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
542 error_setg(errp, "icount: Invalid shift value");
543 }
946fb27c
PB
544 use_icount = 1;
545 return;
a8bfac37
ST
546 } else if (icount_align_option) {
547 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e
VC
548 } else if (!icount_sleep) {
549 error_setg(errp, "shift=auto and sleep=no are incompatible");
946fb27c
PB
550 }
551
552 use_icount = 2;
553
554 /* 125MIPS seems a reasonable initial guess at the guest speed.
555 It will be corrected fairly quickly anyway. */
556 icount_time_shift = 3;
557
558 /* Have both realtime and virtual time triggers for speed adjustment.
559 The realtime trigger catches emulated time passing too slowly,
560 the virtual time trigger catches emulated time passing too fast.
561 Realtime triggers occur even when idle, so use them less frequently
562 than VM triggers. */
bf2a7ddb
PD
563 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
564 icount_adjust_rt, NULL);
40daca54 565 timer_mod(icount_rt_timer,
bf2a7ddb 566 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
567 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
568 icount_adjust_vm, NULL);
569 timer_mod(icount_vm_timer,
570 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
571 get_ticks_per_sec() / 10);
946fb27c
PB
572}
573
296af7c9
BS
574/***********************************************************/
575void hw_error(const char *fmt, ...)
576{
577 va_list ap;
55e5c285 578 CPUState *cpu;
296af7c9
BS
579
580 va_start(ap, fmt);
581 fprintf(stderr, "qemu: hardware error: ");
582 vfprintf(stderr, fmt, ap);
583 fprintf(stderr, "\n");
bdc44640 584 CPU_FOREACH(cpu) {
55e5c285 585 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 586 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
587 }
588 va_end(ap);
589 abort();
590}
591
592void cpu_synchronize_all_states(void)
593{
182735ef 594 CPUState *cpu;
296af7c9 595
bdc44640 596 CPU_FOREACH(cpu) {
182735ef 597 cpu_synchronize_state(cpu);
296af7c9
BS
598 }
599}
600
601void cpu_synchronize_all_post_reset(void)
602{
182735ef 603 CPUState *cpu;
296af7c9 604
bdc44640 605 CPU_FOREACH(cpu) {
182735ef 606 cpu_synchronize_post_reset(cpu);
296af7c9
BS
607 }
608}
609
610void cpu_synchronize_all_post_init(void)
611{
182735ef 612 CPUState *cpu;
296af7c9 613
bdc44640 614 CPU_FOREACH(cpu) {
182735ef 615 cpu_synchronize_post_init(cpu);
296af7c9
BS
616 }
617}
618
de9d61e8
MT
619void cpu_clean_all_dirty(void)
620{
621 CPUState *cpu;
622
623 CPU_FOREACH(cpu) {
624 cpu_clean_state(cpu);
625 }
626}
627
56983463 628static int do_vm_stop(RunState state)
296af7c9 629{
56983463
KW
630 int ret = 0;
631
1354869c 632 if (runstate_is_running()) {
296af7c9 633 cpu_disable_ticks();
296af7c9 634 pause_all_vcpus();
f5bbfba1 635 runstate_set(state);
1dfb4dd9 636 vm_state_notify(0, state);
a4e15de9 637 qapi_event_send_stop(&error_abort);
296af7c9 638 }
56983463 639
594a45ce
KW
640 bdrv_drain_all();
641 ret = bdrv_flush_all();
642
56983463 643 return ret;
296af7c9
BS
644}
645
a1fcaa73 646static bool cpu_can_run(CPUState *cpu)
296af7c9 647{
4fdeee7c 648 if (cpu->stop) {
a1fcaa73 649 return false;
0ab07c62 650 }
321bc0b2 651 if (cpu_is_stopped(cpu)) {
a1fcaa73 652 return false;
0ab07c62 653 }
a1fcaa73 654 return true;
296af7c9
BS
655}
656
91325046 657static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 658{
64f6b346 659 gdb_set_stop_cpu(cpu);
8cf71710 660 qemu_system_debug_request();
f324e766 661 cpu->stopped = true;
3c638d06
JK
662}
663
714bd040
PB
664static void cpu_signal(int sig)
665{
4917cf44
AF
666 if (current_cpu) {
667 cpu_exit(current_cpu);
714bd040
PB
668 }
669 exit_request = 1;
670}
714bd040 671
6d9cb73c
JK
672#ifdef CONFIG_LINUX
673static void sigbus_reraise(void)
674{
675 sigset_t set;
676 struct sigaction action;
677
678 memset(&action, 0, sizeof(action));
679 action.sa_handler = SIG_DFL;
680 if (!sigaction(SIGBUS, &action, NULL)) {
681 raise(SIGBUS);
682 sigemptyset(&set);
683 sigaddset(&set, SIGBUS);
684 sigprocmask(SIG_UNBLOCK, &set, NULL);
685 }
686 perror("Failed to re-raise SIGBUS!\n");
687 abort();
688}
689
690static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
691 void *ctx)
692{
693 if (kvm_on_sigbus(siginfo->ssi_code,
694 (void *)(intptr_t)siginfo->ssi_addr)) {
695 sigbus_reraise();
696 }
697}
698
699static void qemu_init_sigbus(void)
700{
701 struct sigaction action;
702
703 memset(&action, 0, sizeof(action));
704 action.sa_flags = SA_SIGINFO;
705 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
706 sigaction(SIGBUS, &action, NULL);
707
708 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
709}
710
290adf38 711static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
712{
713 struct timespec ts = { 0, 0 };
714 siginfo_t siginfo;
715 sigset_t waitset;
716 sigset_t chkset;
717 int r;
718
719 sigemptyset(&waitset);
720 sigaddset(&waitset, SIG_IPI);
721 sigaddset(&waitset, SIGBUS);
722
723 do {
724 r = sigtimedwait(&waitset, &siginfo, &ts);
725 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
726 perror("sigtimedwait");
727 exit(1);
728 }
729
730 switch (r) {
731 case SIGBUS:
290adf38 732 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
733 sigbus_reraise();
734 }
735 break;
736 default:
737 break;
738 }
739
740 r = sigpending(&chkset);
741 if (r == -1) {
742 perror("sigpending");
743 exit(1);
744 }
745 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
746}
747
6d9cb73c
JK
748#else /* !CONFIG_LINUX */
749
750static void qemu_init_sigbus(void)
751{
752}
1ab3c6c0 753
290adf38 754static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
755{
756}
6d9cb73c
JK
757#endif /* !CONFIG_LINUX */
758
296af7c9 759#ifndef _WIN32
55f8d6ac
JK
760static void dummy_signal(int sig)
761{
762}
55f8d6ac 763
13618e05 764static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
765{
766 int r;
767 sigset_t set;
768 struct sigaction sigact;
769
770 memset(&sigact, 0, sizeof(sigact));
771 sigact.sa_handler = dummy_signal;
772 sigaction(SIG_IPI, &sigact, NULL);
773
714bd040
PB
774 pthread_sigmask(SIG_BLOCK, NULL, &set);
775 sigdelset(&set, SIG_IPI);
714bd040 776 sigdelset(&set, SIGBUS);
491d6e80 777 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
778 if (r) {
779 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
780 exit(1);
781 }
782}
783
784static void qemu_tcg_init_cpu_signals(void)
785{
714bd040
PB
786 sigset_t set;
787 struct sigaction sigact;
788
789 memset(&sigact, 0, sizeof(sigact));
790 sigact.sa_handler = cpu_signal;
791 sigaction(SIG_IPI, &sigact, NULL);
792
793 sigemptyset(&set);
794 sigaddset(&set, SIG_IPI);
795 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
796}
797
55f8d6ac 798#else /* _WIN32 */
13618e05 799static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 800{
714bd040
PB
801 abort();
802}
ff48eb5f 803
714bd040
PB
804static void qemu_tcg_init_cpu_signals(void)
805{
ff48eb5f 806}
714bd040 807#endif /* _WIN32 */
ff48eb5f 808
b2532d88 809static QemuMutex qemu_global_mutex;
46daff13 810static QemuCond qemu_io_proceeded_cond;
6b49809c 811static unsigned iothread_requesting_mutex;
296af7c9
BS
812
813static QemuThread io_thread;
814
815static QemuThread *tcg_cpu_thread;
816static QemuCond *tcg_halt_cond;
817
296af7c9
BS
818/* cpu creation */
819static QemuCond qemu_cpu_cond;
820/* system init */
296af7c9 821static QemuCond qemu_pause_cond;
e82bcec2 822static QemuCond qemu_work_cond;
296af7c9 823
d3b12f5d 824void qemu_init_cpu_loop(void)
296af7c9 825{
6d9cb73c 826 qemu_init_sigbus();
ed94592b 827 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
828 qemu_cond_init(&qemu_pause_cond);
829 qemu_cond_init(&qemu_work_cond);
46daff13 830 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 831 qemu_mutex_init(&qemu_global_mutex);
296af7c9 832
b7680cb6 833 qemu_thread_get_self(&io_thread);
296af7c9
BS
834}
835
f100f0b3 836void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
837{
838 struct qemu_work_item wi;
839
60e82579 840 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
841 func(data);
842 return;
843 }
844
845 wi.func = func;
846 wi.data = data;
3c02270d 847 wi.free = false;
c64ca814
AF
848 if (cpu->queued_work_first == NULL) {
849 cpu->queued_work_first = &wi;
0ab07c62 850 } else {
c64ca814 851 cpu->queued_work_last->next = &wi;
0ab07c62 852 }
c64ca814 853 cpu->queued_work_last = &wi;
e82bcec2
MT
854 wi.next = NULL;
855 wi.done = false;
856
c08d7424 857 qemu_cpu_kick(cpu);
e82bcec2 858 while (!wi.done) {
4917cf44 859 CPUState *self_cpu = current_cpu;
e82bcec2
MT
860
861 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 862 current_cpu = self_cpu;
e82bcec2
MT
863 }
864}
865
3c02270d
CV
866void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
867{
868 struct qemu_work_item *wi;
869
870 if (qemu_cpu_is_self(cpu)) {
871 func(data);
872 return;
873 }
874
875 wi = g_malloc0(sizeof(struct qemu_work_item));
876 wi->func = func;
877 wi->data = data;
878 wi->free = true;
879 if (cpu->queued_work_first == NULL) {
880 cpu->queued_work_first = wi;
881 } else {
882 cpu->queued_work_last->next = wi;
883 }
884 cpu->queued_work_last = wi;
885 wi->next = NULL;
886 wi->done = false;
887
888 qemu_cpu_kick(cpu);
889}
890
6d45b109 891static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
892{
893 struct qemu_work_item *wi;
894
c64ca814 895 if (cpu->queued_work_first == NULL) {
e82bcec2 896 return;
0ab07c62 897 }
e82bcec2 898
c64ca814
AF
899 while ((wi = cpu->queued_work_first)) {
900 cpu->queued_work_first = wi->next;
e82bcec2
MT
901 wi->func(wi->data);
902 wi->done = true;
3c02270d
CV
903 if (wi->free) {
904 g_free(wi);
905 }
e82bcec2 906 }
c64ca814 907 cpu->queued_work_last = NULL;
e82bcec2
MT
908 qemu_cond_broadcast(&qemu_work_cond);
909}
910
509a0d78 911static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 912{
4fdeee7c
AF
913 if (cpu->stop) {
914 cpu->stop = false;
f324e766 915 cpu->stopped = true;
296af7c9
BS
916 qemu_cond_signal(&qemu_pause_cond);
917 }
6d45b109 918 flush_queued_work(cpu);
216fc9a4 919 cpu->thread_kicked = false;
296af7c9
BS
920}
921
6cabe1f3 922static void qemu_tcg_wait_io_event(void)
296af7c9 923{
182735ef 924 CPUState *cpu;
6cabe1f3 925
16400322 926 while (all_cpu_threads_idle()) {
ab33fcda
PB
927 /* Start accounting real time to the virtual clock if the CPUs
928 are idle. */
40daca54 929 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 930 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 931 }
296af7c9 932
46daff13
PB
933 while (iothread_requesting_mutex) {
934 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
935 }
6cabe1f3 936
bdc44640 937 CPU_FOREACH(cpu) {
182735ef 938 qemu_wait_io_event_common(cpu);
6cabe1f3 939 }
296af7c9
BS
940}
941
fd529e8f 942static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 943{
a98ae1d8 944 while (cpu_thread_is_idle(cpu)) {
f5c121b8 945 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 946 }
296af7c9 947
290adf38 948 qemu_kvm_eat_signals(cpu);
509a0d78 949 qemu_wait_io_event_common(cpu);
296af7c9
BS
950}
951
7e97cd88 952static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 953{
48a106bd 954 CPUState *cpu = arg;
84b4915d 955 int r;
296af7c9 956
2e7f7a3c 957 qemu_mutex_lock_iothread();
814e612e 958 qemu_thread_get_self(cpu->thread);
9f09e18a 959 cpu->thread_id = qemu_get_thread_id();
626cf8f4 960 cpu->can_do_io = 1;
4917cf44 961 current_cpu = cpu;
296af7c9 962
504134d2 963 r = kvm_init_vcpu(cpu);
84b4915d
JK
964 if (r < 0) {
965 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
966 exit(1);
967 }
296af7c9 968
13618e05 969 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
970
971 /* signal CPU creation */
61a46217 972 cpu->created = true;
296af7c9
BS
973 qemu_cond_signal(&qemu_cpu_cond);
974
296af7c9 975 while (1) {
a1fcaa73 976 if (cpu_can_run(cpu)) {
1458c363 977 r = kvm_cpu_exec(cpu);
83f338f7 978 if (r == EXCP_DEBUG) {
91325046 979 cpu_handle_guest_debug(cpu);
83f338f7 980 }
0ab07c62 981 }
fd529e8f 982 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
983 }
984
985 return NULL;
986}
987
c7f0f3b1
AL
988static void *qemu_dummy_cpu_thread_fn(void *arg)
989{
990#ifdef _WIN32
991 fprintf(stderr, "qtest is not supported under Windows\n");
992 exit(1);
993#else
10a9021d 994 CPUState *cpu = arg;
c7f0f3b1
AL
995 sigset_t waitset;
996 int r;
997
998 qemu_mutex_lock_iothread();
814e612e 999 qemu_thread_get_self(cpu->thread);
9f09e18a 1000 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1001 cpu->can_do_io = 1;
c7f0f3b1
AL
1002
1003 sigemptyset(&waitset);
1004 sigaddset(&waitset, SIG_IPI);
1005
1006 /* signal CPU creation */
61a46217 1007 cpu->created = true;
c7f0f3b1
AL
1008 qemu_cond_signal(&qemu_cpu_cond);
1009
4917cf44 1010 current_cpu = cpu;
c7f0f3b1 1011 while (1) {
4917cf44 1012 current_cpu = NULL;
c7f0f3b1
AL
1013 qemu_mutex_unlock_iothread();
1014 do {
1015 int sig;
1016 r = sigwait(&waitset, &sig);
1017 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1018 if (r == -1) {
1019 perror("sigwait");
1020 exit(1);
1021 }
1022 qemu_mutex_lock_iothread();
4917cf44 1023 current_cpu = cpu;
509a0d78 1024 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1025 }
1026
1027 return NULL;
1028#endif
1029}
1030
bdb7ca67
JK
1031static void tcg_exec_all(void);
1032
7e97cd88 1033static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1034{
c3586ba7 1035 CPUState *cpu = arg;
296af7c9 1036
2e7f7a3c 1037 qemu_mutex_lock_iothread();
55f8d6ac 1038 qemu_tcg_init_cpu_signals();
814e612e 1039 qemu_thread_get_self(cpu->thread);
296af7c9 1040
38fcbd3f
AF
1041 CPU_FOREACH(cpu) {
1042 cpu->thread_id = qemu_get_thread_id();
1043 cpu->created = true;
626cf8f4 1044 cpu->can_do_io = 1;
38fcbd3f 1045 }
296af7c9
BS
1046 qemu_cond_signal(&qemu_cpu_cond);
1047
fa7d1867 1048 /* wait for initial kick-off after machine start */
c28e399c 1049 while (first_cpu->stopped) {
fa7d1867 1050 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1051
1052 /* process any pending work */
bdc44640 1053 CPU_FOREACH(cpu) {
182735ef 1054 qemu_wait_io_event_common(cpu);
8e564b4e 1055 }
0ab07c62 1056 }
296af7c9 1057
21618b3e
PB
1058 /* process any pending work */
1059 exit_request = 1;
1060
296af7c9 1061 while (1) {
bdb7ca67 1062 tcg_exec_all();
ac70aafc
AB
1063
1064 if (use_icount) {
40daca54 1065 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1066
1067 if (deadline == 0) {
40daca54 1068 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1069 }
3b2319a3 1070 }
6cabe1f3 1071 qemu_tcg_wait_io_event();
296af7c9
BS
1072 }
1073
1074 return NULL;
1075}
1076
2ff09a40 1077static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1078{
1079#ifndef _WIN32
1080 int err;
1081
814e612e 1082 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1083 if (err) {
1084 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1085 exit(1);
1086 }
1087#else /* _WIN32 */
60e82579 1088 if (!qemu_cpu_is_self(cpu)) {
ed9164a3
OH
1089 CONTEXT tcgContext;
1090
1091 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1092 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1093 GetLastError());
1094 exit(1);
1095 }
1096
1097 /* On multi-core systems, we are not sure that the thread is actually
1098 * suspended until we can get the context.
1099 */
1100 tcgContext.ContextFlags = CONTEXT_CONTROL;
1101 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1102 continue;
1103 }
1104
cc015e9a 1105 cpu_signal(0);
ed9164a3
OH
1106
1107 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
7f1721df 1108 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
ed9164a3
OH
1109 GetLastError());
1110 exit(1);
1111 }
cc015e9a
PB
1112 }
1113#endif
1114}
1115
c08d7424 1116void qemu_cpu_kick(CPUState *cpu)
296af7c9 1117{
f5c121b8 1118 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1119 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1120 qemu_cpu_kick_thread(cpu);
216fc9a4 1121 cpu->thread_kicked = true;
aa2c364b 1122 }
296af7c9
BS
1123}
1124
46d62fac 1125void qemu_cpu_kick_self(void)
296af7c9 1126{
b55c22c6 1127#ifndef _WIN32
4917cf44 1128 assert(current_cpu);
296af7c9 1129
4917cf44
AF
1130 if (!current_cpu->thread_kicked) {
1131 qemu_cpu_kick_thread(current_cpu);
1132 current_cpu->thread_kicked = true;
296af7c9 1133 }
b55c22c6
PB
1134#else
1135 abort();
1136#endif
296af7c9
BS
1137}
1138
60e82579 1139bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1140{
814e612e 1141 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1142}
1143
79e2b9ae 1144bool qemu_in_vcpu_thread(void)
aa723c23 1145{
4917cf44 1146 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1147}
1148
afbe7053
PB
/* Per-thread flag: does this thread currently hold the iothread mutex
 * (Big QEMU Lock)?  Maintained by qemu_mutex_lock/unlock_iothread(). */
static __thread bool iothread_locked = false;

/* Query whether the calling thread holds the iothread mutex. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1155
296af7c9
BS
/* Acquire the iothread mutex (BQL).  If a TCG vCPU thread may currently
 * hold it while executing guest code, kick that thread so the lock is
 * handed over promptly instead of waiting for the next natural exit.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->thread) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Contended: bump the TCG thread out of execution, then block. */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1176
/* Release the iothread mutex; the per-thread flag is cleared first, while
 * the lock is still held. */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1182
1183static int all_vcpus_paused(void)
1184{
bdc44640 1185 CPUState *cpu;
296af7c9 1186
bdc44640 1187 CPU_FOREACH(cpu) {
182735ef 1188 if (!cpu->stopped) {
296af7c9 1189 return 0;
0ab07c62 1190 }
296af7c9
BS
1191 }
1192
1193 return 1;
1194}
1195
/* Request every vCPU to stop and wait until all have acknowledged.
 * When invoked from a vCPU thread, the calling CPU is stopped directly;
 * in the non-KVM case the remaining CPUs are simply marked stopped
 * (under TCG they all run on this very thread, so nothing else executes
 * them) and we return without waiting.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick: a CPU may have resumed waiting without stopping. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1224
2993683b
IM
/* Clear any stop request/acknowledgement on @cpu and wake it up. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1231
296af7c9
BS
1232void resume_all_vcpus(void)
1233{
bdc44640 1234 CPUState *cpu;
296af7c9 1235
40daca54 1236 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1237 CPU_FOREACH(cpu) {
182735ef 1238 cpu_resume(cpu);
296af7c9
BS
1239 }
1240}
1241
4900116e
DDAG
1242/* For temporary buffers for forming a name */
1243#define VCPU_THREAD_NAME_SIZE 16
1244
e5ab30a2 1245static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1246{
4900116e
DDAG
1247 char thread_name[VCPU_THREAD_NAME_SIZE];
1248
09daed84
EI
1249 tcg_cpu_address_space_init(cpu, cpu->as);
1250
296af7c9
BS
1251 /* share a single thread for all cpus with TCG */
1252 if (!tcg_cpu_thread) {
814e612e 1253 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1254 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1255 qemu_cond_init(cpu->halt_cond);
1256 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1257 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1258 cpu->cpu_index);
1259 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1260 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1261#ifdef _WIN32
814e612e 1262 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1263#endif
61a46217 1264 while (!cpu->created) {
18a85728 1265 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1266 }
814e612e 1267 tcg_cpu_thread = cpu->thread;
296af7c9 1268 } else {
814e612e 1269 cpu->thread = tcg_cpu_thread;
f5c121b8 1270 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1271 }
1272}
1273
48a106bd 1274static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1275{
4900116e
DDAG
1276 char thread_name[VCPU_THREAD_NAME_SIZE];
1277
814e612e 1278 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1279 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1280 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1281 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1282 cpu->cpu_index);
1283 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1284 cpu, QEMU_THREAD_JOINABLE);
61a46217 1285 while (!cpu->created) {
18a85728 1286 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1287 }
296af7c9
BS
1288}
1289
10a9021d 1290static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1291{
4900116e
DDAG
1292 char thread_name[VCPU_THREAD_NAME_SIZE];
1293
814e612e 1294 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1295 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1296 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1297 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1298 cpu->cpu_index);
1299 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1300 QEMU_THREAD_JOINABLE);
61a46217 1301 while (!cpu->created) {
c7f0f3b1
AL
1302 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1303 }
1304}
1305
c643bed9 1306void qemu_init_vcpu(CPUState *cpu)
296af7c9 1307{
ce3960eb
AF
1308 cpu->nr_cores = smp_cores;
1309 cpu->nr_threads = smp_threads;
f324e766 1310 cpu->stopped = true;
0ab07c62 1311 if (kvm_enabled()) {
48a106bd 1312 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1313 } else if (tcg_enabled()) {
e5ab30a2 1314 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1315 } else {
10a9021d 1316 qemu_dummy_start_vcpu(cpu);
0ab07c62 1317 }
296af7c9
BS
1318}
1319
b4a3d965 1320void cpu_stop_current(void)
296af7c9 1321{
4917cf44
AF
1322 if (current_cpu) {
1323 current_cpu->stop = false;
1324 current_cpu->stopped = true;
1325 cpu_exit(current_cpu);
67bb172f 1326 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1327 }
296af7c9
BS
1328}
1329
/* Stop the VM, entering run state @state.  From a vCPU thread the stop is
 * only *requested* (the main loop performs it later) and the calling CPU
 * is halted; otherwise the stop happens synchronously via do_vm_stop().
 * Returns 0, or the error from do_vm_stop().
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
1345
8a9236f1
LC
1346/* does a state transition even if the VM is already stopped,
1347 current state is forgotten forever */
56983463 1348int vm_stop_force_state(RunState state)
8a9236f1
LC
1349{
1350 if (runstate_is_running()) {
56983463 1351 return vm_stop(state);
8a9236f1
LC
1352 } else {
1353 runstate_set(state);
594a45ce
KW
1354 /* Make sure to return an error if the flush in a previous vm_stop()
1355 * failed. */
1356 return bdrv_flush_all();
8a9236f1
LC
1357 }
1358}
1359
3d57f789 1360static int tcg_cpu_exec(CPUState *cpu)
296af7c9 1361{
3d57f789 1362 CPUArchState *env = cpu->env_ptr;
296af7c9
BS
1363 int ret;
1364#ifdef CONFIG_PROFILER
1365 int64_t ti;
1366#endif
1367
1368#ifdef CONFIG_PROFILER
1369 ti = profile_getclock();
1370#endif
1371 if (use_icount) {
1372 int64_t count;
ac70aafc 1373 int64_t deadline;
296af7c9 1374 int decr;
c96778bb
FK
1375 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1376 + cpu->icount_extra);
28ecfd7a 1377 cpu->icount_decr.u16.low = 0;
efee7340 1378 cpu->icount_extra = 0;
40daca54 1379 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1380
1381 /* Maintain prior (possibly buggy) behaviour where if no deadline
40daca54 1382 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
ac70aafc
AB
1383 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1384 * nanoseconds.
1385 */
1386 if ((deadline < 0) || (deadline > INT32_MAX)) {
1387 deadline = INT32_MAX;
1388 }
1389
1390 count = qemu_icount_round(deadline);
c96778bb 1391 timers_state.qemu_icount += count;
296af7c9
BS
1392 decr = (count > 0xffff) ? 0xffff : count;
1393 count -= decr;
28ecfd7a 1394 cpu->icount_decr.u16.low = decr;
efee7340 1395 cpu->icount_extra = count;
296af7c9
BS
1396 }
1397 ret = cpu_exec(env);
1398#ifdef CONFIG_PROFILER
89d5cbdd 1399 tcg_time += profile_getclock() - ti;
296af7c9
BS
1400#endif
1401 if (use_icount) {
1402 /* Fold pending instructions back into the
1403 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1404 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1405 + cpu->icount_extra);
28ecfd7a 1406 cpu->icount_decr.u32 = 0;
efee7340 1407 cpu->icount_extra = 0;
296af7c9
BS
1408 }
1409 return ret;
1410}
1411
bdb7ca67 1412static void tcg_exec_all(void)
296af7c9 1413{
9a36085b
JK
1414 int r;
1415
40daca54
AB
1416 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1417 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
ab33fcda 1418
0ab07c62 1419 if (next_cpu == NULL) {
296af7c9 1420 next_cpu = first_cpu;
0ab07c62 1421 }
bdc44640 1422 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1423 CPUState *cpu = next_cpu;
296af7c9 1424
40daca54 1425 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1426 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1427
a1fcaa73 1428 if (cpu_can_run(cpu)) {
3d57f789 1429 r = tcg_cpu_exec(cpu);
9a36085b 1430 if (r == EXCP_DEBUG) {
91325046 1431 cpu_handle_guest_debug(cpu);
3c638d06
JK
1432 break;
1433 }
f324e766 1434 } else if (cpu->stop || cpu->stopped) {
296af7c9
BS
1435 break;
1436 }
1437 }
c629a4bc 1438 exit_request = 0;
296af7c9
BS
1439}
1440
9a78eead 1441void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1442{
1443 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1444#if defined(cpu_list)
1445 cpu_list(f, cpu_fprintf);
262353cb
BS
1446#endif
1447}
de0b36b6
LC
1448
/* QMP "query-cpus": build a CpuInfoList describing every CPU (index,
 * current/halted flags, QOM path, thread id) plus target-specific
 * program-counter fields selected by the TARGET_* #ifdefs.
 * Caller owns the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Sync register state from the accelerator before reading it. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1512
1513void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1514 bool has_cpu, int64_t cpu_index, Error **errp)
1515{
1516 FILE *f;
1517 uint32_t l;
55e5c285 1518 CPUState *cpu;
0cfd6a9a 1519 uint8_t buf[1024];
0dc9daf0 1520 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1521
1522 if (!has_cpu) {
1523 cpu_index = 0;
1524 }
1525
151d1322
AF
1526 cpu = qemu_get_cpu(cpu_index);
1527 if (cpu == NULL) {
c6bd8c70
MA
1528 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1529 "a CPU number");
0cfd6a9a
LC
1530 return;
1531 }
1532
1533 f = fopen(filename, "wb");
1534 if (!f) {
618da851 1535 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1536 return;
1537 }
1538
1539 while (size != 0) {
1540 l = sizeof(buf);
1541 if (l > size)
1542 l = size;
2f4d0f59 1543 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1544 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1545 " specified", orig_addr, orig_size);
2f4d0f59
AK
1546 goto exit;
1547 }
0cfd6a9a 1548 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1549 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1550 goto exit;
1551 }
1552 addr += l;
1553 size -= l;
1554 }
1555
1556exit:
1557 fclose(f);
1558}
6d3962bf
LC
1559
1560void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1561 Error **errp)
1562{
1563 FILE *f;
1564 uint32_t l;
1565 uint8_t buf[1024];
1566
1567 f = fopen(filename, "wb");
1568 if (!f) {
618da851 1569 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1570 return;
1571 }
1572
1573 while (size != 0) {
1574 l = sizeof(buf);
1575 if (l > size)
1576 l = size;
eb6282f2 1577 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1578 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1579 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1580 goto exit;
1581 }
1582 addr += l;
1583 size -= l;
1584 }
1585
1586exit:
1587 fclose(f);
1588}
ab49ab5c
LC
1589
/* QMP "inject-nmi": on x86, deliver an NMI to every CPU — through the
 * APIC when the CPU has one, otherwise via a raw CPU_INTERRUPT_NMI.
 * On other targets, defer to the generic NMI monitor handler.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1608
/* Print host/guest clock drift statistics; a no-op unless icount is in
 * use.  The delay/advance maxima are only tracked with icount alignment
 * enabled. */
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}