/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
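
/* Note: throttling works in 10 ms timeslices; the configured percentage
 * is the fraction of every throttle-timer period that a vCPU spends
 * sleeping, so 99% is the strongest setting that still lets the guest
 * make progress.  See cpu_throttle_thread() below for the arithmetic.
 */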

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
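
/* Example reading of the check above: a weakly-ordered guest on a
 * strongly-ordered host (e.g. x86) passes, because every ordering the
 * guest requires is already guaranteed by the host; the reverse
 * combination fails, since the generated code would need barriers
 * that TCG does not currently emit.
 */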

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
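
/* Example: with icount_time_shift == 3, each instruction accounts for
 * 2^3 = 8 ns of virtual time, i.e. a nominal rate of 125 MIPS (the
 * initial guess installed by configure_icount() below).
 */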

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
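
/* The "delta * 2" comparisons above act as crude hysteresis: the shift
 * is only changed once the drift has roughly doubled since the last
 * adjustment (with ICOUNT_WOBBLE absorbing small variations), which
 * damps, but does not eliminate, the oscillation noted in the FIXME.
 */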

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids making the warps visible externally; for example,
             * you will not be sending network packets continuously instead
             * of every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
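
/* Worked example: at 50% throttle pct = 0.5, so sleeptime_ns is one
 * full 10 ms timeslice, while the tick period below stretches to
 * 10 ms / (1 - 0.5) = 20 ms; the vCPU therefore sleeps for half of
 * every period, matching the requested percentage.
 */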

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
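
/* For reference, the options parsed above correspond to command lines
 * such as "-icount shift=7" (fixed rate, one insn per 2^7 ns) or
 * "-icount shift=auto,align=on,sleep=on" (adaptive rate, tuned by the
 * two timers installed above).
 */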

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed while all vCPUs are idle and restarted again
 * once the first vCPU becomes runnable.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
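
/* The loop above re-reads tcg_current_rr_cpu because the round-robin
 * thread may have switched to another vCPU between the read and the
 * cpu_exit() call; retrying until the value is stable ensures the
 * currently scheduled vCPU really has been kicked.
 */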

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread. */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread. */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}
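
/* Stopping the kick timer while every vCPU is asleep and restarting
 * it on wake-up keeps the 100 ms round-robin kick from needlessly
 * waking an otherwise idle host.
 */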

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts. */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}
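
/* The 16-bit u16.low decrementer can only count down 0xffff
 * instructions at a time, so any remaining budget is parked in
 * icount_extra and consumed by later refills inside the cpu_exec()
 * loop.
 */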

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay. */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
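                /* fall through: the atomic step has completed, so the
                 * result is treated like any other and the loop re-runs.
                 */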
            default:
                /* Ignore everything else? */
                break;
            }
        } else if (cpu->unplug) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            qemu_mutex_unlock_iothread();
            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
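
/* Device emulation and most VM state changes run with this BQL
 * (qemu_global_mutex) held; the vCPU threads drop it around actual
 * guest execution (see tcg_cpu_exec() above) so the I/O thread can
 * make progress in parallel.
 */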

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}
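
/* If vm_stop() is called from a vCPU thread it must not wait for the
 * other vCPUs to stop (that could deadlock); instead it queues a
 * vmstop request for the main loop and stops only the calling vCPU.
 */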

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* Does a state transition even if the VM is already stopped; the
   current state is forgotten forever. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}