]> git.ipfire.org Git - thirdparty/qemu.git/blame - cpus.c
virtio-serial: switch to standard-headers
[thirdparty/qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
83c9089e 28#include "monitor/monitor.h"
a4e15de9 29#include "qapi/qmp/qerror.h"
9c17d615 30#include "sysemu/sysemu.h"
022c62cb 31#include "exec/gdbstub.h"
9c17d615
PB
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
de0b36b6 34#include "qmp-commands.h"
296af7c9 35
1de7afc9 36#include "qemu/thread.h"
9c17d615
PB
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
1de7afc9
PB
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
cb365646 41#include "qemu/seqlock.h"
a4e15de9 42#include "qapi-event.h"
9cb805fd 43#include "hw/nmi.h"
0ff0fc19
JK
44
45#ifndef _WIN32
1de7afc9 46#include "qemu/compatfd.h"
0ff0fc19 47#endif
296af7c9 48
6d9cb73c
JK
49#ifdef CONFIG_LINUX
50
51#include <sys/prctl.h>
52
c0532a76
MT
53#ifndef PR_MCE_KILL
54#define PR_MCE_KILL 33
55#endif
56
6d9cb73c
JK
57#ifndef PR_MCE_KILL_SET
58#define PR_MCE_KILL_SET 1
59#endif
60
61#ifndef PR_MCE_KILL_EARLY
62#define PR_MCE_KILL_EARLY 1
63#endif
64
65#endif /* CONFIG_LINUX */
66
182735ef 67static CPUState *next_cpu;
27498bef
ST
68int64_t max_delay;
69int64_t max_advance;
296af7c9 70
321bc0b2
TC
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
a98ae1d8 76static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 77{
c64ca814 78 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
79 return false;
80 }
321bc0b2 81 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
82 return true;
83 }
8c2e1b00 84 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 85 kvm_halt_in_kernel()) {
ac873f1e
PM
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
182735ef 93 CPUState *cpu;
ac873f1e 94
bdc44640 95 CPU_FOREACH(cpu) {
182735ef 96 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
97 return false;
98 }
99 }
100 return true;
101}
102
946fb27c
PB
103/***********************************************************/
104/* guest cycle counter */
105
a3270e19
PB
106/* Protected by TimersState seqlock */
107
71468395 108static int64_t vm_clock_warp_start = -1;
946fb27c
PB
109/* Conversion factor from emulated instructions to virtual clock ticks. */
110static int icount_time_shift;
111/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112#define MAX_ICOUNT_SHIFT 10
a3270e19 113
946fb27c
PB
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
946fb27c
PB
117
118typedef struct TimersState {
cb365646 119 /* Protected by BQL. */
946fb27c
PB
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
cb365646
LPF
122
123 /* cpu_clock_offset can be read out of BQL, so protect it with
124 * this lock.
125 */
126 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
c96778bb
FK
130
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias;
133 /* Only written by TCG thread */
134 int64_t qemu_icount;
946fb27c
PB
135} TimersState;
136
d9cd4007 137static TimersState timers_state;
946fb27c 138
2a62914b 139int64_t cpu_get_icount_raw(void)
946fb27c
PB
140{
141 int64_t icount;
4917cf44 142 CPUState *cpu = current_cpu;
946fb27c 143
c96778bb 144 icount = timers_state.qemu_icount;
4917cf44 145 if (cpu) {
99df7dce 146 if (!cpu_can_do_io(cpu)) {
2a62914b
PD
147 fprintf(stderr, "Bad icount read\n");
148 exit(1);
946fb27c 149 }
28ecfd7a 150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 151 }
2a62914b
PD
152 return icount;
153}
154
155/* Return the virtual CPU time, based on the instruction counter. */
156static int64_t cpu_get_icount_locked(void)
157{
158 int64_t icount = cpu_get_icount_raw();
3f031313 159 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
160}
161
17a15f1b
PB
162int64_t cpu_get_icount(void)
163{
164 int64_t icount;
165 unsigned start;
166
167 do {
168 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
169 icount = cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
171
172 return icount;
173}
174
3f031313
FK
175int64_t cpu_icount_to_ns(int64_t icount)
176{
177 return icount << icount_time_shift;
178}
179
946fb27c 180/* return the host CPU cycle counter and handle stop/restart */
cb365646 181/* Caller must hold the BQL */
946fb27c
PB
182int64_t cpu_get_ticks(void)
183{
5f3e3101
PB
184 int64_t ticks;
185
946fb27c
PB
186 if (use_icount) {
187 return cpu_get_icount();
188 }
5f3e3101
PB
189
190 ticks = timers_state.cpu_ticks_offset;
191 if (timers_state.cpu_ticks_enabled) {
192 ticks += cpu_get_real_ticks();
193 }
194
195 if (timers_state.cpu_ticks_prev > ticks) {
196 /* Note: non increasing ticks may happen if the host uses
197 software suspend */
198 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
199 ticks = timers_state.cpu_ticks_prev;
946fb27c 200 }
5f3e3101
PB
201
202 timers_state.cpu_ticks_prev = ticks;
203 return ticks;
946fb27c
PB
204}
205
cb365646 206static int64_t cpu_get_clock_locked(void)
946fb27c 207{
5f3e3101 208 int64_t ticks;
cb365646 209
5f3e3101
PB
210 ticks = timers_state.cpu_clock_offset;
211 if (timers_state.cpu_ticks_enabled) {
212 ticks += get_clock();
946fb27c 213 }
cb365646 214
5f3e3101 215 return ticks;
cb365646
LPF
216}
217
218/* return the host CPU monotonic timer and handle stop/restart */
219int64_t cpu_get_clock(void)
220{
221 int64_t ti;
222 unsigned start;
223
224 do {
225 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
226 ti = cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
228
229 return ti;
946fb27c
PB
230}
231
cb365646
LPF
232/* enable cpu_get_ticks()
233 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
234 */
946fb27c
PB
235void cpu_enable_ticks(void)
236{
cb365646
LPF
237 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
238 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c
PB
239 if (!timers_state.cpu_ticks_enabled) {
240 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
241 timers_state.cpu_clock_offset -= get_clock();
242 timers_state.cpu_ticks_enabled = 1;
243 }
cb365646 244 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
245}
246
247/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
248 * cpu_get_ticks() after that.
249 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
250 */
946fb27c
PB
251void cpu_disable_ticks(void)
252{
cb365646
LPF
253 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
254 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 255 if (timers_state.cpu_ticks_enabled) {
5f3e3101 256 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
cb365646 257 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
258 timers_state.cpu_ticks_enabled = 0;
259 }
cb365646 260 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
261}
262
263/* Correlation between real and virtual time is always going to be
264 fairly approximate, so ignore small variation.
265 When the guest is idle real and virtual time will be aligned in
266 the IO wait loop. */
267#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
268
269static void icount_adjust(void)
270{
271 int64_t cur_time;
272 int64_t cur_icount;
273 int64_t delta;
a3270e19
PB
274
275 /* Protected by TimersState mutex. */
946fb27c 276 static int64_t last_delta;
468cc7cf 277
946fb27c
PB
278 /* If the VM is not running, then do nothing. */
279 if (!runstate_is_running()) {
280 return;
281 }
468cc7cf 282
17a15f1b
PB
283 seqlock_write_lock(&timers_state.vm_clock_seqlock);
284 cur_time = cpu_get_clock_locked();
285 cur_icount = cpu_get_icount_locked();
468cc7cf 286
946fb27c
PB
287 delta = cur_icount - cur_time;
288 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
289 if (delta > 0
290 && last_delta + ICOUNT_WOBBLE < delta * 2
291 && icount_time_shift > 0) {
292 /* The guest is getting too far ahead. Slow time down. */
293 icount_time_shift--;
294 }
295 if (delta < 0
296 && last_delta - ICOUNT_WOBBLE > delta * 2
297 && icount_time_shift < MAX_ICOUNT_SHIFT) {
298 /* The guest is getting too far behind. Speed time up. */
299 icount_time_shift++;
300 }
301 last_delta = delta;
c96778bb
FK
302 timers_state.qemu_icount_bias = cur_icount
303 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 304 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
305}
306
307static void icount_adjust_rt(void *opaque)
308{
40daca54 309 timer_mod(icount_rt_timer,
1979b908 310 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
311 icount_adjust();
312}
313
314static void icount_adjust_vm(void *opaque)
315{
40daca54
AB
316 timer_mod(icount_vm_timer,
317 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
318 get_ticks_per_sec() / 10);
946fb27c
PB
319 icount_adjust();
320}
321
322static int64_t qemu_icount_round(int64_t count)
323{
324 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
325}
326
327static void icount_warp_rt(void *opaque)
328{
17a15f1b
PB
329 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
330 * changes from -1 to another value, so the race here is okay.
331 */
332 if (atomic_read(&vm_clock_warp_start) == -1) {
946fb27c
PB
333 return;
334 }
335
17a15f1b 336 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 337 if (runstate_is_running()) {
bf2a7ddb 338 int64_t clock = cpu_get_clock_locked();
8ed961d9
PB
339 int64_t warp_delta;
340
341 warp_delta = clock - vm_clock_warp_start;
342 if (use_icount == 2) {
946fb27c 343 /*
40daca54 344 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
345 * far ahead of real time.
346 */
17a15f1b 347 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 348 int64_t delta = clock - cur_icount;
8ed961d9 349 warp_delta = MIN(warp_delta, delta);
946fb27c 350 }
c96778bb 351 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
352 }
353 vm_clock_warp_start = -1;
17a15f1b 354 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
355
356 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
357 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
358 }
946fb27c
PB
359}
360
8156be56
PB
361void qtest_clock_warp(int64_t dest)
362{
40daca54 363 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 364 AioContext *aio_context;
8156be56 365 assert(qtest_enabled());
efef88b3 366 aio_context = qemu_get_aio_context();
8156be56 367 while (clock < dest) {
40daca54 368 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 369 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 370
17a15f1b 371 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 372 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
373 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
374
40daca54 375 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 376 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 377 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 378 }
40daca54 379 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
380}
381
40daca54 382void qemu_clock_warp(QEMUClockType type)
946fb27c 383{
ce78d18c 384 int64_t clock;
946fb27c
PB
385 int64_t deadline;
386
387 /*
388 * There are too many global variables to make the "warp" behavior
389 * applicable to other clocks. But a clock argument removes the
390 * need for if statements all over the place.
391 */
40daca54 392 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
946fb27c
PB
393 return;
394 }
395
396 /*
40daca54
AB
397 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
398 * This ensures that the deadline for the timer is computed correctly below.
946fb27c
PB
399 * This also makes sure that the insn counter is synchronized before the
400 * CPU starts running, in case the CPU is woken by an event other than
40daca54 401 * the earliest QEMU_CLOCK_VIRTUAL timer.
946fb27c
PB
402 */
403 icount_warp_rt(NULL);
ce78d18c
PB
404 timer_del(icount_warp_timer);
405 if (!all_cpu_threads_idle()) {
946fb27c
PB
406 return;
407 }
408
8156be56
PB
409 if (qtest_enabled()) {
410 /* When testing, qtest commands advance icount. */
411 return;
412 }
413
ac70aafc 414 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 415 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 416 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c
PB
417 if (deadline < 0) {
418 return;
ac70aafc
AB
419 }
420
946fb27c
PB
421 if (deadline > 0) {
422 /*
40daca54 423 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
424 * sleep. Otherwise, the CPU might be waiting for a future timer
425 * interrupt to wake it up, but the interrupt never comes because
426 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 427 * QEMU_CLOCK_VIRTUAL.
946fb27c
PB
428 *
429 * An extreme solution for this problem would be to never let VCPUs
40daca54
AB
430 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
431 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
432 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
bf2a7ddb
PD
433 * after some "real" time, (related to the time left until the next
434 * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
40daca54
AB
435 * This avoids that the warps are visible externally; for example,
436 * you will not be sending network packets continuously instead of
437 * every 100ms.
946fb27c 438 */
17a15f1b 439 seqlock_write_lock(&timers_state.vm_clock_seqlock);
ce78d18c
PB
440 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
441 vm_clock_warp_start = clock;
442 }
17a15f1b 443 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
ce78d18c 444 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ac70aafc 445 } else if (deadline == 0) {
40daca54 446 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
447 }
448}
449
d09eae37
FK
450static bool icount_state_needed(void *opaque)
451{
452 return use_icount;
453}
454
455/*
456 * This is a subsection for icount migration.
457 */
458static const VMStateDescription icount_vmstate_timers = {
459 .name = "timer/icount",
460 .version_id = 1,
461 .minimum_version_id = 1,
462 .fields = (VMStateField[]) {
463 VMSTATE_INT64(qemu_icount_bias, TimersState),
464 VMSTATE_INT64(qemu_icount, TimersState),
465 VMSTATE_END_OF_LIST()
466 }
467};
468
946fb27c
PB
469static const VMStateDescription vmstate_timers = {
470 .name = "timer",
471 .version_id = 2,
472 .minimum_version_id = 1,
35d08458 473 .fields = (VMStateField[]) {
946fb27c
PB
474 VMSTATE_INT64(cpu_ticks_offset, TimersState),
475 VMSTATE_INT64(dummy, TimersState),
476 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
477 VMSTATE_END_OF_LIST()
d09eae37
FK
478 },
479 .subsections = (VMStateSubsection[]) {
480 {
481 .vmsd = &icount_vmstate_timers,
482 .needed = icount_state_needed,
483 }, {
484 /* empty */
485 }
946fb27c
PB
486 }
487};
488
4603ea01
PD
489void cpu_ticks_init(void)
490{
491 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
492 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
493}
494
1ad9580b 495void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 496{
1ad9580b 497 const char *option;
a8bfac37 498 char *rem_str = NULL;
1ad9580b 499
1ad9580b 500 option = qemu_opt_get(opts, "shift");
946fb27c 501 if (!option) {
a8bfac37
ST
502 if (qemu_opt_get(opts, "align") != NULL) {
503 error_setg(errp, "Please specify shift option when using align");
504 }
946fb27c
PB
505 return;
506 }
a8bfac37 507 icount_align_option = qemu_opt_get_bool(opts, "align", false);
bf2a7ddb
PD
508 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
509 icount_warp_rt, NULL);
946fb27c 510 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
511 errno = 0;
512 icount_time_shift = strtol(option, &rem_str, 0);
513 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
514 error_setg(errp, "icount: Invalid shift value");
515 }
946fb27c
PB
516 use_icount = 1;
517 return;
a8bfac37
ST
518 } else if (icount_align_option) {
519 error_setg(errp, "shift=auto and align=on are incompatible");
946fb27c
PB
520 }
521
522 use_icount = 2;
523
524 /* 125MIPS seems a reasonable initial guess at the guest speed.
525 It will be corrected fairly quickly anyway. */
526 icount_time_shift = 3;
527
528 /* Have both realtime and virtual time triggers for speed adjustment.
529 The realtime trigger catches emulated time passing too slowly,
530 the virtual time trigger catches emulated time passing too fast.
531 Realtime triggers occur even when idle, so use them less frequently
532 than VM triggers. */
bf2a7ddb
PD
533 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
534 icount_adjust_rt, NULL);
40daca54 535 timer_mod(icount_rt_timer,
bf2a7ddb 536 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
537 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
538 icount_adjust_vm, NULL);
539 timer_mod(icount_vm_timer,
540 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
541 get_ticks_per_sec() / 10);
946fb27c
PB
542}
543
296af7c9
BS
544/***********************************************************/
545void hw_error(const char *fmt, ...)
546{
547 va_list ap;
55e5c285 548 CPUState *cpu;
296af7c9
BS
549
550 va_start(ap, fmt);
551 fprintf(stderr, "qemu: hardware error: ");
552 vfprintf(stderr, fmt, ap);
553 fprintf(stderr, "\n");
bdc44640 554 CPU_FOREACH(cpu) {
55e5c285 555 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 556 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
557 }
558 va_end(ap);
559 abort();
560}
561
562void cpu_synchronize_all_states(void)
563{
182735ef 564 CPUState *cpu;
296af7c9 565
bdc44640 566 CPU_FOREACH(cpu) {
182735ef 567 cpu_synchronize_state(cpu);
296af7c9
BS
568 }
569}
570
571void cpu_synchronize_all_post_reset(void)
572{
182735ef 573 CPUState *cpu;
296af7c9 574
bdc44640 575 CPU_FOREACH(cpu) {
182735ef 576 cpu_synchronize_post_reset(cpu);
296af7c9
BS
577 }
578}
579
580void cpu_synchronize_all_post_init(void)
581{
182735ef 582 CPUState *cpu;
296af7c9 583
bdc44640 584 CPU_FOREACH(cpu) {
182735ef 585 cpu_synchronize_post_init(cpu);
296af7c9
BS
586 }
587}
588
de9d61e8
MT
589void cpu_clean_all_dirty(void)
590{
591 CPUState *cpu;
592
593 CPU_FOREACH(cpu) {
594 cpu_clean_state(cpu);
595 }
596}
597
56983463 598static int do_vm_stop(RunState state)
296af7c9 599{
56983463
KW
600 int ret = 0;
601
1354869c 602 if (runstate_is_running()) {
296af7c9 603 cpu_disable_ticks();
296af7c9 604 pause_all_vcpus();
f5bbfba1 605 runstate_set(state);
1dfb4dd9 606 vm_state_notify(0, state);
a4e15de9 607 qapi_event_send_stop(&error_abort);
296af7c9 608 }
56983463 609
594a45ce
KW
610 bdrv_drain_all();
611 ret = bdrv_flush_all();
612
56983463 613 return ret;
296af7c9
BS
614}
615
a1fcaa73 616static bool cpu_can_run(CPUState *cpu)
296af7c9 617{
4fdeee7c 618 if (cpu->stop) {
a1fcaa73 619 return false;
0ab07c62 620 }
321bc0b2 621 if (cpu_is_stopped(cpu)) {
a1fcaa73 622 return false;
0ab07c62 623 }
a1fcaa73 624 return true;
296af7c9
BS
625}
626
91325046 627static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 628{
64f6b346 629 gdb_set_stop_cpu(cpu);
8cf71710 630 qemu_system_debug_request();
f324e766 631 cpu->stopped = true;
3c638d06
JK
632}
633
714bd040
PB
634static void cpu_signal(int sig)
635{
4917cf44
AF
636 if (current_cpu) {
637 cpu_exit(current_cpu);
714bd040
PB
638 }
639 exit_request = 1;
640}
714bd040 641
6d9cb73c
JK
642#ifdef CONFIG_LINUX
643static void sigbus_reraise(void)
644{
645 sigset_t set;
646 struct sigaction action;
647
648 memset(&action, 0, sizeof(action));
649 action.sa_handler = SIG_DFL;
650 if (!sigaction(SIGBUS, &action, NULL)) {
651 raise(SIGBUS);
652 sigemptyset(&set);
653 sigaddset(&set, SIGBUS);
654 sigprocmask(SIG_UNBLOCK, &set, NULL);
655 }
656 perror("Failed to re-raise SIGBUS!\n");
657 abort();
658}
659
660static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
661 void *ctx)
662{
663 if (kvm_on_sigbus(siginfo->ssi_code,
664 (void *)(intptr_t)siginfo->ssi_addr)) {
665 sigbus_reraise();
666 }
667}
668
669static void qemu_init_sigbus(void)
670{
671 struct sigaction action;
672
673 memset(&action, 0, sizeof(action));
674 action.sa_flags = SA_SIGINFO;
675 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
676 sigaction(SIGBUS, &action, NULL);
677
678 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
679}
680
290adf38 681static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
682{
683 struct timespec ts = { 0, 0 };
684 siginfo_t siginfo;
685 sigset_t waitset;
686 sigset_t chkset;
687 int r;
688
689 sigemptyset(&waitset);
690 sigaddset(&waitset, SIG_IPI);
691 sigaddset(&waitset, SIGBUS);
692
693 do {
694 r = sigtimedwait(&waitset, &siginfo, &ts);
695 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
696 perror("sigtimedwait");
697 exit(1);
698 }
699
700 switch (r) {
701 case SIGBUS:
290adf38 702 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
703 sigbus_reraise();
704 }
705 break;
706 default:
707 break;
708 }
709
710 r = sigpending(&chkset);
711 if (r == -1) {
712 perror("sigpending");
713 exit(1);
714 }
715 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
716}
717
6d9cb73c
JK
718#else /* !CONFIG_LINUX */
719
720static void qemu_init_sigbus(void)
721{
722}
1ab3c6c0 723
290adf38 724static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
725{
726}
6d9cb73c
JK
727#endif /* !CONFIG_LINUX */
728
296af7c9 729#ifndef _WIN32
55f8d6ac
JK
730static void dummy_signal(int sig)
731{
732}
55f8d6ac 733
13618e05 734static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
735{
736 int r;
737 sigset_t set;
738 struct sigaction sigact;
739
740 memset(&sigact, 0, sizeof(sigact));
741 sigact.sa_handler = dummy_signal;
742 sigaction(SIG_IPI, &sigact, NULL);
743
714bd040
PB
744 pthread_sigmask(SIG_BLOCK, NULL, &set);
745 sigdelset(&set, SIG_IPI);
714bd040 746 sigdelset(&set, SIGBUS);
491d6e80 747 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
748 if (r) {
749 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
750 exit(1);
751 }
752}
753
754static void qemu_tcg_init_cpu_signals(void)
755{
714bd040
PB
756 sigset_t set;
757 struct sigaction sigact;
758
759 memset(&sigact, 0, sizeof(sigact));
760 sigact.sa_handler = cpu_signal;
761 sigaction(SIG_IPI, &sigact, NULL);
762
763 sigemptyset(&set);
764 sigaddset(&set, SIG_IPI);
765 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
766}
767
55f8d6ac 768#else /* _WIN32 */
13618e05 769static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 770{
714bd040
PB
771 abort();
772}
ff48eb5f 773
714bd040
PB
774static void qemu_tcg_init_cpu_signals(void)
775{
ff48eb5f 776}
714bd040 777#endif /* _WIN32 */
ff48eb5f 778
b2532d88 779static QemuMutex qemu_global_mutex;
46daff13
PB
780static QemuCond qemu_io_proceeded_cond;
781static bool iothread_requesting_mutex;
296af7c9
BS
782
783static QemuThread io_thread;
784
785static QemuThread *tcg_cpu_thread;
786static QemuCond *tcg_halt_cond;
787
296af7c9
BS
788/* cpu creation */
789static QemuCond qemu_cpu_cond;
790/* system init */
296af7c9 791static QemuCond qemu_pause_cond;
e82bcec2 792static QemuCond qemu_work_cond;
296af7c9 793
d3b12f5d 794void qemu_init_cpu_loop(void)
296af7c9 795{
6d9cb73c 796 qemu_init_sigbus();
ed94592b 797 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
798 qemu_cond_init(&qemu_pause_cond);
799 qemu_cond_init(&qemu_work_cond);
46daff13 800 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 801 qemu_mutex_init(&qemu_global_mutex);
296af7c9 802
b7680cb6 803 qemu_thread_get_self(&io_thread);
296af7c9
BS
804}
805
f100f0b3 806void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
807{
808 struct qemu_work_item wi;
809
60e82579 810 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
811 func(data);
812 return;
813 }
814
815 wi.func = func;
816 wi.data = data;
3c02270d 817 wi.free = false;
c64ca814
AF
818 if (cpu->queued_work_first == NULL) {
819 cpu->queued_work_first = &wi;
0ab07c62 820 } else {
c64ca814 821 cpu->queued_work_last->next = &wi;
0ab07c62 822 }
c64ca814 823 cpu->queued_work_last = &wi;
e82bcec2
MT
824 wi.next = NULL;
825 wi.done = false;
826
c08d7424 827 qemu_cpu_kick(cpu);
e82bcec2 828 while (!wi.done) {
4917cf44 829 CPUState *self_cpu = current_cpu;
e82bcec2
MT
830
831 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 832 current_cpu = self_cpu;
e82bcec2
MT
833 }
834}
835
3c02270d
CV
836void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
837{
838 struct qemu_work_item *wi;
839
840 if (qemu_cpu_is_self(cpu)) {
841 func(data);
842 return;
843 }
844
845 wi = g_malloc0(sizeof(struct qemu_work_item));
846 wi->func = func;
847 wi->data = data;
848 wi->free = true;
849 if (cpu->queued_work_first == NULL) {
850 cpu->queued_work_first = wi;
851 } else {
852 cpu->queued_work_last->next = wi;
853 }
854 cpu->queued_work_last = wi;
855 wi->next = NULL;
856 wi->done = false;
857
858 qemu_cpu_kick(cpu);
859}
860
6d45b109 861static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
862{
863 struct qemu_work_item *wi;
864
c64ca814 865 if (cpu->queued_work_first == NULL) {
e82bcec2 866 return;
0ab07c62 867 }
e82bcec2 868
c64ca814
AF
869 while ((wi = cpu->queued_work_first)) {
870 cpu->queued_work_first = wi->next;
e82bcec2
MT
871 wi->func(wi->data);
872 wi->done = true;
3c02270d
CV
873 if (wi->free) {
874 g_free(wi);
875 }
e82bcec2 876 }
c64ca814 877 cpu->queued_work_last = NULL;
e82bcec2
MT
878 qemu_cond_broadcast(&qemu_work_cond);
879}
880
509a0d78 881static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 882{
4fdeee7c
AF
883 if (cpu->stop) {
884 cpu->stop = false;
f324e766 885 cpu->stopped = true;
296af7c9
BS
886 qemu_cond_signal(&qemu_pause_cond);
887 }
6d45b109 888 flush_queued_work(cpu);
216fc9a4 889 cpu->thread_kicked = false;
296af7c9
BS
890}
891
6cabe1f3 892static void qemu_tcg_wait_io_event(void)
296af7c9 893{
182735ef 894 CPUState *cpu;
6cabe1f3 895
16400322 896 while (all_cpu_threads_idle()) {
ab33fcda
PB
897 /* Start accounting real time to the virtual clock if the CPUs
898 are idle. */
40daca54 899 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
9705fbb5 900 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 901 }
296af7c9 902
46daff13
PB
903 while (iothread_requesting_mutex) {
904 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
905 }
6cabe1f3 906
bdc44640 907 CPU_FOREACH(cpu) {
182735ef 908 qemu_wait_io_event_common(cpu);
6cabe1f3 909 }
296af7c9
BS
910}
911
fd529e8f 912static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 913{
a98ae1d8 914 while (cpu_thread_is_idle(cpu)) {
f5c121b8 915 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 916 }
296af7c9 917
290adf38 918 qemu_kvm_eat_signals(cpu);
509a0d78 919 qemu_wait_io_event_common(cpu);
296af7c9
BS
920}
921
7e97cd88 922static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 923{
48a106bd 924 CPUState *cpu = arg;
84b4915d 925 int r;
296af7c9 926
6164e6d6 927 qemu_mutex_lock(&qemu_global_mutex);
814e612e 928 qemu_thread_get_self(cpu->thread);
9f09e18a 929 cpu->thread_id = qemu_get_thread_id();
626cf8f4 930 cpu->can_do_io = 1;
4917cf44 931 current_cpu = cpu;
296af7c9 932
504134d2 933 r = kvm_init_vcpu(cpu);
84b4915d
JK
934 if (r < 0) {
935 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
936 exit(1);
937 }
296af7c9 938
13618e05 939 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
940
941 /* signal CPU creation */
61a46217 942 cpu->created = true;
296af7c9
BS
943 qemu_cond_signal(&qemu_cpu_cond);
944
296af7c9 945 while (1) {
a1fcaa73 946 if (cpu_can_run(cpu)) {
1458c363 947 r = kvm_cpu_exec(cpu);
83f338f7 948 if (r == EXCP_DEBUG) {
91325046 949 cpu_handle_guest_debug(cpu);
83f338f7 950 }
0ab07c62 951 }
fd529e8f 952 qemu_kvm_wait_io_event(cpu);
296af7c9
BS
953 }
954
955 return NULL;
956}
957
c7f0f3b1
AL
958static void *qemu_dummy_cpu_thread_fn(void *arg)
959{
960#ifdef _WIN32
961 fprintf(stderr, "qtest is not supported under Windows\n");
962 exit(1);
963#else
10a9021d 964 CPUState *cpu = arg;
c7f0f3b1
AL
965 sigset_t waitset;
966 int r;
967
968 qemu_mutex_lock_iothread();
814e612e 969 qemu_thread_get_self(cpu->thread);
9f09e18a 970 cpu->thread_id = qemu_get_thread_id();
626cf8f4 971 cpu->can_do_io = 1;
c7f0f3b1
AL
972
973 sigemptyset(&waitset);
974 sigaddset(&waitset, SIG_IPI);
975
976 /* signal CPU creation */
61a46217 977 cpu->created = true;
c7f0f3b1
AL
978 qemu_cond_signal(&qemu_cpu_cond);
979
4917cf44 980 current_cpu = cpu;
c7f0f3b1 981 while (1) {
4917cf44 982 current_cpu = NULL;
c7f0f3b1
AL
983 qemu_mutex_unlock_iothread();
984 do {
985 int sig;
986 r = sigwait(&waitset, &sig);
987 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
988 if (r == -1) {
989 perror("sigwait");
990 exit(1);
991 }
992 qemu_mutex_lock_iothread();
4917cf44 993 current_cpu = cpu;
509a0d78 994 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
995 }
996
997 return NULL;
998#endif
999}
1000
bdb7ca67
JK
1001static void tcg_exec_all(void);
1002
7e97cd88 1003static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1004{
c3586ba7 1005 CPUState *cpu = arg;
296af7c9 1006
55f8d6ac 1007 qemu_tcg_init_cpu_signals();
814e612e 1008 qemu_thread_get_self(cpu->thread);
296af7c9 1009
296af7c9 1010 qemu_mutex_lock(&qemu_global_mutex);
38fcbd3f
AF
1011 CPU_FOREACH(cpu) {
1012 cpu->thread_id = qemu_get_thread_id();
1013 cpu->created = true;
626cf8f4 1014 cpu->can_do_io = 1;
38fcbd3f 1015 }
296af7c9
BS
1016 qemu_cond_signal(&qemu_cpu_cond);
1017
fa7d1867 1018 /* wait for initial kick-off after machine start */
bdc44640 1019 while (QTAILQ_FIRST(&cpus)->stopped) {
fa7d1867 1020 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
1021
1022 /* process any pending work */
bdc44640 1023 CPU_FOREACH(cpu) {
182735ef 1024 qemu_wait_io_event_common(cpu);
8e564b4e 1025 }
0ab07c62 1026 }
296af7c9
BS
1027
1028 while (1) {
bdb7ca67 1029 tcg_exec_all();
ac70aafc
AB
1030
1031 if (use_icount) {
40daca54 1032 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1033
1034 if (deadline == 0) {
40daca54 1035 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1036 }
3b2319a3 1037 }
6cabe1f3 1038 qemu_tcg_wait_io_event();
296af7c9
BS
1039 }
1040
1041 return NULL;
1042}
1043
/*
 * Force the thread backing @cpu out of guest execution.
 *
 * POSIX: deliver SIG_IPI with pthread_kill(); the signal handler makes
 * the vCPU loop notice pending work.  Windows has no signals, so instead
 * suspend the thread, poke it via cpu_signal(), and resume it.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        /* pthread_kill returns the error number directly (not via errno) */
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        /* NOTE(review): GetThreadContext() returns nonzero on SUCCESS, so
         * this condition appears inverted relative to the comment's intent
         * (retry until the context can be obtained would be "== 0").
         * Left untouched — verify against Win32 semantics before changing.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
1082
c08d7424 1083void qemu_cpu_kick(CPUState *cpu)
296af7c9 1084{
f5c121b8 1085 qemu_cond_broadcast(cpu->halt_cond);
216fc9a4 1086 if (!tcg_enabled() && !cpu->thread_kicked) {
2ff09a40 1087 qemu_cpu_kick_thread(cpu);
216fc9a4 1088 cpu->thread_kicked = true;
aa2c364b 1089 }
296af7c9
BS
1090}
1091
46d62fac 1092void qemu_cpu_kick_self(void)
296af7c9 1093{
b55c22c6 1094#ifndef _WIN32
4917cf44 1095 assert(current_cpu);
296af7c9 1096
4917cf44
AF
1097 if (!current_cpu->thread_kicked) {
1098 qemu_cpu_kick_thread(current_cpu);
1099 current_cpu->thread_kicked = true;
296af7c9 1100 }
b55c22c6
PB
1101#else
1102 abort();
1103#endif
296af7c9
BS
1104}
1105
/* Return true if the calling thread is the thread backing @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1110
/* Return true if the calling thread is a vCPU thread (current_cpu is
 * set only inside vCPU thread functions, so a NULL check suffices).
 */
bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
1115
296af7c9
BS
/*
 * Acquire the big QEMU lock (qemu_global_mutex) from an iothread.
 *
 * Without TCG this is a plain lock.  With TCG the single vCPU thread may
 * hold the lock for long stretches of guest execution, so on contention
 * we raise iothread_requesting_mutex and kick the vCPU thread to make it
 * drop the lock, then block until we get it.  The broadcast at the end
 * lets the vCPU thread (waiting on qemu_io_proceeded_cond) resume.
 */
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Lock is held by the TCG thread: kick it out of the guest
             * and wait for the lock the slow way.
             */
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}
1130
/* Release the big QEMU lock taken by qemu_mutex_lock_iothread(). */
void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
1135
1136static int all_vcpus_paused(void)
1137{
bdc44640 1138 CPUState *cpu;
296af7c9 1139
bdc44640 1140 CPU_FOREACH(cpu) {
182735ef 1141 if (!cpu->stopped) {
296af7c9 1142 return 0;
0ab07c62 1143 }
296af7c9
BS
1144 }
1145
1146 return 1;
1147}
1148
/*
 * Request every vCPU to stop and wait until all have stopped.
 *
 * Disables the virtual clock, raises cpu->stop on all CPUs and kicks
 * them.  If called from a vCPU thread itself, that CPU is stopped
 * directly; for non-KVM accelerators the remaining CPUs (all driven by
 * the same TCG thread, i.e. this thread) are marked stopped immediately
 * and we return without waiting.  Otherwise block on qemu_pause_cond,
 * re-kicking laggards each wakeup, until all_vcpus_paused() holds.
 * Called with qemu_global_mutex held.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* Single TCG thread drives all CPUs: no other thread will act
             * on the stop request, so mark them stopped ourselves.
             */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1177
2993683b
IM
1178void cpu_resume(CPUState *cpu)
1179{
1180 cpu->stop = false;
1181 cpu->stopped = false;
1182 qemu_cpu_kick(cpu);
1183}
1184
296af7c9
BS
1185void resume_all_vcpus(void)
1186{
bdc44640 1187 CPUState *cpu;
296af7c9 1188
40daca54 1189 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1190 CPU_FOREACH(cpu) {
182735ef 1191 cpu_resume(cpu);
296af7c9
BS
1192 }
1193}
1194
4900116e
DDAG
1195/* For temporary buffers for forming a name */
1196#define VCPU_THREAD_NAME_SIZE 16
1197
/*
 * Attach @cpu to the (single, shared) TCG vCPU thread.
 *
 * The first CPU initialized creates the thread plus its halt condvar
 * and blocks until the thread has signalled creation; every later CPU
 * simply reuses the cached tcg_cpu_thread / tcg_halt_cond.  Called with
 * qemu_global_mutex held (required for the qemu_cpu_cond wait).
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        /* Windows needs the raw HANDLE for Suspend/ResumeThread kicks. */
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait for the thread to mark the CPU created before caching it. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1226
/*
 * Create the dedicated KVM thread for @cpu and block until the thread
 * has signalled cpu->created via qemu_cpu_cond.  Called with
 * qemu_global_mutex held; one thread per vCPU (unlike TCG).
 */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    /* Synchronize with the new thread's startup handshake. */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1242
/*
 * Create a placeholder vCPU thread for configurations with neither KVM
 * nor TCG (e.g. qtest).  Mirrors qemu_kvm_start_vcpu(): allocate the
 * thread and halt condvar, spawn qemu_dummy_cpu_thread_fn, and wait for
 * the creation handshake.  Called with qemu_global_mutex held.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    /* Synchronize with the new thread's startup handshake. */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1258
c643bed9 1259void qemu_init_vcpu(CPUState *cpu)
296af7c9 1260{
ce3960eb
AF
1261 cpu->nr_cores = smp_cores;
1262 cpu->nr_threads = smp_threads;
f324e766 1263 cpu->stopped = true;
0ab07c62 1264 if (kvm_enabled()) {
48a106bd 1265 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1266 } else if (tcg_enabled()) {
e5ab30a2 1267 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1268 } else {
10a9021d 1269 qemu_dummy_start_vcpu(cpu);
0ab07c62 1270 }
296af7c9
BS
1271}
1272
b4a3d965 1273void cpu_stop_current(void)
296af7c9 1274{
4917cf44
AF
1275 if (current_cpu) {
1276 current_cpu->stop = false;
1277 current_cpu->stopped = true;
1278 cpu_exit(current_cpu);
67bb172f 1279 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1280 }
296af7c9
BS
1281}
1282
/*
 * Stop the VM and transition to run state @state.
 *
 * From a vCPU thread we cannot stop the machine synchronously: file a
 * vmstop request for the main loop, stop just the calling CPU, and
 * return 0.  From the main thread, do the full stop via do_vm_stop()
 * and propagate its return value.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
1298
8a9236f1
LC
1299/* does a state transition even if the VM is already stopped,
1300 current state is forgotten forever */
56983463 1301int vm_stop_force_state(RunState state)
8a9236f1
LC
1302{
1303 if (runstate_is_running()) {
56983463 1304 return vm_stop(state);
8a9236f1
LC
1305 } else {
1306 runstate_set(state);
594a45ce
KW
1307 /* Make sure to return an error if the flush in a previous vm_stop()
1308 * failed. */
1309 return bdrv_flush_all();
8a9236f1
LC
1310 }
1311}
1312
/*
 * Run one TCG execution slice for @env's CPU and return cpu_exec()'s
 * exit code (e.g. EXCP_DEBUG).
 *
 * With -icount enabled this brackets cpu_exec() with instruction-budget
 * accounting: the budget for the slice is derived from the next
 * QEMU_CLOCK_VIRTUAL deadline, split into the 16-bit low decrementer
 * (at most 0xffff instructions) plus icount_extra overflow; afterwards
 * any unexecuted budget is folded back into timers_state.qemu_icount.
 */
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Remove any leftover budget from the previous slice before
         * computing a fresh one.
         */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* The hardware decrementer is 16 bits wide; spill the rest of
         * the budget into icount_extra.
         */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1364
/*
 * Round-robin scheduler for TCG: run each runnable CPU for one slice.
 *
 * The scan resumes at the global cursor next_cpu (so a CPU interrupted
 * by exit_request is not starved on the next call) and stops early on
 * exit_request, on a debug exception, or on a stopped CPU.  Clears
 * exit_request before returning.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        /* Keep the virtual clock stopped while single-stepping with
         * SSTEP_NOTIMER set.
         */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
1394
/*
 * Print the list of CPU models supported by the current target to @f
 * (for "-cpu help").  Silently does nothing for targets that have not
 * defined cpu_list; @optarg is currently unused.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1402
/*
 * QMP "query-cpus": build and return a list with one CpuInfo entry per
 * vCPU (index, current flag, halted state, host thread id, and — where
 * the target defines it — the current program counter).  The caller
 * owns the returned list.  Per-target register access is selected at
 * compile time via TARGET_* ifdefs.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Pull the latest register state in from the accelerator before
         * reading the PC fields below.
         */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1465
1466void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1467 bool has_cpu, int64_t cpu_index, Error **errp)
1468{
1469 FILE *f;
1470 uint32_t l;
55e5c285 1471 CPUState *cpu;
0cfd6a9a
LC
1472 uint8_t buf[1024];
1473
1474 if (!has_cpu) {
1475 cpu_index = 0;
1476 }
1477
151d1322
AF
1478 cpu = qemu_get_cpu(cpu_index);
1479 if (cpu == NULL) {
0cfd6a9a
LC
1480 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1481 "a CPU number");
1482 return;
1483 }
1484
1485 f = fopen(filename, "wb");
1486 if (!f) {
618da851 1487 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1488 return;
1489 }
1490
1491 while (size != 0) {
1492 l = sizeof(buf);
1493 if (l > size)
1494 l = size;
2f4d0f59
AK
1495 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1496 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1497 goto exit;
1498 }
0cfd6a9a
LC
1499 if (fwrite(buf, 1, l, f) != l) {
1500 error_set(errp, QERR_IO_ERROR);
1501 goto exit;
1502 }
1503 addr += l;
1504 size -= l;
1505 }
1506
1507exit:
1508 fclose(f);
1509}
6d3962bf
LC
1510
1511void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1512 Error **errp)
1513{
1514 FILE *f;
1515 uint32_t l;
1516 uint8_t buf[1024];
1517
1518 f = fopen(filename, "wb");
1519 if (!f) {
618da851 1520 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1521 return;
1522 }
1523
1524 while (size != 0) {
1525 l = sizeof(buf);
1526 if (l > size)
1527 l = size;
eb6282f2 1528 cpu_physical_memory_read(addr, buf, l);
6d3962bf
LC
1529 if (fwrite(buf, 1, l, f) != l) {
1530 error_set(errp, QERR_IO_ERROR);
1531 goto exit;
1532 }
1533 addr += l;
1534 size -= l;
1535 }
1536
1537exit:
1538 fclose(f);
1539}
ab49ab5c
LC
1540
/*
 * QMP "inject-nmi": deliver an NMI to the guest.
 *
 * On x86 every vCPU gets one: through its APIC if present, otherwise
 * via a direct CPU_INTERRUPT_NMI.  Other targets route through the
 * generic NMI handler for the monitor's current CPU.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            /* No APIC modelled: raise the NMI line on the CPU directly. */
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
27498bef
ST
1559
1560void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1561{
1562 if (!use_icount) {
1563 return;
1564 }
1565
1566 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1567 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1568 if (icount_align_option) {
1569 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1570 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1571 } else {
1572 cpu_fprintf(f, "Max guest delay NA\n");
1573 cpu_fprintf(f, "Max guest advance NA\n");
1574 }
1575}