]> git.ipfire.org Git - thirdparty/qemu.git/blame - cpus.c
Update version for 2.8.1.1 release
[thirdparty/qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
27498bef
ST
72int64_t max_delay;
73int64_t max_advance;
296af7c9 74
2adcc85d
JH
75/* vcpu throttling controls */
76static QEMUTimer *throttle_timer;
77static unsigned int throttle_percentage;
78
79#define CPU_THROTTLE_PCT_MIN 1
80#define CPU_THROTTLE_PCT_MAX 99
81#define CPU_THROTTLE_TIMESLICE_NS 10000000
82
321bc0b2
TC
83bool cpu_is_stopped(CPUState *cpu)
84{
85 return cpu->stopped || !runstate_is_running();
86}
87
a98ae1d8 88static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 89{
c64ca814 90 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
91 return false;
92 }
321bc0b2 93 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
94 return true;
95 }
8c2e1b00 96 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 97 kvm_halt_in_kernel()) {
ac873f1e
PM
98 return false;
99 }
100 return true;
101}
102
103static bool all_cpu_threads_idle(void)
104{
182735ef 105 CPUState *cpu;
ac873f1e 106
bdc44640 107 CPU_FOREACH(cpu) {
182735ef 108 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
109 return false;
110 }
111 }
112 return true;
113}
114
946fb27c
PB
115/***********************************************************/
116/* guest cycle counter */
117
a3270e19
PB
118/* Protected by TimersState seqlock */
119
5045e9d9 120static bool icount_sleep = true;
71468395 121static int64_t vm_clock_warp_start = -1;
946fb27c
PB
122/* Conversion factor from emulated instructions to virtual clock ticks. */
123static int icount_time_shift;
124/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125#define MAX_ICOUNT_SHIFT 10
a3270e19 126
946fb27c
PB
127static QEMUTimer *icount_rt_timer;
128static QEMUTimer *icount_vm_timer;
129static QEMUTimer *icount_warp_timer;
946fb27c
PB
130
131typedef struct TimersState {
cb365646 132 /* Protected by BQL. */
946fb27c
PB
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
cb365646
LPF
135
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
138 */
139 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
c96778bb
FK
143
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
946fb27c
PB
148} TimersState;
149
d9cd4007 150static TimersState timers_state;
946fb27c 151
2a62914b 152int64_t cpu_get_icount_raw(void)
946fb27c
PB
153{
154 int64_t icount;
4917cf44 155 CPUState *cpu = current_cpu;
946fb27c 156
c96778bb 157 icount = timers_state.qemu_icount;
4917cf44 158 if (cpu) {
414b15c9 159 if (!cpu->can_do_io) {
2a62914b
PD
160 fprintf(stderr, "Bad icount read\n");
161 exit(1);
946fb27c 162 }
28ecfd7a 163 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 164 }
2a62914b
PD
165 return icount;
166}
167
168/* Return the virtual CPU time, based on the instruction counter. */
169static int64_t cpu_get_icount_locked(void)
170{
171 int64_t icount = cpu_get_icount_raw();
3f031313 172 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
173}
174
17a15f1b
PB
175int64_t cpu_get_icount(void)
176{
177 int64_t icount;
178 unsigned start;
179
180 do {
181 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
182 icount = cpu_get_icount_locked();
183 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
184
185 return icount;
186}
187
3f031313
FK
188int64_t cpu_icount_to_ns(int64_t icount)
189{
190 return icount << icount_time_shift;
191}
192
d90f3cca
C
193/* return the time elapsed in VM between vm_start and vm_stop. Unless
194 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
195 * counter.
196 *
197 * Caller must hold the BQL
198 */
946fb27c
PB
199int64_t cpu_get_ticks(void)
200{
5f3e3101
PB
201 int64_t ticks;
202
946fb27c
PB
203 if (use_icount) {
204 return cpu_get_icount();
205 }
5f3e3101
PB
206
207 ticks = timers_state.cpu_ticks_offset;
208 if (timers_state.cpu_ticks_enabled) {
4a7428c5 209 ticks += cpu_get_host_ticks();
5f3e3101
PB
210 }
211
212 if (timers_state.cpu_ticks_prev > ticks) {
213 /* Note: non increasing ticks may happen if the host uses
214 software suspend */
215 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
216 ticks = timers_state.cpu_ticks_prev;
946fb27c 217 }
5f3e3101
PB
218
219 timers_state.cpu_ticks_prev = ticks;
220 return ticks;
946fb27c
PB
221}
222
cb365646 223static int64_t cpu_get_clock_locked(void)
946fb27c 224{
1d45cea5 225 int64_t time;
cb365646 226
1d45cea5 227 time = timers_state.cpu_clock_offset;
5f3e3101 228 if (timers_state.cpu_ticks_enabled) {
1d45cea5 229 time += get_clock();
946fb27c 230 }
cb365646 231
1d45cea5 232 return time;
cb365646
LPF
233}
234
d90f3cca 235/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
236 * the time between vm_start and vm_stop
237 */
cb365646
LPF
238int64_t cpu_get_clock(void)
239{
240 int64_t ti;
241 unsigned start;
242
243 do {
244 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
245 ti = cpu_get_clock_locked();
246 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
247
248 return ti;
946fb27c
PB
249}
250
cb365646 251/* enable cpu_get_ticks()
3224e878 252 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 253 */
946fb27c
PB
254void cpu_enable_ticks(void)
255{
cb365646 256 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 257 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 258 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 259 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
260 timers_state.cpu_clock_offset -= get_clock();
261 timers_state.cpu_ticks_enabled = 1;
262 }
03719e44 263 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
264}
265
266/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 267 * cpu_get_ticks() after that.
3224e878 268 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 269 */
946fb27c
PB
270void cpu_disable_ticks(void)
271{
cb365646 272 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 273 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 274 if (timers_state.cpu_ticks_enabled) {
4a7428c5 275 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 276 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
277 timers_state.cpu_ticks_enabled = 0;
278 }
03719e44 279 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
280}
281
282/* Correlation between real and virtual time is always going to be
283 fairly approximate, so ignore small variation.
284 When the guest is idle real and virtual time will be aligned in
285 the IO wait loop. */
73bcb24d 286#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
287
288static void icount_adjust(void)
289{
290 int64_t cur_time;
291 int64_t cur_icount;
292 int64_t delta;
a3270e19
PB
293
294 /* Protected by TimersState mutex. */
946fb27c 295 static int64_t last_delta;
468cc7cf 296
946fb27c
PB
297 /* If the VM is not running, then do nothing. */
298 if (!runstate_is_running()) {
299 return;
300 }
468cc7cf 301
03719e44 302 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
303 cur_time = cpu_get_clock_locked();
304 cur_icount = cpu_get_icount_locked();
468cc7cf 305
946fb27c
PB
306 delta = cur_icount - cur_time;
307 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
308 if (delta > 0
309 && last_delta + ICOUNT_WOBBLE < delta * 2
310 && icount_time_shift > 0) {
311 /* The guest is getting too far ahead. Slow time down. */
312 icount_time_shift--;
313 }
314 if (delta < 0
315 && last_delta - ICOUNT_WOBBLE > delta * 2
316 && icount_time_shift < MAX_ICOUNT_SHIFT) {
317 /* The guest is getting too far behind. Speed time up. */
318 icount_time_shift++;
319 }
320 last_delta = delta;
c96778bb
FK
321 timers_state.qemu_icount_bias = cur_icount
322 - (timers_state.qemu_icount << icount_time_shift);
03719e44 323 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
324}
325
326static void icount_adjust_rt(void *opaque)
327{
40daca54 328 timer_mod(icount_rt_timer,
1979b908 329 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
330 icount_adjust();
331}
332
333static void icount_adjust_vm(void *opaque)
334{
40daca54
AB
335 timer_mod(icount_vm_timer,
336 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 337 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
338 icount_adjust();
339}
340
341static int64_t qemu_icount_round(int64_t count)
342{
343 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
344}
345
efab87cf 346static void icount_warp_rt(void)
946fb27c 347{
ccffff48
AB
348 unsigned seq;
349 int64_t warp_start;
350
17a15f1b
PB
351 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
352 * changes from -1 to another value, so the race here is okay.
353 */
ccffff48
AB
354 do {
355 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
356 warp_start = vm_clock_warp_start;
357 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
358
359 if (warp_start == -1) {
946fb27c
PB
360 return;
361 }
362
03719e44 363 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 364 if (runstate_is_running()) {
8eda206e
PD
365 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
366 cpu_get_clock_locked());
8ed961d9
PB
367 int64_t warp_delta;
368
369 warp_delta = clock - vm_clock_warp_start;
370 if (use_icount == 2) {
946fb27c 371 /*
40daca54 372 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
373 * far ahead of real time.
374 */
17a15f1b 375 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 376 int64_t delta = clock - cur_icount;
8ed961d9 377 warp_delta = MIN(warp_delta, delta);
946fb27c 378 }
c96778bb 379 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
380 }
381 vm_clock_warp_start = -1;
03719e44 382 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
383
384 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
385 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
386 }
946fb27c
PB
387}
388
/* Warp-timer callback. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
396
8156be56
PB
397void qtest_clock_warp(int64_t dest)
398{
40daca54 399 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 400 AioContext *aio_context;
8156be56 401 assert(qtest_enabled());
efef88b3 402 aio_context = qemu_get_aio_context();
8156be56 403 while (clock < dest) {
40daca54 404 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 405 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 406
03719e44 407 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 408 timers_state.qemu_icount_bias += warp;
03719e44 409 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 410
40daca54 411 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 412 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 413 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 414 }
40daca54 415 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
416}
417
e76d1798 418void qemu_start_warp_timer(void)
946fb27c 419{
ce78d18c 420 int64_t clock;
946fb27c
PB
421 int64_t deadline;
422
e76d1798 423 if (!use_icount) {
946fb27c
PB
424 return;
425 }
426
8bd7f71d
PD
427 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
428 * do not fire, so computing the deadline does not make sense.
429 */
430 if (!runstate_is_running()) {
431 return;
432 }
433
434 /* warp clock deterministically in record/replay mode */
e76d1798 435 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
436 return;
437 }
438
ce78d18c 439 if (!all_cpu_threads_idle()) {
946fb27c
PB
440 return;
441 }
442
8156be56
PB
443 if (qtest_enabled()) {
444 /* When testing, qtest commands advance icount. */
e76d1798 445 return;
8156be56
PB
446 }
447
ac70aafc 448 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 449 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 450 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 451 if (deadline < 0) {
d7a0f71d
VC
452 static bool notified;
453 if (!icount_sleep && !notified) {
454 error_report("WARNING: icount sleep disabled and no active timers");
455 notified = true;
456 }
ce78d18c 457 return;
ac70aafc
AB
458 }
459
946fb27c
PB
460 if (deadline > 0) {
461 /*
40daca54 462 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
463 * sleep. Otherwise, the CPU might be waiting for a future timer
464 * interrupt to wake it up, but the interrupt never comes because
465 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 466 * QEMU_CLOCK_VIRTUAL.
946fb27c 467 */
5045e9d9
VC
468 if (!icount_sleep) {
469 /*
470 * We never let VCPUs sleep in no sleep icount mode.
471 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
472 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
473 * It is useful when we want a deterministic execution time,
474 * isolated from host latencies.
475 */
03719e44 476 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 477 timers_state.qemu_icount_bias += deadline;
03719e44 478 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
479 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
480 } else {
481 /*
482 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
483 * "real" time, (related to the time left until the next event) has
484 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
485 * This avoids that the warps are visible externally; for example,
486 * you will not be sending network packets continuously instead of
487 * every 100ms.
488 */
03719e44 489 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
490 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
491 vm_clock_warp_start = clock;
492 }
03719e44 493 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 494 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 495 }
ac70aafc 496 } else if (deadline == 0) {
40daca54 497 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
498 }
499}
500
e76d1798
PD
501static void qemu_account_warp_timer(void)
502{
503 if (!use_icount || !icount_sleep) {
504 return;
505 }
506
507 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
508 * do not fire, so computing the deadline does not make sense.
509 */
510 if (!runstate_is_running()) {
511 return;
512 }
513
514 /* warp clock deterministically in record/replay mode */
515 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
516 return;
517 }
518
519 timer_del(icount_warp_timer);
520 icount_warp_rt();
521}
522
d09eae37
FK
523static bool icount_state_needed(void *opaque)
524{
525 return use_icount;
526}
527
528/*
529 * This is a subsection for icount migration.
530 */
531static const VMStateDescription icount_vmstate_timers = {
532 .name = "timer/icount",
533 .version_id = 1,
534 .minimum_version_id = 1,
5cd8cada 535 .needed = icount_state_needed,
d09eae37
FK
536 .fields = (VMStateField[]) {
537 VMSTATE_INT64(qemu_icount_bias, TimersState),
538 VMSTATE_INT64(qemu_icount, TimersState),
539 VMSTATE_END_OF_LIST()
540 }
541};
542
946fb27c
PB
543static const VMStateDescription vmstate_timers = {
544 .name = "timer",
545 .version_id = 2,
546 .minimum_version_id = 1,
35d08458 547 .fields = (VMStateField[]) {
946fb27c
PB
548 VMSTATE_INT64(cpu_ticks_offset, TimersState),
549 VMSTATE_INT64(dummy, TimersState),
550 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
551 VMSTATE_END_OF_LIST()
d09eae37 552 },
5cd8cada
JQ
553 .subsections = (const VMStateDescription*[]) {
554 &icount_vmstate_timers,
555 NULL
946fb27c
PB
556 }
557};
558
14e6fe12 559static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 560{
2adcc85d
JH
561 double pct;
562 double throttle_ratio;
563 long sleeptime_ns;
564
565 if (!cpu_throttle_get_percentage()) {
566 return;
567 }
568
569 pct = (double)cpu_throttle_get_percentage()/100;
570 throttle_ratio = pct / (1 - pct);
571 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
572
573 qemu_mutex_unlock_iothread();
574 atomic_set(&cpu->throttle_thread_scheduled, 0);
575 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
576 qemu_mutex_lock_iothread();
577}
578
579static void cpu_throttle_timer_tick(void *opaque)
580{
581 CPUState *cpu;
582 double pct;
583
584 /* Stop the timer if needed */
585 if (!cpu_throttle_get_percentage()) {
586 return;
587 }
588 CPU_FOREACH(cpu) {
589 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
590 async_run_on_cpu(cpu, cpu_throttle_thread,
591 RUN_ON_CPU_NULL);
2adcc85d
JH
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598}
599
600void cpu_throttle_set(int new_throttle_pct)
601{
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610}
611
612void cpu_throttle_stop(void)
613{
614 atomic_set(&throttle_percentage, 0);
615}
616
617bool cpu_throttle_active(void)
618{
619 return (cpu_throttle_get_percentage() != 0);
620}
621
622int cpu_throttle_get_percentage(void)
623{
624 return atomic_read(&throttle_percentage);
625}
626
4603ea01
PD
627void cpu_ticks_init(void)
628{
ccdb3c1f 629 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
4603ea01
PD
633}
634
1ad9580b 635void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 636{
1ad9580b 637 const char *option;
a8bfac37 638 char *rem_str = NULL;
1ad9580b 639
1ad9580b 640 option = qemu_opt_get(opts, "shift");
946fb27c 641 if (!option) {
a8bfac37
ST
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
946fb27c
PB
645 return;
646 }
f1f4b57e
VC
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 651 icount_timer_cb, NULL);
5045e9d9 652 }
f1f4b57e 653
a8bfac37 654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
655
656 if (icount_align_option && !icount_sleep) {
778d9f9b 657 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 658 }
946fb27c 659 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
946fb27c
PB
665 use_icount = 1;
666 return;
a8bfac37
ST
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 669 } else if (!icount_sleep) {
778d9f9b 670 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
bf2a7ddb
PD
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
40daca54 686 timer_mod(icount_rt_timer,
bf2a7ddb 687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 692 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
693}
694
296af7c9
BS
695/***********************************************************/
696void hw_error(const char *fmt, ...)
697{
698 va_list ap;
55e5c285 699 CPUState *cpu;
296af7c9
BS
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
bdc44640 705 CPU_FOREACH(cpu) {
55e5c285 706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
708 }
709 va_end(ap);
710 abort();
711}
712
713void cpu_synchronize_all_states(void)
714{
182735ef 715 CPUState *cpu;
296af7c9 716
bdc44640 717 CPU_FOREACH(cpu) {
182735ef 718 cpu_synchronize_state(cpu);
296af7c9
BS
719 }
720}
721
722void cpu_synchronize_all_post_reset(void)
723{
182735ef 724 CPUState *cpu;
296af7c9 725
bdc44640 726 CPU_FOREACH(cpu) {
182735ef 727 cpu_synchronize_post_reset(cpu);
296af7c9
BS
728 }
729}
730
731void cpu_synchronize_all_post_init(void)
732{
182735ef 733 CPUState *cpu;
296af7c9 734
bdc44640 735 CPU_FOREACH(cpu) {
182735ef 736 cpu_synchronize_post_init(cpu);
296af7c9
BS
737 }
738}
739
56983463 740static int do_vm_stop(RunState state)
296af7c9 741{
56983463
KW
742 int ret = 0;
743
1354869c 744 if (runstate_is_running()) {
296af7c9 745 cpu_disable_ticks();
296af7c9 746 pause_all_vcpus();
f5bbfba1 747 runstate_set(state);
1dfb4dd9 748 vm_state_notify(0, state);
a4e15de9 749 qapi_event_send_stop(&error_abort);
296af7c9 750 }
56983463 751
594a45ce 752 bdrv_drain_all();
6d0ceb80 753 replay_disable_events();
22af08ea 754 ret = bdrv_flush_all();
594a45ce 755
56983463 756 return ret;
296af7c9
BS
757}
758
a1fcaa73 759static bool cpu_can_run(CPUState *cpu)
296af7c9 760{
4fdeee7c 761 if (cpu->stop) {
a1fcaa73 762 return false;
0ab07c62 763 }
321bc0b2 764 if (cpu_is_stopped(cpu)) {
a1fcaa73 765 return false;
0ab07c62 766 }
a1fcaa73 767 return true;
296af7c9
BS
768}
769
91325046 770static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 771{
64f6b346 772 gdb_set_stop_cpu(cpu);
8cf71710 773 qemu_system_debug_request();
f324e766 774 cpu->stopped = true;
3c638d06
JK
775}
776
6d9cb73c
JK
777#ifdef CONFIG_LINUX
778static void sigbus_reraise(void)
779{
780 sigset_t set;
781 struct sigaction action;
782
783 memset(&action, 0, sizeof(action));
784 action.sa_handler = SIG_DFL;
785 if (!sigaction(SIGBUS, &action, NULL)) {
786 raise(SIGBUS);
787 sigemptyset(&set);
788 sigaddset(&set, SIGBUS);
a2d1761d 789 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
790 }
791 perror("Failed to re-raise SIGBUS!\n");
792 abort();
793}
794
795static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
796 void *ctx)
797{
798 if (kvm_on_sigbus(siginfo->ssi_code,
799 (void *)(intptr_t)siginfo->ssi_addr)) {
800 sigbus_reraise();
801 }
802}
803
804static void qemu_init_sigbus(void)
805{
806 struct sigaction action;
807
808 memset(&action, 0, sizeof(action));
809 action.sa_flags = SA_SIGINFO;
810 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
811 sigaction(SIGBUS, &action, NULL);
812
813 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
814}
815
290adf38 816static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
817{
818 struct timespec ts = { 0, 0 };
819 siginfo_t siginfo;
820 sigset_t waitset;
821 sigset_t chkset;
822 int r;
823
824 sigemptyset(&waitset);
825 sigaddset(&waitset, SIG_IPI);
826 sigaddset(&waitset, SIGBUS);
827
828 do {
829 r = sigtimedwait(&waitset, &siginfo, &ts);
830 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
831 perror("sigtimedwait");
832 exit(1);
833 }
834
835 switch (r) {
836 case SIGBUS:
290adf38 837 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
838 sigbus_reraise();
839 }
840 break;
841 default:
842 break;
843 }
844
845 r = sigpending(&chkset);
846 if (r == -1) {
847 perror("sigpending");
848 exit(1);
849 }
850 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
851}
852
6d9cb73c
JK
853#else /* !CONFIG_LINUX */
854
static void qemu_init_sigbus(void)
{
    /* SIGBUS/MCE handling is Linux-only; nothing to set up elsewhere. */
}
1ab3c6c0 858
290adf38 859static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
860{
861}
6d9cb73c
JK
862#endif /* !CONFIG_LINUX */
863
296af7c9 864#ifndef _WIN32
/* No-op handler: SIG_IPI only needs to interrupt blocking syscalls. */
static void dummy_signal(int sig)
{
}
55f8d6ac 868
13618e05 869static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
870{
871 int r;
872 sigset_t set;
873 struct sigaction sigact;
874
875 memset(&sigact, 0, sizeof(sigact));
876 sigact.sa_handler = dummy_signal;
877 sigaction(SIG_IPI, &sigact, NULL);
878
714bd040
PB
879 pthread_sigmask(SIG_BLOCK, NULL, &set);
880 sigdelset(&set, SIG_IPI);
714bd040 881 sigdelset(&set, SIGBUS);
491d6e80 882 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
883 if (r) {
884 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
885 exit(1);
886 }
887}
888
55f8d6ac 889#else /* _WIN32 */
13618e05 890static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 891{
714bd040
PB
892 abort();
893}
714bd040 894#endif /* _WIN32 */
ff48eb5f 895
b2532d88 896static QemuMutex qemu_global_mutex;
46daff13 897static QemuCond qemu_io_proceeded_cond;
6b49809c 898static unsigned iothread_requesting_mutex;
296af7c9
BS
899
900static QemuThread io_thread;
901
296af7c9
BS
902/* cpu creation */
903static QemuCond qemu_cpu_cond;
904/* system init */
296af7c9
BS
905static QemuCond qemu_pause_cond;
906
d3b12f5d 907void qemu_init_cpu_loop(void)
296af7c9 908{
6d9cb73c 909 qemu_init_sigbus();
ed94592b 910 qemu_cond_init(&qemu_cpu_cond);
ed94592b 911 qemu_cond_init(&qemu_pause_cond);
46daff13 912 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 913 qemu_mutex_init(&qemu_global_mutex);
296af7c9 914
b7680cb6 915 qemu_thread_get_self(&io_thread);
296af7c9
BS
916}
917
14e6fe12 918void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 919{
d148d90e 920 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
921}
922
4c055ab5
GZ
923static void qemu_kvm_destroy_vcpu(CPUState *cpu)
924{
925 if (kvm_destroy_vcpu(cpu) < 0) {
926 error_report("kvm_destroy_vcpu failed");
927 exit(EXIT_FAILURE);
928 }
929}
930
931static void qemu_tcg_destroy_vcpu(CPUState *cpu)
932{
933}
934
509a0d78 935static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 936{
4fdeee7c
AF
937 if (cpu->stop) {
938 cpu->stop = false;
f324e766 939 cpu->stopped = true;
96bce683 940 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 941 }
a5403c69 942 process_queued_cpu_work(cpu);
216fc9a4 943 cpu->thread_kicked = false;
296af7c9
BS
944}
945
d5f8d613 946static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 947{
16400322 948 while (all_cpu_threads_idle()) {
d5f8d613 949 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 950 }
296af7c9 951
46daff13
PB
952 while (iothread_requesting_mutex) {
953 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
954 }
6cabe1f3 955
bdc44640 956 CPU_FOREACH(cpu) {
182735ef 957 qemu_wait_io_event_common(cpu);
6cabe1f3 958 }
296af7c9
BS
959}
960
fd529e8f 961static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 962{
a98ae1d8 963 while (cpu_thread_is_idle(cpu)) {
f5c121b8 964 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 965 }
296af7c9 966
290adf38 967 qemu_kvm_eat_signals(cpu);
509a0d78 968 qemu_wait_io_event_common(cpu);
296af7c9
BS
969}
970
7e97cd88 971static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 972{
48a106bd 973 CPUState *cpu = arg;
84b4915d 974 int r;
296af7c9 975
ab28bd23
PB
976 rcu_register_thread();
977
2e7f7a3c 978 qemu_mutex_lock_iothread();
814e612e 979 qemu_thread_get_self(cpu->thread);
9f09e18a 980 cpu->thread_id = qemu_get_thread_id();
626cf8f4 981 cpu->can_do_io = 1;
4917cf44 982 current_cpu = cpu;
296af7c9 983
504134d2 984 r = kvm_init_vcpu(cpu);
84b4915d
JK
985 if (r < 0) {
986 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
987 exit(1);
988 }
296af7c9 989
13618e05 990 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
991
992 /* signal CPU creation */
61a46217 993 cpu->created = true;
296af7c9
BS
994 qemu_cond_signal(&qemu_cpu_cond);
995
4c055ab5 996 do {
a1fcaa73 997 if (cpu_can_run(cpu)) {
1458c363 998 r = kvm_cpu_exec(cpu);
83f338f7 999 if (r == EXCP_DEBUG) {
91325046 1000 cpu_handle_guest_debug(cpu);
83f338f7 1001 }
0ab07c62 1002 }
fd529e8f 1003 qemu_kvm_wait_io_event(cpu);
4c055ab5 1004 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1005
4c055ab5 1006 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1007 cpu->created = false;
1008 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1009 qemu_mutex_unlock_iothread();
296af7c9
BS
1010 return NULL;
1011}
1012
/* Per-vCPU thread body for the "dummy" accelerator used by qtest.
 *
 * No guest code is ever executed; the thread simply parks in sigwait()
 * for SIG_IPI kicks and services pending work items under the BQL.
 * Not available on Windows (no sigwait there).
 */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    /* Only SIG_IPI (the kick signal) wakes this thread up. */
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation: the starter is blocked on qemu_cpu_cond */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        /* current_cpu must not point here while we sleep without the BQL. */
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
1057
1be7fcb8
AB
1058static int64_t tcg_get_icount_limit(void)
1059{
1060 int64_t deadline;
1061
1062 if (replay_mode != REPLAY_MODE_PLAY) {
1063 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1064
1065 /* Maintain prior (possibly buggy) behaviour where if no deadline
1066 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1067 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1068 * nanoseconds.
1069 */
1070 if ((deadline < 0) || (deadline > INT32_MAX)) {
1071 deadline = INT32_MAX;
1072 }
1073
1074 return qemu_icount_round(deadline);
1075 } else {
1076 return replay_get_instructions();
1077 }
1078}
1079
12e9700d
AB
1080static void handle_icount_deadline(void)
1081{
1082 if (use_icount) {
1083 int64_t deadline =
1084 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1085
1086 if (deadline == 0) {
1087 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1088 }
1089 }
1090}
1091
/* Execute one TCG slice on @cpu and return cpu_exec()'s result code.
 *
 * With icount enabled, this brackets the run with instruction-budget
 * bookkeeping: pending (not yet executed) instructions are folded back
 * into timers_state.qemu_icount before a new budget is handed out, and
 * again after the run.  The statement order here is load-bearing —
 * qemu_icount must be adjusted before the decr/extra fields are reset.
 */
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        /* Subtract what is still pending from the global counter... */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        /* ...then grant a fresh budget up to the next timer deadline. */
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        /* The low 16 bits of icount_decr hold at most 0xffff; the
         * remainder of the budget goes into icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        /* Clears both u16.low and the u16.high interrupt flag at once. */
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
1133
c93bbbef
AB
1134/* Destroy any remaining vCPUs which have been unplugged and have
1135 * finished running
1136 */
1137static void deal_with_unplugged_cpus(void)
1be7fcb8 1138{
c93bbbef 1139 CPUState *cpu;
1be7fcb8 1140
c93bbbef
AB
1141 CPU_FOREACH(cpu) {
1142 if (cpu->unplug && !cpu_can_run(cpu)) {
1143 qemu_tcg_destroy_vcpu(cpu);
1144 cpu->created = false;
1145 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1146 break;
1147 }
1148 }
1be7fcb8 1149}
bdb7ca67 1150
/* The single TCG vCPU thread: round-robins over all vCPUs, running one
 * TCG slice each, until exit_request is raised by an I/O-thread kick.
 *
 * Runs under the BQL except inside cpu_exec()/the wait primitives.
 * Note @cpu is reused first as the argument, then as the loop cursor;
 * the cursor is preserved across outer iterations so scheduling resumes
 * where it left off.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* All vCPUs share this thread, so mark every one created here. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* Force one pass through the wait path below before running guest
     * code (exit_request is consumed by the inner for loop). */
    atomic_mb_set(&exit_request, 1);

    cpu = first_cpu;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Wrap the cursor back to the first vCPU. */
        if (!cpu) {
            cpu = first_cpu;
        }

        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {

            /* Timers are disabled while single-stepping with NOTIMER. */
            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                /* Skip past an unplugged vCPU so it is not rescheduled. */
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

        } /* for cpu.. */

        /* Pairs with smp_wmb in qemu_cpu_kick.  */
        atomic_mb_set(&exit_request, 0);

        handle_icount_deadline();

        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}
1222
2ff09a40 1223static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1224{
1225#ifndef _WIN32
1226 int err;
1227
e0c38211
PB
1228 if (cpu->thread_kicked) {
1229 return;
9102deda 1230 }
e0c38211 1231 cpu->thread_kicked = true;
814e612e 1232 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1233 if (err) {
1234 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1235 exit(1);
1236 }
1237#else /* _WIN32 */
e0c38211
PB
1238 abort();
1239#endif
1240}
ed9164a3 1241
e0c38211
PB
1242static void qemu_cpu_kick_no_halt(void)
1243{
1244 CPUState *cpu;
1245 /* Ensure whatever caused the exit has reached the CPU threads before
1246 * writing exit_request.
1247 */
1248 atomic_mb_set(&exit_request, 1);
1249 cpu = atomic_mb_read(&tcg_current_cpu);
1250 if (cpu) {
1251 cpu_exit(cpu);
cc015e9a 1252 }
cc015e9a
PB
1253}
1254
c08d7424 1255void qemu_cpu_kick(CPUState *cpu)
296af7c9 1256{
f5c121b8 1257 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1258 if (tcg_enabled()) {
1259 qemu_cpu_kick_no_halt();
1260 } else {
1261 qemu_cpu_kick_thread(cpu);
1262 }
296af7c9
BS
1263}
1264
/* Kick the calling vCPU thread itself out of guest execution.
 * Must be called from a vCPU thread (current_cpu set).
 */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1270
/* Return true if the calling thread is the host thread backing @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1275
79e2b9ae 1276bool qemu_in_vcpu_thread(void)
aa723c23 1277{
4917cf44 1278 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1279}
1280
afbe7053
PB
/* Per-thread flag tracking whether this thread currently holds the BQL
 * (qemu_global_mutex).  Maintained by qemu_mutex_{lock,unlock}_iothread. */
static __thread bool iothread_locked = false;

/* Return true if the calling thread holds the BQL. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1287
296af7c9
BS
/* Acquire the Big QEMU Lock (qemu_global_mutex).
 *
 * iothread_requesting_mutex is bumped so the TCG thread knows someone
 * wants the lock; in the contended TCG case the vCPU is kicked out of
 * guest code first so the trylock/lock sequence cannot starve.  Sets
 * the per-thread iothread_locked flag on success.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            /* Contended: force the TCG thread out of guest code, then
             * block until it releases the lock. */
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Let vCPU threads parked on qemu_io_proceeded_cond continue. */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1308
/* Release the BQL.  The per-thread flag is cleared before the unlock so
 * it is never observed true by this thread while the lock is not held. */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1314
e8faee06 1315static bool all_vcpus_paused(void)
296af7c9 1316{
bdc44640 1317 CPUState *cpu;
296af7c9 1318
bdc44640 1319 CPU_FOREACH(cpu) {
182735ef 1320 if (!cpu->stopped) {
e8faee06 1321 return false;
0ab07c62 1322 }
296af7c9
BS
1323 }
1324
e8faee06 1325 return true;
296af7c9
BS
1326}
1327
/* Request every vCPU to stop and wait until they all have.
 *
 * When called from a vCPU thread, this thread stops itself first; under
 * TCG (single vCPU thread) that is sufficient, so the remaining vCPUs
 * are marked stopped directly and no waiting is needed.  Otherwise we
 * wait on qemu_pause_cond, re-kicking after each wakeup in case a kick
 * was lost while a vCPU was not yet sleeping.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            /* TCG: all vCPUs run on this very thread, so they are
             * trivially stopped now. */
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick: a vCPU may have missed the first kick. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1356
2993683b
IM
1357void cpu_resume(CPUState *cpu)
1358{
1359 cpu->stop = false;
1360 cpu->stopped = false;
1361 qemu_cpu_kick(cpu);
1362}
1363
296af7c9
BS
1364void resume_all_vcpus(void)
1365{
bdc44640 1366 CPUState *cpu;
296af7c9 1367
40daca54 1368 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1369 CPU_FOREACH(cpu) {
182735ef 1370 cpu_resume(cpu);
296af7c9
BS
1371 }
1372}
1373
4c055ab5
GZ
1374void cpu_remove(CPUState *cpu)
1375{
1376 cpu->stop = true;
1377 cpu->unplug = true;
1378 qemu_cpu_kick(cpu);
1379}
1380
2c579042
BR
/* Request removal of @cpu and block (dropping the BQL while waiting)
 * until its thread has torn the vCPU down and cleared cpu->created. */
void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1388
4900116e
DDAG
1389/* For temporary buffers for forming a name */
1390#define VCPU_THREAD_NAME_SIZE 16
1391
/* Attach @cpu to the TCG execution thread.
 *
 * All TCG vCPUs share one host thread and one halt condition; the first
 * call creates them (and blocks until the thread reports itself
 * created), subsequent calls just reuse the static handles.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    /* Shared across all vCPUs — created once by the first call. */
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait until the thread signals cpu->created. */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        /* Later vCPUs piggyback on the existing thread. */
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1420
48a106bd 1421static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1422{
4900116e
DDAG
1423 char thread_name[VCPU_THREAD_NAME_SIZE];
1424
814e612e 1425 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1426 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1427 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1428 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1429 cpu->cpu_index);
1430 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1431 cpu, QEMU_THREAD_JOINABLE);
61a46217 1432 while (!cpu->created) {
18a85728 1433 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1434 }
296af7c9
BS
1435}
1436
10a9021d 1437static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1438{
4900116e
DDAG
1439 char thread_name[VCPU_THREAD_NAME_SIZE];
1440
814e612e 1441 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1442 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1443 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1444 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1445 cpu->cpu_index);
1446 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1447 QEMU_THREAD_JOINABLE);
61a46217 1448 while (!cpu->created) {
c7f0f3b1
AL
1449 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1450 }
1451}
1452
/* Finish initializing @cpu and start its execution thread.
 *
 * Fills in topology fields, gives the CPU a default address space if
 * the target didn't set one up, then dispatches to the accelerator-
 * specific start routine (KVM / TCG / dummy).  The vCPU starts in the
 * stopped state until resume_all_vcpus()/vm_start kicks it off.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
1477
b4a3d965 1478void cpu_stop_current(void)
296af7c9 1479{
4917cf44
AF
1480 if (current_cpu) {
1481 current_cpu->stop = false;
1482 current_cpu->stopped = true;
1483 cpu_exit(current_cpu);
96bce683 1484 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1485 }
296af7c9
BS
1486}
1487
56983463 1488int vm_stop(RunState state)
296af7c9 1489{
aa723c23 1490 if (qemu_in_vcpu_thread()) {
74892d24 1491 qemu_system_vmstop_request_prepare();
1dfb4dd9 1492 qemu_system_vmstop_request(state);
296af7c9
BS
1493 /*
1494 * FIXME: should not return to device code in case
1495 * vm_stop() has been requested.
1496 */
b4a3d965 1497 cpu_stop_current();
56983463 1498 return 0;
296af7c9 1499 }
56983463
KW
1500
1501 return do_vm_stop(state);
296af7c9
BS
1502}
1503
8a9236f1
LC
1504/* does a state transition even if the VM is already stopped,
1505 current state is forgotten forever */
56983463 1506int vm_stop_force_state(RunState state)
8a9236f1
LC
1507{
1508 if (runstate_is_running()) {
56983463 1509 return vm_stop(state);
8a9236f1
LC
1510 } else {
1511 runstate_set(state);
b2780d32
WC
1512
1513 bdrv_drain_all();
594a45ce
KW
1514 /* Make sure to return an error if the flush in a previous vm_stop()
1515 * failed. */
22af08ea 1516 return bdrv_flush_all();
8a9236f1
LC
1517 }
1518}
1519
/* Print the list of selectable CPU models to @f, for targets that
 * provide a cpu_list() implementation; silently does nothing otherwise.
 * @optarg is currently unused here.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
de0b36b6
LC
1527
/* QMP 'query-cpus' handler: build a CpuInfoList entry for every vCPU.
 *
 * Each CPU's register state is synchronized from the accelerator before
 * sampling; the per-target #ifdef ladder selects which program-counter
 * field(s) are reported.  Caller owns the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Per-target env pointer, used only to read the PC below. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Pull current register state in from the accelerator. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        /* Linear address: EIP plus the CS segment base. */
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
0cfd6a9a
LC
1592
1593void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1594 bool has_cpu, int64_t cpu_index, Error **errp)
1595{
1596 FILE *f;
1597 uint32_t l;
55e5c285 1598 CPUState *cpu;
0cfd6a9a 1599 uint8_t buf[1024];
0dc9daf0 1600 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1601
1602 if (!has_cpu) {
1603 cpu_index = 0;
1604 }
1605
151d1322
AF
1606 cpu = qemu_get_cpu(cpu_index);
1607 if (cpu == NULL) {
c6bd8c70
MA
1608 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1609 "a CPU number");
0cfd6a9a
LC
1610 return;
1611 }
1612
1613 f = fopen(filename, "wb");
1614 if (!f) {
618da851 1615 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1616 return;
1617 }
1618
1619 while (size != 0) {
1620 l = sizeof(buf);
1621 if (l > size)
1622 l = size;
2f4d0f59 1623 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1624 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1625 " specified", orig_addr, orig_size);
2f4d0f59
AK
1626 goto exit;
1627 }
0cfd6a9a 1628 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1629 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1630 goto exit;
1631 }
1632 addr += l;
1633 size -= l;
1634 }
1635
1636exit:
1637 fclose(f);
1638}
6d3962bf
LC
1639
1640void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1641 Error **errp)
1642{
1643 FILE *f;
1644 uint32_t l;
1645 uint8_t buf[1024];
1646
1647 f = fopen(filename, "wb");
1648 if (!f) {
618da851 1649 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1650 return;
1651 }
1652
1653 while (size != 0) {
1654 l = sizeof(buf);
1655 if (l > size)
1656 l = size;
eb6282f2 1657 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1658 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1659 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1660 goto exit;
1661 }
1662 addr += l;
1663 size -= l;
1664 }
1665
1666exit:
1667 fclose(f);
1668}
ab49ab5c
LC
1669
/* QMP 'inject-nmi' handler: deliver an NMI via the monitor's current
 * CPU; errors (e.g. target has no NMI support) come back through @errp. */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
27498bef
ST
1674
/* Print icount clock-drift statistics to @f; no-op unless icount is in
 * use.  Max delay/advance are only tracked with -icount align. */
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    /* Host/guest clock difference, in milliseconds. */
    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}