cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "qemu/cutils.h"
29 #include "migration/vmstate.h"
30 #include "monitor/monitor.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-misc.h"
33 #include "qapi/qapi-events-run-state.h"
34 #include "qapi/qmp/qerror.h"
35 #include "qemu/error-report.h"
36 #include "qemu/qemu-print.h"
37 #include "sysemu/tcg.h"
38 #include "sysemu/block-backend.h"
39 #include "exec/gdbstub.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/hw_accel.h"
42 #include "sysemu/kvm.h"
43 #include "sysemu/hax.h"
44 #include "sysemu/hvf.h"
45 #include "sysemu/whpx.h"
46 #include "exec/exec-all.h"
47
48 #include "qemu/thread.h"
49 #include "qemu/plugin.h"
50 #include "sysemu/cpus.h"
51 #include "sysemu/qtest.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/option.h"
54 #include "qemu/bitmap.h"
55 #include "qemu/seqlock.h"
56 #include "qemu/guest-random.h"
57 #include "tcg/tcg.h"
58 #include "hw/nmi.h"
59 #include "sysemu/replay.h"
60 #include "sysemu/runstate.h"
61 #include "hw/boards.h"
62 #include "hw/hw.h"
63
64 #ifdef CONFIG_LINUX
65
66 #include <sys/prctl.h>
67
68 #ifndef PR_MCE_KILL
69 #define PR_MCE_KILL 33
70 #endif
71
72 #ifndef PR_MCE_KILL_SET
73 #define PR_MCE_KILL_SET 1
74 #endif
75
76 #ifndef PR_MCE_KILL_EARLY
77 #define PR_MCE_KILL_EARLY 1
78 #endif
79
80 #endif /* CONFIG_LINUX */
81
82 static QemuMutex qemu_global_mutex;
83
84 int64_t max_delay;
85 int64_t max_advance;
86
87 /* vcpu throttling controls */
88 static QEMUTimer *throttle_timer;
89 static unsigned int throttle_percentage;
90
91 #define CPU_THROTTLE_PCT_MIN 1
92 #define CPU_THROTTLE_PCT_MAX 99
93 #define CPU_THROTTLE_TIMESLICE_NS 10000000
94
95 bool cpu_is_stopped(CPUState *cpu)
96 {
97 return cpu->stopped || !runstate_is_running();
98 }
99
100 static bool cpu_thread_is_idle(CPUState *cpu)
101 {
102 if (cpu->stop || cpu->queued_work_first) {
103 return false;
104 }
105 if (cpu_is_stopped(cpu)) {
106 return true;
107 }
108 if (!cpu->halted || cpu_has_work(cpu) ||
109 kvm_halt_in_kernel()) {
110 return false;
111 }
112 return true;
113 }
114
115 static bool all_cpu_threads_idle(void)
116 {
117 CPUState *cpu;
118
119 CPU_FOREACH(cpu) {
120 if (!cpu_thread_is_idle(cpu)) {
121 return false;
122 }
123 }
124 return true;
125 }
126
127 /***********************************************************/
128 /* guest cycle counter */
129
130 /* Protected by TimersState seqlock */
131
132 static bool icount_sleep = true;
133 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
134 #define MAX_ICOUNT_SHIFT 10
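/*
 * Worked example for the limit above: with the maximum shift of 10, each
 * executed instruction is charged 2^10 = 1024 ns of virtual time, i.e.
 * roughly 10^9 / 1024 ~= 976,000 instructions per emulated second, which is
 * the ~1 MIPS floor mentioned in the comment above.
 */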
135
136 typedef struct TimersState {
137 /* Protected by BQL. */
138 int64_t cpu_ticks_prev;
139 int64_t cpu_ticks_offset;
140
141         /* Protect fields that can be read outside the
142          * BQL and written from multiple threads.
143          */
144 QemuSeqLock vm_clock_seqlock;
145 QemuSpin vm_clock_lock;
146
147 int16_t cpu_ticks_enabled;
148
149 /* Conversion factor from emulated instructions to virtual clock ticks. */
150 int16_t icount_time_shift;
151
152 /* Compensate for varying guest execution speed. */
153 int64_t qemu_icount_bias;
154
155 int64_t vm_clock_warp_start;
156 int64_t cpu_clock_offset;
157
158 /* Only written by TCG thread */
159 int64_t qemu_icount;
160
161 /* for adjusting icount */
162 QEMUTimer *icount_rt_timer;
163 QEMUTimer *icount_vm_timer;
164 QEMUTimer *icount_warp_timer;
165 } TimersState;
166
167 static TimersState timers_state;
168 bool mttcg_enabled;
169
170
171 /* The current number of executed instructions is based on what we
172 * originally budgeted minus the current state of the decrementing
173 * icount counters in extra/u16.low.
174 */
175 static int64_t cpu_get_icount_executed(CPUState *cpu)
176 {
177 return (cpu->icount_budget -
178 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
179 }
180
181 /*
182 * Update the global shared timer_state.qemu_icount to take into
183 * account executed instructions. This is done by the TCG vCPU
184 * thread so the main-loop can see time has moved forward.
185 */
186 static void cpu_update_icount_locked(CPUState *cpu)
187 {
188 int64_t executed = cpu_get_icount_executed(cpu);
189 cpu->icount_budget -= executed;
190
191 atomic_set_i64(&timers_state.qemu_icount,
192 timers_state.qemu_icount + executed);
193 }
194
195 /*
196 * Update the global shared timer_state.qemu_icount to take into
197 * account executed instructions. This is done by the TCG vCPU
198 * thread so the main-loop can see time has moved forward.
199 */
200 void cpu_update_icount(CPUState *cpu)
201 {
202 seqlock_write_lock(&timers_state.vm_clock_seqlock,
203 &timers_state.vm_clock_lock);
204 cpu_update_icount_locked(cpu);
205 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
206 &timers_state.vm_clock_lock);
207 }
208
209 static int64_t cpu_get_icount_raw_locked(void)
210 {
211 CPUState *cpu = current_cpu;
212
213 if (cpu && cpu->running) {
214 if (!cpu->can_do_io) {
215 error_report("Bad icount read");
216 exit(1);
217 }
218 /* Take into account what has run */
219 cpu_update_icount_locked(cpu);
220 }
221 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
222 return atomic_read_i64(&timers_state.qemu_icount);
223 }
224
225 static int64_t cpu_get_icount_locked(void)
226 {
227 int64_t icount = cpu_get_icount_raw_locked();
228 return atomic_read_i64(&timers_state.qemu_icount_bias) +
229 cpu_icount_to_ns(icount);
230 }
231
232 int64_t cpu_get_icount_raw(void)
233 {
234 int64_t icount;
235 unsigned start;
236
237 do {
238 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
239 icount = cpu_get_icount_raw_locked();
240 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
241
242 return icount;
243 }
244
245 /* Return the virtual CPU time, based on the instruction counter. */
246 int64_t cpu_get_icount(void)
247 {
248 int64_t icount;
249 unsigned start;
250
251 do {
252 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
253 icount = cpu_get_icount_locked();
254 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
255
256 return icount;
257 }
258
259 int64_t cpu_icount_to_ns(int64_t icount)
260 {
261 return icount << atomic_read(&timers_state.icount_time_shift);
262 }
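/*
 * For example, with the default initial icount_time_shift of 3 (set in
 * configure_icount() below), each instruction accounts for 2^3 = 8 ns of
 * virtual time, i.e. an assumed guest speed of about 125 MIPS.
 */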
263
264 static int64_t cpu_get_ticks_locked(void)
265 {
266 int64_t ticks = timers_state.cpu_ticks_offset;
267 if (timers_state.cpu_ticks_enabled) {
268 ticks += cpu_get_host_ticks();
269 }
270
271 if (timers_state.cpu_ticks_prev > ticks) {
272         /* Non-increasing ticks may happen if the host uses software suspend. */
273 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
274 ticks = timers_state.cpu_ticks_prev;
275 }
276
277 timers_state.cpu_ticks_prev = ticks;
278 return ticks;
279 }
280
281 /* return the time elapsed in VM between vm_start and vm_stop. Unless
282 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
283 * counter.
284 */
285 int64_t cpu_get_ticks(void)
286 {
287 int64_t ticks;
288
289 if (use_icount) {
290 return cpu_get_icount();
291 }
292
293 qemu_spin_lock(&timers_state.vm_clock_lock);
294 ticks = cpu_get_ticks_locked();
295 qemu_spin_unlock(&timers_state.vm_clock_lock);
296 return ticks;
297 }
298
299 static int64_t cpu_get_clock_locked(void)
300 {
301 int64_t time;
302
303 time = timers_state.cpu_clock_offset;
304 if (timers_state.cpu_ticks_enabled) {
305 time += get_clock();
306 }
307
308 return time;
309 }
310
311 /* Return the monotonic time elapsed in VM, i.e.,
312 * the time between vm_start and vm_stop
313 */
314 int64_t cpu_get_clock(void)
315 {
316 int64_t ti;
317 unsigned start;
318
319 do {
320 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
321 ti = cpu_get_clock_locked();
322 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
323
324 return ti;
325 }
326
327 /* enable cpu_get_ticks()
328 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
329 */
330 void cpu_enable_ticks(void)
331 {
332 seqlock_write_lock(&timers_state.vm_clock_seqlock,
333 &timers_state.vm_clock_lock);
334 if (!timers_state.cpu_ticks_enabled) {
335 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
336 timers_state.cpu_clock_offset -= get_clock();
337 timers_state.cpu_ticks_enabled = 1;
338 }
339 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
340 &timers_state.vm_clock_lock);
341 }
342
343 /* disable cpu_get_ticks(): the clock is stopped. You must not call
344 * cpu_get_ticks() after that.
345 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
346 */
347 void cpu_disable_ticks(void)
348 {
349 seqlock_write_lock(&timers_state.vm_clock_seqlock,
350 &timers_state.vm_clock_lock);
351 if (timers_state.cpu_ticks_enabled) {
352 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
353 timers_state.cpu_clock_offset = cpu_get_clock_locked();
354 timers_state.cpu_ticks_enabled = 0;
355 }
356 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
357 &timers_state.vm_clock_lock);
358 }
359
360 /* Correlation between real and virtual time is always going to be
361 fairly approximate, so ignore small variation.
362    When the guest is idle, real and virtual time will be aligned in
363 the IO wait loop. */
364 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
365
366 static void icount_adjust(void)
367 {
368 int64_t cur_time;
369 int64_t cur_icount;
370 int64_t delta;
371
372 /* Protected by TimersState mutex. */
373 static int64_t last_delta;
374
375 /* If the VM is not running, then do nothing. */
376 if (!runstate_is_running()) {
377 return;
378 }
379
380 seqlock_write_lock(&timers_state.vm_clock_seqlock,
381 &timers_state.vm_clock_lock);
382 cur_time = cpu_get_clock_locked();
383 cur_icount = cpu_get_icount_locked();
384
385 delta = cur_icount - cur_time;
386 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
387 if (delta > 0
388 && last_delta + ICOUNT_WOBBLE < delta * 2
389 && timers_state.icount_time_shift > 0) {
390 /* The guest is getting too far ahead. Slow time down. */
391 atomic_set(&timers_state.icount_time_shift,
392 timers_state.icount_time_shift - 1);
393 }
394 if (delta < 0
395 && last_delta - ICOUNT_WOBBLE > delta * 2
396 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
397 /* The guest is getting too far behind. Speed time up. */
398 atomic_set(&timers_state.icount_time_shift,
399 timers_state.icount_time_shift + 1);
400 }
401 last_delta = delta;
402 atomic_set_i64(&timers_state.qemu_icount_bias,
403 cur_icount - (timers_state.qemu_icount
404 << timers_state.icount_time_shift));
405 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
406 &timers_state.vm_clock_lock);
407 }
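/*
 * Note on the bias update above: cpu_get_icount_locked() computes virtual
 * time as qemu_icount_bias + (qemu_icount << icount_time_shift).  Recomputing
 * the bias as cur_icount - (qemu_icount << shift) with the new shift keeps
 * the virtual clock continuous at the moment the shift changes; only the
 * rate at which it advances from here on is affected.
 */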
408
409 static void icount_adjust_rt(void *opaque)
410 {
411 timer_mod(timers_state.icount_rt_timer,
412 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
413 icount_adjust();
414 }
415
416 static void icount_adjust_vm(void *opaque)
417 {
418 timer_mod(timers_state.icount_vm_timer,
419 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
420 NANOSECONDS_PER_SECOND / 10);
421 icount_adjust();
422 }
423
424 static int64_t qemu_icount_round(int64_t count)
425 {
426 int shift = atomic_read(&timers_state.icount_time_shift);
427 return (count + (1 << shift) - 1) >> shift;
428 }
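/*
 * qemu_icount_round() converts a deadline in nanoseconds into a whole number
 * of instructions, rounding up.  E.g. with icount_time_shift == 3, a 100 ns
 * deadline becomes (100 + 7) >> 3 = 13 instructions (13 * 8 = 104 ns), so the
 * resulting budget always covers the full deadline.
 */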
429
430 static void icount_warp_rt(void)
431 {
432 unsigned seq;
433 int64_t warp_start;
434
435 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
436 * changes from -1 to another value, so the race here is okay.
437 */
438 do {
439 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
440 warp_start = timers_state.vm_clock_warp_start;
441 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
442
443 if (warp_start == -1) {
444 return;
445 }
446
447 seqlock_write_lock(&timers_state.vm_clock_seqlock,
448 &timers_state.vm_clock_lock);
449 if (runstate_is_running()) {
450 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
451 cpu_get_clock_locked());
452 int64_t warp_delta;
453
454 warp_delta = clock - timers_state.vm_clock_warp_start;
455 if (use_icount == 2) {
456 /*
457 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
458 * far ahead of real time.
459 */
460 int64_t cur_icount = cpu_get_icount_locked();
461 int64_t delta = clock - cur_icount;
462 warp_delta = MIN(warp_delta, delta);
463 }
464 atomic_set_i64(&timers_state.qemu_icount_bias,
465 timers_state.qemu_icount_bias + warp_delta);
466 }
467 timers_state.vm_clock_warp_start = -1;
468 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
469 &timers_state.vm_clock_lock);
470
471 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
472 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
473 }
474 }
475
476 static void icount_timer_cb(void *opaque)
477 {
478 /* No need for a checkpoint because the timer already synchronizes
479 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
480 */
481 icount_warp_rt();
482 }
483
484 void qtest_clock_warp(int64_t dest)
485 {
486 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
487 AioContext *aio_context;
488 assert(qtest_enabled());
489 aio_context = qemu_get_aio_context();
490 while (clock < dest) {
491 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
492 QEMU_TIMER_ATTR_ALL);
493 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
494
495 seqlock_write_lock(&timers_state.vm_clock_seqlock,
496 &timers_state.vm_clock_lock);
497 atomic_set_i64(&timers_state.qemu_icount_bias,
498 timers_state.qemu_icount_bias + warp);
499 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
500 &timers_state.vm_clock_lock);
501
502 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
503 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
504 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
505 }
506 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
507 }
508
509 void qemu_start_warp_timer(void)
510 {
511 int64_t clock;
512 int64_t deadline;
513
514 if (!use_icount) {
515 return;
516 }
517
518 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
519 * do not fire, so computing the deadline does not make sense.
520 */
521 if (!runstate_is_running()) {
522 return;
523 }
524
525 if (replay_mode != REPLAY_MODE_PLAY) {
526 if (!all_cpu_threads_idle()) {
527 return;
528 }
529
530 if (qtest_enabled()) {
531 /* When testing, qtest commands advance icount. */
532 return;
533 }
534
535 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
536 } else {
537 /* warp clock deterministically in record/replay mode */
538 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
539             /* The vCPU is sleeping and the warp can't be started.
540                It is probably a race condition: the notification sent
541                to the vCPU was processed in advance and the vCPU went to sleep.
542                Therefore we have to wake it up to do something. */
543 if (replay_has_checkpoint()) {
544 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
545 }
546 return;
547 }
548 }
549
550 /* We want to use the earliest deadline from ALL vm_clocks */
551 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
552 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
553 ~QEMU_TIMER_ATTR_EXTERNAL);
554 if (deadline < 0) {
555 static bool notified;
556 if (!icount_sleep && !notified) {
557 warn_report("icount sleep disabled and no active timers");
558 notified = true;
559 }
560 return;
561 }
562
563 if (deadline > 0) {
564 /*
565 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
566 * sleep. Otherwise, the CPU might be waiting for a future timer
567 * interrupt to wake it up, but the interrupt never comes because
568 * the vCPU isn't running any insns and thus doesn't advance the
569 * QEMU_CLOCK_VIRTUAL.
570 */
571 if (!icount_sleep) {
572 /*
573 * We never let VCPUs sleep in no sleep icount mode.
574 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
575 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
576 * It is useful when we want a deterministic execution time,
577 * isolated from host latencies.
578 */
579 seqlock_write_lock(&timers_state.vm_clock_seqlock,
580 &timers_state.vm_clock_lock);
581 atomic_set_i64(&timers_state.qemu_icount_bias,
582 timers_state.qemu_icount_bias + deadline);
583 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
584 &timers_state.vm_clock_lock);
585 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
586 } else {
587 /*
588 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
589 * "real" time, (related to the time left until the next event) has
590 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
591              * This keeps the warps from being visible externally; for example,
592              * you will not send network packets continuously instead of
593 * every 100ms.
594 */
595 seqlock_write_lock(&timers_state.vm_clock_seqlock,
596 &timers_state.vm_clock_lock);
597 if (timers_state.vm_clock_warp_start == -1
598 || timers_state.vm_clock_warp_start > clock) {
599 timers_state.vm_clock_warp_start = clock;
600 }
601 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
602 &timers_state.vm_clock_lock);
603 timer_mod_anticipate(timers_state.icount_warp_timer,
604 clock + deadline);
605 }
606 } else if (deadline == 0) {
607 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
608 }
609 }
610
611 static void qemu_account_warp_timer(void)
612 {
613 if (!use_icount || !icount_sleep) {
614 return;
615 }
616
617 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
618 * do not fire, so computing the deadline does not make sense.
619 */
620 if (!runstate_is_running()) {
621 return;
622 }
623
624 /* warp clock deterministically in record/replay mode */
625 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
626 return;
627 }
628
629 timer_del(timers_state.icount_warp_timer);
630 icount_warp_rt();
631 }
632
633 static bool icount_state_needed(void *opaque)
634 {
635 return use_icount;
636 }
637
638 static bool warp_timer_state_needed(void *opaque)
639 {
640 TimersState *s = opaque;
641 return s->icount_warp_timer != NULL;
642 }
643
644 static bool adjust_timers_state_needed(void *opaque)
645 {
646 TimersState *s = opaque;
647 return s->icount_rt_timer != NULL;
648 }
649
650 /*
651  * The subsection for warp timer migration is optional, because the timer may not be created
652 */
653 static const VMStateDescription icount_vmstate_warp_timer = {
654 .name = "timer/icount/warp_timer",
655 .version_id = 1,
656 .minimum_version_id = 1,
657 .needed = warp_timer_state_needed,
658 .fields = (VMStateField[]) {
659 VMSTATE_INT64(vm_clock_warp_start, TimersState),
660 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
661 VMSTATE_END_OF_LIST()
662 }
663 };
664
665 static const VMStateDescription icount_vmstate_adjust_timers = {
666 .name = "timer/icount/timers",
667 .version_id = 1,
668 .minimum_version_id = 1,
669 .needed = adjust_timers_state_needed,
670 .fields = (VMStateField[]) {
671 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
672 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
673 VMSTATE_END_OF_LIST()
674 }
675 };
676
677 /*
678 * This is a subsection for icount migration.
679 */
680 static const VMStateDescription icount_vmstate_timers = {
681 .name = "timer/icount",
682 .version_id = 1,
683 .minimum_version_id = 1,
684 .needed = icount_state_needed,
685 .fields = (VMStateField[]) {
686 VMSTATE_INT64(qemu_icount_bias, TimersState),
687 VMSTATE_INT64(qemu_icount, TimersState),
688 VMSTATE_END_OF_LIST()
689 },
690 .subsections = (const VMStateDescription*[]) {
691 &icount_vmstate_warp_timer,
692 &icount_vmstate_adjust_timers,
693 NULL
694 }
695 };
696
697 static const VMStateDescription vmstate_timers = {
698 .name = "timer",
699 .version_id = 2,
700 .minimum_version_id = 1,
701 .fields = (VMStateField[]) {
702 VMSTATE_INT64(cpu_ticks_offset, TimersState),
703 VMSTATE_UNUSED(8),
704 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
705 VMSTATE_END_OF_LIST()
706 },
707 .subsections = (const VMStateDescription*[]) {
708 &icount_vmstate_timers,
709 NULL
710 }
711 };
712
713 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
714 {
715 double pct;
716 double throttle_ratio;
717 int64_t sleeptime_ns, endtime_ns;
718
719 if (!cpu_throttle_get_percentage()) {
720 return;
721 }
722
723 pct = (double)cpu_throttle_get_percentage()/100;
724 throttle_ratio = pct / (1 - pct);
725 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
726 sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
727 endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
728 while (sleeptime_ns > 0 && !cpu->stop) {
729 if (sleeptime_ns > SCALE_MS) {
730 qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
731 sleeptime_ns / SCALE_MS);
732 } else {
733 qemu_mutex_unlock_iothread();
734 g_usleep(sleeptime_ns / SCALE_US);
735 qemu_mutex_lock_iothread();
736 }
737 sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
738 }
739 atomic_set(&cpu->throttle_thread_scheduled, 0);
740 }
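/*
 * Worked example for the throttle arithmetic above: at a 75% throttle,
 * pct = 0.75 and throttle_ratio = 0.75 / 0.25 = 3, so for every 10 ms
 * timeslice (CPU_THROTTLE_TIMESLICE_NS) of execution the vCPU sleeps about
 * 30 ms, leaving it running roughly 25% of wall-clock time.
 */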
741
742 static void cpu_throttle_timer_tick(void *opaque)
743 {
744 CPUState *cpu;
745 double pct;
746
747 /* Stop the timer if needed */
748 if (!cpu_throttle_get_percentage()) {
749 return;
750 }
751 CPU_FOREACH(cpu) {
752 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
753 async_run_on_cpu(cpu, cpu_throttle_thread,
754 RUN_ON_CPU_NULL);
755 }
756 }
757
758 pct = (double)cpu_throttle_get_percentage()/100;
759 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
760 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
761 }
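/*
 * The timer period above grows with the throttle percentage so the
 * run/sleep ratio stays correct: continuing the 75% example, the next tick
 * is scheduled CPU_THROTTLE_TIMESLICE_NS / 0.25 = 40 ms out, i.e. one 10 ms
 * timeslice of execution followed by the ~30 ms sleep requested in
 * cpu_throttle_thread().
 */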
762
763 void cpu_throttle_set(int new_throttle_pct)
764 {
765 /* Ensure throttle percentage is within valid range */
766 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
767 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
768
769 atomic_set(&throttle_percentage, new_throttle_pct);
770
771 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
772 CPU_THROTTLE_TIMESLICE_NS);
773 }
774
775 void cpu_throttle_stop(void)
776 {
777 atomic_set(&throttle_percentage, 0);
778 }
779
780 bool cpu_throttle_active(void)
781 {
782 return (cpu_throttle_get_percentage() != 0);
783 }
784
785 int cpu_throttle_get_percentage(void)
786 {
787 return atomic_read(&throttle_percentage);
788 }
789
790 void cpu_ticks_init(void)
791 {
792 seqlock_init(&timers_state.vm_clock_seqlock);
793 qemu_spin_init(&timers_state.vm_clock_lock);
794 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
795 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
796 cpu_throttle_timer_tick, NULL);
797 }
798
799 void configure_icount(QemuOpts *opts, Error **errp)
800 {
801 const char *option = qemu_opt_get(opts, "shift");
802 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
803 bool align = qemu_opt_get_bool(opts, "align", false);
804 long time_shift = -1;
805
806 if (!option && qemu_opt_get(opts, "align")) {
807 error_setg(errp, "Please specify shift option when using align");
808 return;
809 }
810
811 if (align && !sleep) {
812 error_setg(errp, "align=on and sleep=off are incompatible");
813 return;
814 }
815
816 if (strcmp(option, "auto") != 0) {
817 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
818 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
819 error_setg(errp, "icount: Invalid shift value");
820 return;
821 }
822 } else if (icount_align_option) {
823 error_setg(errp, "shift=auto and align=on are incompatible");
824 return;
825 } else if (!icount_sleep) {
826 error_setg(errp, "shift=auto and sleep=off are incompatible");
827 return;
828 }
829
830 icount_sleep = sleep;
831 if (icount_sleep) {
832 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
833 icount_timer_cb, NULL);
834 }
835
836 icount_align_option = align;
837
838 if (time_shift >= 0) {
839 timers_state.icount_time_shift = time_shift;
840 use_icount = 1;
841 return;
842 }
843
844 use_icount = 2;
845
846 /* 125MIPS seems a reasonable initial guess at the guest speed.
847 It will be corrected fairly quickly anyway. */
848 timers_state.icount_time_shift = 3;
849
850 /* Have both realtime and virtual time triggers for speed adjustment.
851 The realtime trigger catches emulated time passing too slowly,
852 the virtual time trigger catches emulated time passing too fast.
853 Realtime triggers occur even when idle, so use them less frequently
854 than VM triggers. */
855 timers_state.vm_clock_warp_start = -1;
856 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
857 icount_adjust_rt, NULL);
858 timer_mod(timers_state.icount_rt_timer,
859 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
860 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
861 icount_adjust_vm, NULL);
862 timer_mod(timers_state.icount_vm_timer,
863 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
864 NANOSECONDS_PER_SECOND / 10);
865 }
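/*
 * configure_icount() backs the -icount command-line option; the option names
 * parsed above map to, for example, "-icount shift=7,sleep=on" for a fixed
 * 2^7 = 128 ns per instruction, or "-icount shift=auto" to let icount_adjust()
 * tune the shift at run time (a sketch of typical usage, not an exhaustive
 * list of the accepted suboptions).
 */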
866
867 /***********************************************************/
868 /* TCG vCPU kick timer
869 *
870  * The kick timer is responsible for moving single-threaded vCPU
871  * emulation on to the next vCPU. If more than one vCPU is running, a
872  * timer event will force a cpu->exit so the next vCPU can get
873  * scheduled.
874  *
875  * The timer is removed while all vCPUs are idle and restarted again
876  * once they are no longer idle.
877 */
878
879 static QEMUTimer *tcg_kick_vcpu_timer;
880 static CPUState *tcg_current_rr_cpu;
881
882 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
883
884 static inline int64_t qemu_tcg_next_kick(void)
885 {
886 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
887 }
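/*
 * TCG_KICK_PERIOD is NANOSECONDS_PER_SECOND / 10, so the kick timer fires
 * every 100 ms; a vCPU that never exits on its own is therefore preempted
 * after at most one such period.
 */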
888
889 /* Kick the currently round-robin scheduled vCPU to next */
890 static void qemu_cpu_kick_rr_next_cpu(void)
891 {
892 CPUState *cpu;
893 do {
894 cpu = atomic_mb_read(&tcg_current_rr_cpu);
895 if (cpu) {
896 cpu_exit(cpu);
897 }
898 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
899 }
900
901 /* Kick all RR vCPUs */
902 static void qemu_cpu_kick_rr_cpus(void)
903 {
904 CPUState *cpu;
905
906 CPU_FOREACH(cpu) {
907 cpu_exit(cpu);
908 };
909 }
910
911 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
912 {
913 }
914
915 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
916 {
917 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
918 qemu_notify_event();
919 return;
920 }
921
922 if (qemu_in_vcpu_thread()) {
923 /* A CPU is currently running; kick it back out to the
924 * tcg_cpu_exec() loop so it will recalculate its
925 * icount deadline immediately.
926 */
927 qemu_cpu_kick(current_cpu);
928 } else if (first_cpu) {
929 /* qemu_cpu_kick is not enough to kick a halted CPU out of
930 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
931 * causes cpu_thread_is_idle to return false. This way,
932 * handle_icount_deadline can run.
933 * If we have no CPUs at all for some reason, we don't
934 * need to do anything.
935 */
936 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
937 }
938 }
939
940 static void kick_tcg_thread(void *opaque)
941 {
942 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
943 qemu_cpu_kick_rr_next_cpu();
944 }
945
946 static void start_tcg_kick_timer(void)
947 {
948 assert(!mttcg_enabled);
949 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
950 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
951 kick_tcg_thread, NULL);
952 }
953 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
954 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
955 }
956 }
957
958 static void stop_tcg_kick_timer(void)
959 {
960 assert(!mttcg_enabled);
961 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
962 timer_del(tcg_kick_vcpu_timer);
963 }
964 }
965
966 /***********************************************************/
967 void hw_error(const char *fmt, ...)
968 {
969 va_list ap;
970 CPUState *cpu;
971
972 va_start(ap, fmt);
973 fprintf(stderr, "qemu: hardware error: ");
974 vfprintf(stderr, fmt, ap);
975 fprintf(stderr, "\n");
976 CPU_FOREACH(cpu) {
977 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
978 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
979 }
980 va_end(ap);
981 abort();
982 }
983
984 void cpu_synchronize_all_states(void)
985 {
986 CPUState *cpu;
987
988 CPU_FOREACH(cpu) {
989 cpu_synchronize_state(cpu);
990 /* TODO: move to cpu_synchronize_state() */
991 if (hvf_enabled()) {
992 hvf_cpu_synchronize_state(cpu);
993 }
994 }
995 }
996
997 void cpu_synchronize_all_post_reset(void)
998 {
999 CPUState *cpu;
1000
1001 CPU_FOREACH(cpu) {
1002 cpu_synchronize_post_reset(cpu);
1003 /* TODO: move to cpu_synchronize_post_reset() */
1004 if (hvf_enabled()) {
1005 hvf_cpu_synchronize_post_reset(cpu);
1006 }
1007 }
1008 }
1009
1010 void cpu_synchronize_all_post_init(void)
1011 {
1012 CPUState *cpu;
1013
1014 CPU_FOREACH(cpu) {
1015 cpu_synchronize_post_init(cpu);
1016 /* TODO: move to cpu_synchronize_post_init() */
1017 if (hvf_enabled()) {
1018 hvf_cpu_synchronize_post_init(cpu);
1019 }
1020 }
1021 }
1022
1023 void cpu_synchronize_all_pre_loadvm(void)
1024 {
1025 CPUState *cpu;
1026
1027 CPU_FOREACH(cpu) {
1028 cpu_synchronize_pre_loadvm(cpu);
1029 }
1030 }
1031
1032 static int do_vm_stop(RunState state, bool send_stop)
1033 {
1034 int ret = 0;
1035
1036 if (runstate_is_running()) {
1037 runstate_set(state);
1038 cpu_disable_ticks();
1039 pause_all_vcpus();
1040 vm_state_notify(0, state);
1041 if (send_stop) {
1042 qapi_event_send_stop();
1043 }
1044 }
1045
1046 bdrv_drain_all();
1047 ret = bdrv_flush_all();
1048
1049 return ret;
1050 }
1051
1052 /* Special vm_stop() variant for terminating the process. Historically clients
1053 * did not expect a QMP STOP event and so we need to retain compatibility.
1054 */
1055 int vm_shutdown(void)
1056 {
1057 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1058 }
1059
1060 static bool cpu_can_run(CPUState *cpu)
1061 {
1062 if (cpu->stop) {
1063 return false;
1064 }
1065 if (cpu_is_stopped(cpu)) {
1066 return false;
1067 }
1068 return true;
1069 }
1070
1071 static void cpu_handle_guest_debug(CPUState *cpu)
1072 {
1073 gdb_set_stop_cpu(cpu);
1074 qemu_system_debug_request();
1075 cpu->stopped = true;
1076 }
1077
1078 #ifdef CONFIG_LINUX
1079 static void sigbus_reraise(void)
1080 {
1081 sigset_t set;
1082 struct sigaction action;
1083
1084 memset(&action, 0, sizeof(action));
1085 action.sa_handler = SIG_DFL;
1086 if (!sigaction(SIGBUS, &action, NULL)) {
1087 raise(SIGBUS);
1088 sigemptyset(&set);
1089 sigaddset(&set, SIGBUS);
1090 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1091 }
1092 perror("Failed to re-raise SIGBUS!\n");
1093 abort();
1094 }
1095
1096 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1097 {
1098 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1099 sigbus_reraise();
1100 }
1101
1102 if (current_cpu) {
1103 /* Called asynchronously in VCPU thread. */
1104 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1105 sigbus_reraise();
1106 }
1107 } else {
1108 /* Called synchronously (via signalfd) in main thread. */
1109 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1110 sigbus_reraise();
1111 }
1112 }
1113 }
1114
1115 static void qemu_init_sigbus(void)
1116 {
1117 struct sigaction action;
1118
1119 memset(&action, 0, sizeof(action));
1120 action.sa_flags = SA_SIGINFO;
1121 action.sa_sigaction = sigbus_handler;
1122 sigaction(SIGBUS, &action, NULL);
1123
1124 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1125 }
1126 #else /* !CONFIG_LINUX */
1127 static void qemu_init_sigbus(void)
1128 {
1129 }
1130 #endif /* !CONFIG_LINUX */
1131
1132 static QemuThread io_thread;
1133
1134 /* cpu creation */
1135 static QemuCond qemu_cpu_cond;
1136 /* system init */
1137 static QemuCond qemu_pause_cond;
1138
1139 void qemu_init_cpu_loop(void)
1140 {
1141 qemu_init_sigbus();
1142 qemu_cond_init(&qemu_cpu_cond);
1143 qemu_cond_init(&qemu_pause_cond);
1144 qemu_mutex_init(&qemu_global_mutex);
1145
1146 qemu_thread_get_self(&io_thread);
1147 }
1148
1149 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1150 {
1151 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1152 }
1153
1154 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1155 {
1156 if (kvm_destroy_vcpu(cpu) < 0) {
1157 error_report("kvm_destroy_vcpu failed");
1158 exit(EXIT_FAILURE);
1159 }
1160 }
1161
1162 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1163 {
1164 }
1165
1166 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1167 {
1168 g_assert(qemu_cpu_is_self(cpu));
1169 cpu->stop = false;
1170 cpu->stopped = true;
1171 if (exit) {
1172 cpu_exit(cpu);
1173 }
1174 qemu_cond_broadcast(&qemu_pause_cond);
1175 }
1176
1177 static void qemu_wait_io_event_common(CPUState *cpu)
1178 {
1179 atomic_mb_set(&cpu->thread_kicked, false);
1180 if (cpu->stop) {
1181 qemu_cpu_stop(cpu, false);
1182 }
1183 process_queued_cpu_work(cpu);
1184 }
1185
1186 static void qemu_tcg_rr_wait_io_event(void)
1187 {
1188 CPUState *cpu;
1189
1190 while (all_cpu_threads_idle()) {
1191 stop_tcg_kick_timer();
1192 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1193 }
1194
1195 start_tcg_kick_timer();
1196
1197 CPU_FOREACH(cpu) {
1198 qemu_wait_io_event_common(cpu);
1199 }
1200 }
1201
1202 static void qemu_wait_io_event(CPUState *cpu)
1203 {
1204 bool slept = false;
1205
1206 while (cpu_thread_is_idle(cpu)) {
1207 if (!slept) {
1208 slept = true;
1209 qemu_plugin_vcpu_idle_cb(cpu);
1210 }
1211 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1212 }
1213 if (slept) {
1214 qemu_plugin_vcpu_resume_cb(cpu);
1215 }
1216
1217 #ifdef _WIN32
1218 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1219 if (!tcg_enabled()) {
1220 SleepEx(0, TRUE);
1221 }
1222 #endif
1223 qemu_wait_io_event_common(cpu);
1224 }
1225
1226 static void *qemu_kvm_cpu_thread_fn(void *arg)
1227 {
1228 CPUState *cpu = arg;
1229 int r;
1230
1231 rcu_register_thread();
1232
1233 qemu_mutex_lock_iothread();
1234 qemu_thread_get_self(cpu->thread);
1235 cpu->thread_id = qemu_get_thread_id();
1236 cpu->can_do_io = 1;
1237 current_cpu = cpu;
1238
1239 r = kvm_init_vcpu(cpu);
1240 if (r < 0) {
1241 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1242 exit(1);
1243 }
1244
1245 kvm_init_cpu_signals(cpu);
1246
1247 /* signal CPU creation */
1248 cpu->created = true;
1249 qemu_cond_signal(&qemu_cpu_cond);
1250 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1251
1252 do {
1253 if (cpu_can_run(cpu)) {
1254 r = kvm_cpu_exec(cpu);
1255 if (r == EXCP_DEBUG) {
1256 cpu_handle_guest_debug(cpu);
1257 }
1258 }
1259 qemu_wait_io_event(cpu);
1260 } while (!cpu->unplug || cpu_can_run(cpu));
1261
1262 qemu_kvm_destroy_vcpu(cpu);
1263 cpu->created = false;
1264 qemu_cond_signal(&qemu_cpu_cond);
1265 qemu_mutex_unlock_iothread();
1266 rcu_unregister_thread();
1267 return NULL;
1268 }
1269
1270 static void *qemu_dummy_cpu_thread_fn(void *arg)
1271 {
1272 #ifdef _WIN32
1273 error_report("qtest is not supported under Windows");
1274 exit(1);
1275 #else
1276 CPUState *cpu = arg;
1277 sigset_t waitset;
1278 int r;
1279
1280 rcu_register_thread();
1281
1282 qemu_mutex_lock_iothread();
1283 qemu_thread_get_self(cpu->thread);
1284 cpu->thread_id = qemu_get_thread_id();
1285 cpu->can_do_io = 1;
1286 current_cpu = cpu;
1287
1288 sigemptyset(&waitset);
1289 sigaddset(&waitset, SIG_IPI);
1290
1291 /* signal CPU creation */
1292 cpu->created = true;
1293 qemu_cond_signal(&qemu_cpu_cond);
1294 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1295
1296 do {
1297 qemu_mutex_unlock_iothread();
1298 do {
1299 int sig;
1300 r = sigwait(&waitset, &sig);
1301 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1302 if (r == -1) {
1303 perror("sigwait");
1304 exit(1);
1305 }
1306 qemu_mutex_lock_iothread();
1307 qemu_wait_io_event(cpu);
1308 } while (!cpu->unplug);
1309
1310 qemu_mutex_unlock_iothread();
1311 rcu_unregister_thread();
1312 return NULL;
1313 #endif
1314 }
1315
1316 static int64_t tcg_get_icount_limit(void)
1317 {
1318 int64_t deadline;
1319
1320 if (replay_mode != REPLAY_MODE_PLAY) {
1321 /*
1322          * Include all the timers, because they may need attention.
1323          * Overly long CPU execution may create an unnecessary delay in the UI.
1324 */
1325 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1326 QEMU_TIMER_ATTR_ALL);
1327 /* Check realtime timers, because they help with input processing */
1328 deadline = qemu_soonest_timeout(deadline,
1329 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1330 QEMU_TIMER_ATTR_ALL));
1331
1332 /* Maintain prior (possibly buggy) behaviour where if no deadline
1333 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1334 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1335 * nanoseconds.
1336 */
1337 if ((deadline < 0) || (deadline > INT32_MAX)) {
1338 deadline = INT32_MAX;
1339 }
1340
1341 return qemu_icount_round(deadline);
1342 } else {
1343 return replay_get_instructions();
1344 }
1345 }
1346
1347 static void handle_icount_deadline(void)
1348 {
1349 assert(qemu_in_vcpu_thread());
1350 if (use_icount) {
1351 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1352 QEMU_TIMER_ATTR_ALL);
1353
1354 if (deadline == 0) {
1355 /* Wake up other AioContexts. */
1356 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1357 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1358 }
1359 }
1360 }
1361
1362 static void prepare_icount_for_run(CPUState *cpu)
1363 {
1364 if (use_icount) {
1365 int insns_left;
1366
1367 /* These should always be cleared by process_icount_data after
1368          * each vCPU execution. However, u16.high can be raised
1369 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1370 */
1371 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1372 g_assert(cpu->icount_extra == 0);
1373
1374 cpu->icount_budget = tcg_get_icount_limit();
1375 insns_left = MIN(0xffff, cpu->icount_budget);
1376 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1377 cpu->icount_extra = cpu->icount_budget - insns_left;
1378
1379 replay_mutex_lock();
1380 }
1381 }
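/*
 * The split above exists because the decrementer in icount_decr.u16.low is
 * only 16 bits wide: a budget of e.g. 100000 instructions is loaded as
 * low = 0xffff (65535) with icount_extra = 34465, and cpu_get_icount_executed()
 * later recovers the executed count as budget - (low + extra).
 */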
1382
1383 static void process_icount_data(CPUState *cpu)
1384 {
1385 if (use_icount) {
1386 /* Account for executed instructions */
1387 cpu_update_icount(cpu);
1388
1389 /* Reset the counters */
1390 cpu_neg(cpu)->icount_decr.u16.low = 0;
1391 cpu->icount_extra = 0;
1392 cpu->icount_budget = 0;
1393
1394 replay_account_executed_instructions();
1395
1396 replay_mutex_unlock();
1397 }
1398 }
1399
1400
1401 static int tcg_cpu_exec(CPUState *cpu)
1402 {
1403 int ret;
1404 #ifdef CONFIG_PROFILER
1405 int64_t ti;
1406 #endif
1407
1408 assert(tcg_enabled());
1409 #ifdef CONFIG_PROFILER
1410 ti = profile_getclock();
1411 #endif
1412 cpu_exec_start(cpu);
1413 ret = cpu_exec(cpu);
1414 cpu_exec_end(cpu);
1415 #ifdef CONFIG_PROFILER
1416 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1417 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1418 #endif
1419 return ret;
1420 }
1421
1422 /* Destroy any remaining vCPUs which have been unplugged and have
1423 * finished running
1424 */
1425 static void deal_with_unplugged_cpus(void)
1426 {
1427 CPUState *cpu;
1428
1429 CPU_FOREACH(cpu) {
1430 if (cpu->unplug && !cpu_can_run(cpu)) {
1431 qemu_tcg_destroy_vcpu(cpu);
1432 cpu->created = false;
1433 qemu_cond_signal(&qemu_cpu_cond);
1434 break;
1435 }
1436 }
1437 }
1438
1439 /* Single-threaded TCG
1440 *
1441 * In the single-threaded case each vCPU is simulated in turn. If
1442 * there is more than a single vCPU we create a simple timer to kick
1443 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1444 * This is done explicitly rather than relying on side-effects
1445 * elsewhere.
1446 */
1447
1448 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1449 {
1450 CPUState *cpu = arg;
1451
1452 assert(tcg_enabled());
1453 rcu_register_thread();
1454 tcg_register_thread();
1455
1456 qemu_mutex_lock_iothread();
1457 qemu_thread_get_self(cpu->thread);
1458
1459 cpu->thread_id = qemu_get_thread_id();
1460 cpu->created = true;
1461 cpu->can_do_io = 1;
1462 qemu_cond_signal(&qemu_cpu_cond);
1463 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1464
1465 /* wait for initial kick-off after machine start */
1466 while (first_cpu->stopped) {
1467 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1468
1469 /* process any pending work */
1470 CPU_FOREACH(cpu) {
1471 current_cpu = cpu;
1472 qemu_wait_io_event_common(cpu);
1473 }
1474 }
1475
1476 start_tcg_kick_timer();
1477
1478 cpu = first_cpu;
1479
1480 /* process any pending work */
1481 cpu->exit_request = 1;
1482
1483 while (1) {
1484 qemu_mutex_unlock_iothread();
1485 replay_mutex_lock();
1486 qemu_mutex_lock_iothread();
1487 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1488 qemu_account_warp_timer();
1489
1490 /* Run the timers here. This is much more efficient than
1491 * waking up the I/O thread and waiting for completion.
1492 */
1493 handle_icount_deadline();
1494
1495 replay_mutex_unlock();
1496
1497 if (!cpu) {
1498 cpu = first_cpu;
1499 }
1500
1501 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1502
1503 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1504 current_cpu = cpu;
1505
1506 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1507 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1508
1509 if (cpu_can_run(cpu)) {
1510 int r;
1511
1512 qemu_mutex_unlock_iothread();
1513 prepare_icount_for_run(cpu);
1514
1515 r = tcg_cpu_exec(cpu);
1516
1517 process_icount_data(cpu);
1518 qemu_mutex_lock_iothread();
1519
1520 if (r == EXCP_DEBUG) {
1521 cpu_handle_guest_debug(cpu);
1522 break;
1523 } else if (r == EXCP_ATOMIC) {
1524 qemu_mutex_unlock_iothread();
1525 cpu_exec_step_atomic(cpu);
1526 qemu_mutex_lock_iothread();
1527 break;
1528 }
1529 } else if (cpu->stop) {
1530 if (cpu->unplug) {
1531 cpu = CPU_NEXT(cpu);
1532 }
1533 break;
1534 }
1535
1536 cpu = CPU_NEXT(cpu);
1537 } /* while (cpu && !cpu->exit_request).. */
1538
1539 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1540 atomic_set(&tcg_current_rr_cpu, NULL);
1541
1542 if (cpu && cpu->exit_request) {
1543 atomic_mb_set(&cpu->exit_request, 0);
1544 }
1545
1546 if (use_icount && all_cpu_threads_idle()) {
1547 /*
1548 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1549 * in the main_loop, wake it up in order to start the warp timer.
1550 */
1551 qemu_notify_event();
1552 }
1553
1554 qemu_tcg_rr_wait_io_event();
1555 deal_with_unplugged_cpus();
1556 }
1557
1558 rcu_unregister_thread();
1559 return NULL;
1560 }
1561
1562 static void *qemu_hax_cpu_thread_fn(void *arg)
1563 {
1564 CPUState *cpu = arg;
1565 int r;
1566
1567 rcu_register_thread();
1568 qemu_mutex_lock_iothread();
1569 qemu_thread_get_self(cpu->thread);
1570
1571 cpu->thread_id = qemu_get_thread_id();
1572 cpu->created = true;
1573 current_cpu = cpu;
1574
1575 hax_init_vcpu(cpu);
1576 qemu_cond_signal(&qemu_cpu_cond);
1577 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1578
1579 do {
1580 if (cpu_can_run(cpu)) {
1581 r = hax_smp_cpu_exec(cpu);
1582 if (r == EXCP_DEBUG) {
1583 cpu_handle_guest_debug(cpu);
1584 }
1585 }
1586
1587 qemu_wait_io_event(cpu);
1588 } while (!cpu->unplug || cpu_can_run(cpu));
1589 rcu_unregister_thread();
1590 return NULL;
1591 }
1592
1593 /* The HVF-specific vCPU thread function. This one should only run when the host
1594 * CPU supports the VMX "unrestricted guest" feature. */
1595 static void *qemu_hvf_cpu_thread_fn(void *arg)
1596 {
1597 CPUState *cpu = arg;
1598
1599 int r;
1600
1601 assert(hvf_enabled());
1602
1603 rcu_register_thread();
1604
1605 qemu_mutex_lock_iothread();
1606 qemu_thread_get_self(cpu->thread);
1607
1608 cpu->thread_id = qemu_get_thread_id();
1609 cpu->can_do_io = 1;
1610 current_cpu = cpu;
1611
1612 hvf_init_vcpu(cpu);
1613
1614 /* signal CPU creation */
1615 cpu->created = true;
1616 qemu_cond_signal(&qemu_cpu_cond);
1617 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1618
1619 do {
1620 if (cpu_can_run(cpu)) {
1621 r = hvf_vcpu_exec(cpu);
1622 if (r == EXCP_DEBUG) {
1623 cpu_handle_guest_debug(cpu);
1624 }
1625 }
1626 qemu_wait_io_event(cpu);
1627 } while (!cpu->unplug || cpu_can_run(cpu));
1628
1629 hvf_vcpu_destroy(cpu);
1630 cpu->created = false;
1631 qemu_cond_signal(&qemu_cpu_cond);
1632 qemu_mutex_unlock_iothread();
1633 rcu_unregister_thread();
1634 return NULL;
1635 }
1636
1637 static void *qemu_whpx_cpu_thread_fn(void *arg)
1638 {
1639 CPUState *cpu = arg;
1640 int r;
1641
1642 rcu_register_thread();
1643
1644 qemu_mutex_lock_iothread();
1645 qemu_thread_get_self(cpu->thread);
1646 cpu->thread_id = qemu_get_thread_id();
1647 current_cpu = cpu;
1648
1649 r = whpx_init_vcpu(cpu);
1650 if (r < 0) {
1651 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1652 exit(1);
1653 }
1654
1655 /* signal CPU creation */
1656 cpu->created = true;
1657 qemu_cond_signal(&qemu_cpu_cond);
1658 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1659
1660 do {
1661 if (cpu_can_run(cpu)) {
1662 r = whpx_vcpu_exec(cpu);
1663 if (r == EXCP_DEBUG) {
1664 cpu_handle_guest_debug(cpu);
1665 }
1666 }
1667 while (cpu_thread_is_idle(cpu)) {
1668 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1669 }
1670 qemu_wait_io_event_common(cpu);
1671 } while (!cpu->unplug || cpu_can_run(cpu));
1672
1673 whpx_destroy_vcpu(cpu);
1674 cpu->created = false;
1675 qemu_cond_signal(&qemu_cpu_cond);
1676 qemu_mutex_unlock_iothread();
1677 rcu_unregister_thread();
1678 return NULL;
1679 }
1680
1681 #ifdef _WIN32
1682 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1683 {
1684 }
1685 #endif
1686
1687 /* Multi-threaded TCG
1688 *
1689 * In the multi-threaded case each vCPU has its own thread. The TLS
1690 * variable current_cpu can be used deep in the code to find the
1691 * current CPUState for a given thread.
1692 */
1693
1694 static void *qemu_tcg_cpu_thread_fn(void *arg)
1695 {
1696 CPUState *cpu = arg;
1697
1698 assert(tcg_enabled());
1699 g_assert(!use_icount);
1700
1701 rcu_register_thread();
1702 tcg_register_thread();
1703
1704 qemu_mutex_lock_iothread();
1705 qemu_thread_get_self(cpu->thread);
1706
1707 cpu->thread_id = qemu_get_thread_id();
1708 cpu->created = true;
1709 cpu->can_do_io = 1;
1710 current_cpu = cpu;
1711 qemu_cond_signal(&qemu_cpu_cond);
1712 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1713
1714 /* process any pending work */
1715 cpu->exit_request = 1;
1716
1717 do {
1718 if (cpu_can_run(cpu)) {
1719 int r;
1720 qemu_mutex_unlock_iothread();
1721 r = tcg_cpu_exec(cpu);
1722 qemu_mutex_lock_iothread();
1723 switch (r) {
1724 case EXCP_DEBUG:
1725 cpu_handle_guest_debug(cpu);
1726 break;
1727 case EXCP_HALTED:
1728 /* during start-up the vCPU is reset and the thread is
1729              * kicked several times. If we don't ensure we go back
1730              * to sleep in the halted state, we won't cleanly
1731              * start up when the vCPU is enabled.
1732 *
1733 * cpu->halted should ensure we sleep in wait_io_event
1734 */
1735 g_assert(cpu->halted);
1736 break;
1737 case EXCP_ATOMIC:
1738 qemu_mutex_unlock_iothread();
1739 cpu_exec_step_atomic(cpu);
1740 qemu_mutex_lock_iothread();
1741 default:
1742 /* Ignore everything else? */
1743 break;
1744 }
1745 }
1746
1747 atomic_mb_set(&cpu->exit_request, 0);
1748 qemu_wait_io_event(cpu);
1749 } while (!cpu->unplug || cpu_can_run(cpu));
1750
1751 qemu_tcg_destroy_vcpu(cpu);
1752 cpu->created = false;
1753 qemu_cond_signal(&qemu_cpu_cond);
1754 qemu_mutex_unlock_iothread();
1755 rcu_unregister_thread();
1756 return NULL;
1757 }
1758
1759 static void qemu_cpu_kick_thread(CPUState *cpu)
1760 {
1761 #ifndef _WIN32
1762 int err;
1763
1764 if (cpu->thread_kicked) {
1765 return;
1766 }
1767 cpu->thread_kicked = true;
1768 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1769 if (err && err != ESRCH) {
1770 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1771 exit(1);
1772 }
1773 #else /* _WIN32 */
1774 if (!qemu_cpu_is_self(cpu)) {
1775 if (whpx_enabled()) {
1776 whpx_vcpu_kick(cpu);
1777 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1778 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1779 __func__, GetLastError());
1780 exit(1);
1781 }
1782 }
1783 #endif
1784 }
1785
1786 void qemu_cpu_kick(CPUState *cpu)
1787 {
1788 qemu_cond_broadcast(cpu->halt_cond);
1789 if (tcg_enabled()) {
1790 if (qemu_tcg_mttcg_enabled()) {
1791 cpu_exit(cpu);
1792 } else {
1793 qemu_cpu_kick_rr_cpus();
1794 }
1795 } else {
1796 if (hax_enabled()) {
1797 /*
1798 * FIXME: race condition with the exit_request check in
1799 * hax_vcpu_hax_exec
1800 */
1801 cpu->exit_request = 1;
1802 }
1803 qemu_cpu_kick_thread(cpu);
1804 }
1805 }
1806
1807 void qemu_cpu_kick_self(void)
1808 {
1809 assert(current_cpu);
1810 qemu_cpu_kick_thread(current_cpu);
1811 }
1812
1813 bool qemu_cpu_is_self(CPUState *cpu)
1814 {
1815 return qemu_thread_is_self(cpu->thread);
1816 }
1817
1818 bool qemu_in_vcpu_thread(void)
1819 {
1820 return current_cpu && qemu_cpu_is_self(current_cpu);
1821 }
1822
1823 static __thread bool iothread_locked = false;
1824
1825 bool qemu_mutex_iothread_locked(void)
1826 {
1827 return iothread_locked;
1828 }
1829
1830 /*
1831 * The BQL is taken from so many places that it is worth profiling the
1832 * callers directly, instead of funneling them all through a single function.
1833 */
1834 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1835 {
1836 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1837
1838 g_assert(!qemu_mutex_iothread_locked());
1839 bql_lock(&qemu_global_mutex, file, line);
1840 iothread_locked = true;
1841 }
1842
1843 void qemu_mutex_unlock_iothread(void)
1844 {
1845 g_assert(qemu_mutex_iothread_locked());
1846 iothread_locked = false;
1847 qemu_mutex_unlock(&qemu_global_mutex);
1848 }
1849
1850 void qemu_cond_wait_iothread(QemuCond *cond)
1851 {
1852 qemu_cond_wait(cond, &qemu_global_mutex);
1853 }
1854
1855 static bool all_vcpus_paused(void)
1856 {
1857 CPUState *cpu;
1858
1859 CPU_FOREACH(cpu) {
1860 if (!cpu->stopped) {
1861 return false;
1862 }
1863 }
1864
1865 return true;
1866 }
1867
1868 void pause_all_vcpus(void)
1869 {
1870 CPUState *cpu;
1871
1872 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1873 CPU_FOREACH(cpu) {
1874 if (qemu_cpu_is_self(cpu)) {
1875 qemu_cpu_stop(cpu, true);
1876 } else {
1877 cpu->stop = true;
1878 qemu_cpu_kick(cpu);
1879 }
1880 }
1881
1882 /* We need to drop the replay_lock so any vCPU threads woken up
1883 * can finish their replay tasks
1884 */
1885 replay_mutex_unlock();
1886
1887 while (!all_vcpus_paused()) {
1888 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1889 CPU_FOREACH(cpu) {
1890 qemu_cpu_kick(cpu);
1891 }
1892 }
1893
1894 qemu_mutex_unlock_iothread();
1895 replay_mutex_lock();
1896 qemu_mutex_lock_iothread();
1897 }
1898
1899 void cpu_resume(CPUState *cpu)
1900 {
1901 cpu->stop = false;
1902 cpu->stopped = false;
1903 qemu_cpu_kick(cpu);
1904 }
1905
1906 void resume_all_vcpus(void)
1907 {
1908 CPUState *cpu;
1909
1910 if (!runstate_is_running()) {
1911 return;
1912 }
1913
1914 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1915 CPU_FOREACH(cpu) {
1916 cpu_resume(cpu);
1917 }
1918 }
1919
1920 void cpu_remove_sync(CPUState *cpu)
1921 {
1922 cpu->stop = true;
1923 cpu->unplug = true;
1924 qemu_cpu_kick(cpu);
1925 qemu_mutex_unlock_iothread();
1926 qemu_thread_join(cpu->thread);
1927 qemu_mutex_lock_iothread();
1928 }
1929
1930 /* For temporary buffers for forming a name */
1931 #define VCPU_THREAD_NAME_SIZE 16
1932
1933 static void qemu_tcg_init_vcpu(CPUState *cpu)
1934 {
1935 char thread_name[VCPU_THREAD_NAME_SIZE];
1936 static QemuCond *single_tcg_halt_cond;
1937 static QemuThread *single_tcg_cpu_thread;
1938 static int tcg_region_inited;
1939
1940 assert(tcg_enabled());
1941 /*
1942 * Initialize TCG regions--once. Now is a good time, because:
1943 * (1) TCG's init context, prologue and target globals have been set up.
1944 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1945 * -accel flag is processed, so the check doesn't work then).
1946 */
1947 if (!tcg_region_inited) {
1948 tcg_region_inited = 1;
1949 tcg_region_init();
1950 }
1951
1952 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1953 cpu->thread = g_malloc0(sizeof(QemuThread));
1954 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1955 qemu_cond_init(cpu->halt_cond);
1956
1957 if (qemu_tcg_mttcg_enabled()) {
1958 /* create a thread per vCPU with TCG (MTTCG) */
1959 parallel_cpus = true;
1960 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1961 cpu->cpu_index);
1962
1963 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1964 cpu, QEMU_THREAD_JOINABLE);
1965
1966 } else {
1967 /* share a single thread for all cpus with TCG */
1968 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1969 qemu_thread_create(cpu->thread, thread_name,
1970 qemu_tcg_rr_cpu_thread_fn,
1971 cpu, QEMU_THREAD_JOINABLE);
1972
1973 single_tcg_halt_cond = cpu->halt_cond;
1974 single_tcg_cpu_thread = cpu->thread;
1975 }
1976 #ifdef _WIN32
1977 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1978 #endif
1979 } else {
1980 /* For non-MTTCG cases we share the thread */
1981 cpu->thread = single_tcg_cpu_thread;
1982 cpu->halt_cond = single_tcg_halt_cond;
1983 cpu->thread_id = first_cpu->thread_id;
1984 cpu->can_do_io = 1;
1985 cpu->created = true;
1986 }
1987 }
1988
1989 static void qemu_hax_start_vcpu(CPUState *cpu)
1990 {
1991 char thread_name[VCPU_THREAD_NAME_SIZE];
1992
1993 cpu->thread = g_malloc0(sizeof(QemuThread));
1994 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1995 qemu_cond_init(cpu->halt_cond);
1996
1997 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1998 cpu->cpu_index);
1999 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2000 cpu, QEMU_THREAD_JOINABLE);
2001 #ifdef _WIN32
2002 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2003 #endif
2004 }
2005
2006 static void qemu_kvm_start_vcpu(CPUState *cpu)
2007 {
2008 char thread_name[VCPU_THREAD_NAME_SIZE];
2009
2010 cpu->thread = g_malloc0(sizeof(QemuThread));
2011 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2012 qemu_cond_init(cpu->halt_cond);
2013 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2014 cpu->cpu_index);
2015 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2016 cpu, QEMU_THREAD_JOINABLE);
2017 }
2018
2019 static void qemu_hvf_start_vcpu(CPUState *cpu)
2020 {
2021 char thread_name[VCPU_THREAD_NAME_SIZE];
2022
2023 /* HVF currently does not support TCG, and only runs in
2024 * unrestricted-guest mode. */
2025 assert(hvf_enabled());
2026
2027 cpu->thread = g_malloc0(sizeof(QemuThread));
2028 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2029 qemu_cond_init(cpu->halt_cond);
2030
2031 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2032 cpu->cpu_index);
2033 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2034 cpu, QEMU_THREAD_JOINABLE);
2035 }
2036
2037 static void qemu_whpx_start_vcpu(CPUState *cpu)
2038 {
2039 char thread_name[VCPU_THREAD_NAME_SIZE];
2040
2041 cpu->thread = g_malloc0(sizeof(QemuThread));
2042 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2043 qemu_cond_init(cpu->halt_cond);
2044 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2045 cpu->cpu_index);
2046 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2047 cpu, QEMU_THREAD_JOINABLE);
2048 #ifdef _WIN32
2049 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2050 #endif
2051 }
2052
2053 static void qemu_dummy_start_vcpu(CPUState *cpu)
2054 {
2055 char thread_name[VCPU_THREAD_NAME_SIZE];
2056
2057 cpu->thread = g_malloc0(sizeof(QemuThread));
2058 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2059 qemu_cond_init(cpu->halt_cond);
2060 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2061 cpu->cpu_index);
2062 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2063 QEMU_THREAD_JOINABLE);
2064 }
2065
2066 void qemu_init_vcpu(CPUState *cpu)
2067 {
2068 MachineState *ms = MACHINE(qdev_get_machine());
2069
2070 cpu->nr_cores = ms->smp.cores;
2071 cpu->nr_threads = ms->smp.threads;
2072 cpu->stopped = true;
2073 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2074
2075 if (!cpu->as) {
2076 /* If the target cpu hasn't set up any address spaces itself,
2077 * give it the default one.
2078 */
2079 cpu->num_ases = 1;
2080 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2081 }
2082
2083 if (kvm_enabled()) {
2084 qemu_kvm_start_vcpu(cpu);
2085 } else if (hax_enabled()) {
2086 qemu_hax_start_vcpu(cpu);
2087 } else if (hvf_enabled()) {
2088 qemu_hvf_start_vcpu(cpu);
2089 } else if (tcg_enabled()) {
2090 qemu_tcg_init_vcpu(cpu);
2091 } else if (whpx_enabled()) {
2092 qemu_whpx_start_vcpu(cpu);
2093 } else {
2094 qemu_dummy_start_vcpu(cpu);
2095 }
2096
2097 while (!cpu->created) {
2098 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2099 }
2100 }
2101
2102 void cpu_stop_current(void)
2103 {
2104 if (current_cpu) {
2105 current_cpu->stop = true;
2106 cpu_exit(current_cpu);
2107 }
2108 }
2109
2110 int vm_stop(RunState state)
2111 {
2112 if (qemu_in_vcpu_thread()) {
2113 qemu_system_vmstop_request_prepare();
2114 qemu_system_vmstop_request(state);
2115 /*
2116 * FIXME: should not return to device code in case
2117 * vm_stop() has been requested.
2118 */
2119 cpu_stop_current();
2120 return 0;
2121 }
2122
2123 return do_vm_stop(state, true);
2124 }
2125
2126 /**
2127 * Prepare for (re)starting the VM.
2128 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2129 * running or in case of an error condition), 0 otherwise.
2130 */
2131 int vm_prepare_start(void)
2132 {
2133 RunState requested;
2134
2135 qemu_vmstop_requested(&requested);
2136 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2137 return -1;
2138 }
2139
2140 /* Ensure that a STOP/RESUME pair of events is emitted if a
2141 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2142 * example, according to documentation is always followed by
2143 * the STOP event.
2144 */
2145 if (runstate_is_running()) {
2146 qapi_event_send_stop();
2147 qapi_event_send_resume();
2148 return -1;
2149 }
2150
2151     /* We are sending this now, but the CPUs will be resumed shortly afterwards */
2152 qapi_event_send_resume();
2153
2154 cpu_enable_ticks();
2155 runstate_set(RUN_STATE_RUNNING);
2156 vm_state_notify(1, RUN_STATE_RUNNING);
2157 return 0;
2158 }
2159
2160 void vm_start(void)
2161 {
2162 if (!vm_prepare_start()) {
2163 resume_all_vcpus();
2164 }
2165 }
2166
2167 /* Does a state transition even if the VM is already stopped;
2168    the current state is forgotten forever. */
2169 int vm_stop_force_state(RunState state)
2170 {
2171 if (runstate_is_running()) {
2172 return vm_stop(state);
2173 } else {
2174 runstate_set(state);
2175
2176 bdrv_drain_all();
2177 /* Make sure to return an error if the flush in a previous vm_stop()
2178 * failed. */
2179 return bdrv_flush_all();
2180 }
2181 }
2182
2183 void list_cpus(const char *optarg)
2184 {
2185 /* XXX: implement xxx_cpu_list for targets that still miss it */
2186 #if defined(cpu_list)
2187 cpu_list();
2188 #endif
2189 }
2190
2191 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2192 bool has_cpu, int64_t cpu_index, Error **errp)
2193 {
2194 FILE *f;
2195 uint32_t l;
2196 CPUState *cpu;
2197 uint8_t buf[1024];
2198 int64_t orig_addr = addr, orig_size = size;
2199
2200 if (!has_cpu) {
2201 cpu_index = 0;
2202 }
2203
2204 cpu = qemu_get_cpu(cpu_index);
2205 if (cpu == NULL) {
2206 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2207 "a CPU number");
2208 return;
2209 }
2210
2211 f = fopen(filename, "wb");
2212 if (!f) {
2213 error_setg_file_open(errp, errno, filename);
2214 return;
2215 }
2216
2217 while (size != 0) {
2218 l = sizeof(buf);
2219 if (l > size)
2220 l = size;
2221 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2222 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2223 " specified", orig_addr, orig_size);
2224 goto exit;
2225 }
2226 if (fwrite(buf, 1, l, f) != l) {
2227 error_setg(errp, QERR_IO_ERROR);
2228 goto exit;
2229 }
2230 addr += l;
2231 size -= l;
2232 }
2233
2234 exit:
2235 fclose(f);
2236 }
2237
2238 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2239 Error **errp)
2240 {
2241 FILE *f;
2242 uint32_t l;
2243 uint8_t buf[1024];
2244
2245 f = fopen(filename, "wb");
2246 if (!f) {
2247 error_setg_file_open(errp, errno, filename);
2248 return;
2249 }
2250
2251 while (size != 0) {
2252 l = sizeof(buf);
2253 if (l > size)
2254 l = size;
2255 cpu_physical_memory_read(addr, buf, l);
2256 if (fwrite(buf, 1, l, f) != l) {
2257 error_setg(errp, QERR_IO_ERROR);
2258 goto exit;
2259 }
2260 addr += l;
2261 size -= l;
2262 }
2263
2264 exit:
2265 fclose(f);
2266 }
2267
2268 void qmp_inject_nmi(Error **errp)
2269 {
2270 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2271 }
2272
2273 void dump_drift_info(void)
2274 {
2275 if (!use_icount) {
2276 return;
2277 }
2278
2279 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2280 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2281 if (icount_align_option) {
2282 qemu_printf("Max guest delay %"PRIi64" ms\n",
2283 -max_delay / SCALE_MS);
2284 qemu_printf("Max guest advance %"PRIi64" ms\n",
2285 max_advance / SCALE_MS);
2286 } else {
2287 qemu_printf("Max guest delay NA\n");
2288 qemu_printf("Max guest advance NA\n");
2289 }
2290 }
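/*
 * Reading the output above: "Host - Guest clock" is cpu_get_clock() minus
 * cpu_get_icount() in milliseconds, so a positive value means the virtual
 * (icount) clock is lagging behind host time.  The max delay/advance figures
 * are only meaningful with -icount align=on (icount_align_option); otherwise
 * they are reported as NA.
 */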