]> git.ipfire.org Git - thirdparty/qemu.git/blob - cpus.c
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
[thirdparty/qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "exec/gdbstub.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/kvm.h"
35 #include "qmp-commands.h"
36
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
44 #include "hw/nmi.h"
45 #include "sysemu/replay.h"
46
47 #ifndef _WIN32
48 #include "qemu/compatfd.h"
49 #endif
50
51 #ifdef CONFIG_LINUX
52
53 #include <sys/prctl.h>
54
55 #ifndef PR_MCE_KILL
56 #define PR_MCE_KILL 33
57 #endif
58
59 #ifndef PR_MCE_KILL_SET
60 #define PR_MCE_KILL_SET 1
61 #endif
62
63 #ifndef PR_MCE_KILL_EARLY
64 #define PR_MCE_KILL_EARLY 1
65 #endif
66
67 #endif /* CONFIG_LINUX */
68
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the round-robin cursor used by the TCG execution loop -- confirm. */
69 static CPUState *next_cpu;
/* NOTE(review): exported counters that are not touched in the visible part of
 * this file; presumably icount alignment statistics -- confirm. */
70 int64_t max_delay;
71 int64_t max_advance;
72
73 /* vcpu throttling controls */
/* Timer created in cpu_ticks_init(); its callback is cpu_throttle_timer_tick(). */
74 static QEMUTimer *throttle_timer;
/* Current throttle percentage, accessed with atomic_read()/atomic_set().
 * Zero means throttling is off (see cpu_throttle_stop()/cpu_throttle_active()). */
75 static unsigned int throttle_percentage;
76
/* cpu_throttle_set() clamps the requested percentage to [MIN, MAX]. */
77 #define CPU_THROTTLE_PCT_MIN 1
78 #define CPU_THROTTLE_PCT_MAX 99
/* Base time slice (in ns) from which sleep time and timer period are derived. */
79 #define CPU_THROTTLE_TIMESLICE_NS 10000000
80
81 bool cpu_is_stopped(CPUState *cpu)
82 {
83 return cpu->stopped || !runstate_is_running();
84 }
85
86 static bool cpu_thread_is_idle(CPUState *cpu)
87 {
88 if (cpu->stop || cpu->queued_work_first) {
89 return false;
90 }
91 if (cpu_is_stopped(cpu)) {
92 return true;
93 }
94 if (!cpu->halted || cpu_has_work(cpu) ||
95 kvm_halt_in_kernel()) {
96 return false;
97 }
98 return true;
99 }
100
101 static bool all_cpu_threads_idle(void)
102 {
103 CPUState *cpu;
104
105 CPU_FOREACH(cpu) {
106 if (!cpu_thread_is_idle(cpu)) {
107 return false;
108 }
109 }
110 return true;
111 }
112
113 /***********************************************************/
114 /* guest cycle counter */
115
116 /* Protected by TimersState seqlock */
117
/* Set from the "sleep" option in configure_icount(); when false the warp
 * timer is not created and virtual time is advanced immediately instead. */
118 static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT timestamp at which a pending warp started, or -1
 * when no warp is pending (see qemu_start_warp_timer()/icount_warp_rt()). */
119 static int64_t vm_clock_warp_start = -1;
120 /* Conversion factor from emulated instructions to virtual clock ticks. */
121 static int icount_time_shift;
122 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
123 #define MAX_ICOUNT_SHIFT 10
124
/* Adaptive-mode adjustment timers (created in configure_icount() for
 * shift=auto) and the warp timer (created when icount sleep is enabled). */
125 static QEMUTimer *icount_rt_timer;
126 static QEMUTimer *icount_vm_timer;
127 static QEMUTimer *icount_warp_timer;
128
/*
 * State behind the guest tick counter, the guest monotonic clock and the
 * instruction counter.  A single instance, timers_state, is registered for
 * migration in cpu_ticks_init().
 */
129 typedef struct TimersState {
130 /* Protected by BQL. */
131 int64_t cpu_ticks_prev;
132 int64_t cpu_ticks_offset;
133
134 /* cpu_clock_offset can be read out of BQL, so protect it with
135 * this lock.
136 */
137 QemuSeqLock vm_clock_seqlock;
138 int64_t cpu_clock_offset;
/* Non-zero while ticks are running (toggled by cpu_enable_ticks() and
 * cpu_disable_ticks()). */
139 int32_t cpu_ticks_enabled;
/* NOTE(review): only referenced from vmstate_timers; presumably a placeholder
 * kept for migration stream compatibility -- confirm. */
140 int64_t dummy;
141
142 /* Compensate for varying guest execution speed. */
143 int64_t qemu_icount_bias;
144 /* Only written by TCG thread */
145 int64_t qemu_icount;
146 } TimersState;
147
148 static TimersState timers_state;
149
150 int64_t cpu_get_icount_raw(void)
151 {
152 int64_t icount;
153 CPUState *cpu = current_cpu;
154
155 icount = timers_state.qemu_icount;
156 if (cpu) {
157 if (!cpu->can_do_io) {
158 fprintf(stderr, "Bad icount read\n");
159 exit(1);
160 }
161 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
162 }
163 return icount;
164 }
165
166 /* Return the virtual CPU time, based on the instruction counter. */
167 static int64_t cpu_get_icount_locked(void)
168 {
169 int64_t icount = cpu_get_icount_raw();
170 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
171 }
172
173 int64_t cpu_get_icount(void)
174 {
175 int64_t icount;
176 unsigned start;
177
178 do {
179 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
180 icount = cpu_get_icount_locked();
181 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
182
183 return icount;
184 }
185
186 int64_t cpu_icount_to_ns(int64_t icount)
187 {
188 return icount << icount_time_shift;
189 }
190
191 /* return the host CPU cycle counter and handle stop/restart */
192 /* Caller must hold the BQL */
193 int64_t cpu_get_ticks(void)
194 {
195 int64_t ticks;
196
197 if (use_icount) {
198 return cpu_get_icount();
199 }
200
201 ticks = timers_state.cpu_ticks_offset;
202 if (timers_state.cpu_ticks_enabled) {
203 ticks += cpu_get_host_ticks();
204 }
205
206 if (timers_state.cpu_ticks_prev > ticks) {
207 /* Note: non increasing ticks may happen if the host uses
208 software suspend */
209 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
210 ticks = timers_state.cpu_ticks_prev;
211 }
212
213 timers_state.cpu_ticks_prev = ticks;
214 return ticks;
215 }
216
217 static int64_t cpu_get_clock_locked(void)
218 {
219 int64_t ticks;
220
221 ticks = timers_state.cpu_clock_offset;
222 if (timers_state.cpu_ticks_enabled) {
223 ticks += get_clock();
224 }
225
226 return ticks;
227 }
228
229 /* return the host CPU monotonic timer and handle stop/restart */
230 int64_t cpu_get_clock(void)
231 {
232 int64_t ti;
233 unsigned start;
234
235 do {
236 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
237 ti = cpu_get_clock_locked();
238 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
239
240 return ti;
241 }
242
243 /* enable cpu_get_ticks()
244 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
245 */
246 void cpu_enable_ticks(void)
247 {
248 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
249 seqlock_write_lock(&timers_state.vm_clock_seqlock);
250 if (!timers_state.cpu_ticks_enabled) {
251 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
252 timers_state.cpu_clock_offset -= get_clock();
253 timers_state.cpu_ticks_enabled = 1;
254 }
255 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
256 }
257
258 /* disable cpu_get_ticks() : the clock is stopped. You must not call
259 * cpu_get_ticks() after that.
260 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
261 */
262 void cpu_disable_ticks(void)
263 {
264 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
265 seqlock_write_lock(&timers_state.vm_clock_seqlock);
266 if (timers_state.cpu_ticks_enabled) {
267 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
268 timers_state.cpu_clock_offset = cpu_get_clock_locked();
269 timers_state.cpu_ticks_enabled = 0;
270 }
271 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
272 }
273
274 /* Correlation between real and virtual time is always going to be
275 fairly approximate, so ignore small variation.
276 When the guest is idle real and virtual time will be aligned in
277 the IO wait loop. */
278 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279
280 static void icount_adjust(void)
281 {
282 int64_t cur_time;
283 int64_t cur_icount;
284 int64_t delta;
285
286 /* Protected by TimersState mutex. */
287 static int64_t last_delta;
288
289 /* If the VM is not running, then do nothing. */
290 if (!runstate_is_running()) {
291 return;
292 }
293
294 seqlock_write_lock(&timers_state.vm_clock_seqlock);
295 cur_time = cpu_get_clock_locked();
296 cur_icount = cpu_get_icount_locked();
297
298 delta = cur_icount - cur_time;
299 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
300 if (delta > 0
301 && last_delta + ICOUNT_WOBBLE < delta * 2
302 && icount_time_shift > 0) {
303 /* The guest is getting too far ahead. Slow time down. */
304 icount_time_shift--;
305 }
306 if (delta < 0
307 && last_delta - ICOUNT_WOBBLE > delta * 2
308 && icount_time_shift < MAX_ICOUNT_SHIFT) {
309 /* The guest is getting too far behind. Speed time up. */
310 icount_time_shift++;
311 }
312 last_delta = delta;
313 timers_state.qemu_icount_bias = cur_icount
314 - (timers_state.qemu_icount << icount_time_shift);
315 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
316 }
317
318 static void icount_adjust_rt(void *opaque)
319 {
320 timer_mod(icount_rt_timer,
321 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
322 icount_adjust();
323 }
324
325 static void icount_adjust_vm(void *opaque)
326 {
327 timer_mod(icount_vm_timer,
328 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
329 get_ticks_per_sec() / 10);
330 icount_adjust();
331 }
332
333 static int64_t qemu_icount_round(int64_t count)
334 {
335 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
336 }
337
338 static void icount_warp_rt(void)
339 {
340 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
341 * changes from -1 to another value, so the race here is okay.
342 */
343 if (atomic_read(&vm_clock_warp_start) == -1) {
344 return;
345 }
346
347 seqlock_write_lock(&timers_state.vm_clock_seqlock);
348 if (runstate_is_running()) {
349 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
350 cpu_get_clock_locked());
351 int64_t warp_delta;
352
353 warp_delta = clock - vm_clock_warp_start;
354 if (use_icount == 2) {
355 /*
356 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
357 * far ahead of real time.
358 */
359 int64_t cur_icount = cpu_get_icount_locked();
360 int64_t delta = clock - cur_icount;
361 warp_delta = MIN(warp_delta, delta);
362 }
363 timers_state.qemu_icount_bias += warp_delta;
364 }
365 vm_clock_warp_start = -1;
366 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
367
368 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
369 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
370 }
371 }
372
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* The timer already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT,
     * so no extra replay checkpoint is needed before warping.
     */
    icount_warp_rt();
}
380
381 void qtest_clock_warp(int64_t dest)
382 {
383 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
384 AioContext *aio_context;
385 assert(qtest_enabled());
386 aio_context = qemu_get_aio_context();
387 while (clock < dest) {
388 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
389 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
390
391 seqlock_write_lock(&timers_state.vm_clock_seqlock);
392 timers_state.qemu_icount_bias += warp;
393 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
394
395 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
396 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
397 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
398 }
399 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
400 }
401
402 void qemu_start_warp_timer(void)
403 {
404 int64_t clock;
405 int64_t deadline;
406
407 if (!use_icount) {
408 return;
409 }
410
411 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
412 * do not fire, so computing the deadline does not make sense.
413 */
414 if (!runstate_is_running()) {
415 return;
416 }
417
418 /* warp clock deterministically in record/replay mode */
419 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
420 return;
421 }
422
423 if (!all_cpu_threads_idle()) {
424 return;
425 }
426
427 if (qtest_enabled()) {
428 /* When testing, qtest commands advance icount. */
429 return;
430 }
431
432 /* We want to use the earliest deadline from ALL vm_clocks */
433 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
434 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
435 if (deadline < 0) {
436 static bool notified;
437 if (!icount_sleep && !notified) {
438 error_report("WARNING: icount sleep disabled and no active timers");
439 notified = true;
440 }
441 return;
442 }
443
444 if (deadline > 0) {
445 /*
446 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
447 * sleep. Otherwise, the CPU might be waiting for a future timer
448 * interrupt to wake it up, but the interrupt never comes because
449 * the vCPU isn't running any insns and thus doesn't advance the
450 * QEMU_CLOCK_VIRTUAL.
451 */
452 if (!icount_sleep) {
453 /*
454 * We never let VCPUs sleep in no sleep icount mode.
455 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
456 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
457 * It is useful when we want a deterministic execution time,
458 * isolated from host latencies.
459 */
460 seqlock_write_lock(&timers_state.vm_clock_seqlock);
461 timers_state.qemu_icount_bias += deadline;
462 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
463 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
464 } else {
465 /*
466 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
467 * "real" time, (related to the time left until the next event) has
468 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
469 * This avoids that the warps are visible externally; for example,
470 * you will not be sending network packets continuously instead of
471 * every 100ms.
472 */
473 seqlock_write_lock(&timers_state.vm_clock_seqlock);
474 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
475 vm_clock_warp_start = clock;
476 }
477 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
478 timer_mod_anticipate(icount_warp_timer, clock + deadline);
479 }
480 } else if (deadline == 0) {
481 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
482 }
483 }
484
485 static void qemu_account_warp_timer(void)
486 {
487 if (!use_icount || !icount_sleep) {
488 return;
489 }
490
491 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
492 * do not fire, so computing the deadline does not make sense.
493 */
494 if (!runstate_is_running()) {
495 return;
496 }
497
498 /* warp clock deterministically in record/replay mode */
499 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
500 return;
501 }
502
503 timer_del(icount_warp_timer);
504 icount_warp_rt();
505 }
506
507 static bool icount_state_needed(void *opaque)
508 {
509 return use_icount;
510 }
511
512 /*
513 * This is a subsection for icount migration.
514 */
/* Carries qemu_icount_bias and qemu_icount of TimersState; only included
 * when icount_state_needed() returns true. */
515 static const VMStateDescription icount_vmstate_timers = {
516 .name = "timer/icount",
517 .version_id = 1,
518 .minimum_version_id = 1,
519 .needed = icount_state_needed,
520 .fields = (VMStateField[]) {
521 VMSTATE_INT64(qemu_icount_bias, TimersState),
522 VMSTATE_INT64(qemu_icount, TimersState),
523 VMSTATE_END_OF_LIST()
524 }
525 };
526
/*
 * Migration description of TimersState, registered against timers_state in
 * cpu_ticks_init().  cpu_clock_offset is a version-2 field; the icount
 * state travels in the "timer/icount" subsection.
 */
527 static const VMStateDescription vmstate_timers = {
528 .name = "timer",
529 .version_id = 2,
530 .minimum_version_id = 1,
531 .fields = (VMStateField[]) {
532 VMSTATE_INT64(cpu_ticks_offset, TimersState),
533 VMSTATE_INT64(dummy, TimersState),
534 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
535 VMSTATE_END_OF_LIST()
536 },
537 .subsections = (const VMStateDescription*[]) {
538 &icount_vmstate_timers,
539 NULL
540 }
541 };
542
543 static void cpu_throttle_thread(void *opaque)
544 {
545 CPUState *cpu = opaque;
546 double pct;
547 double throttle_ratio;
548 long sleeptime_ns;
549
550 if (!cpu_throttle_get_percentage()) {
551 return;
552 }
553
554 pct = (double)cpu_throttle_get_percentage()/100;
555 throttle_ratio = pct / (1 - pct);
556 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
557
558 qemu_mutex_unlock_iothread();
559 atomic_set(&cpu->throttle_thread_scheduled, 0);
560 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
561 qemu_mutex_lock_iothread();
562 }
563
564 static void cpu_throttle_timer_tick(void *opaque)
565 {
566 CPUState *cpu;
567 double pct;
568
569 /* Stop the timer if needed */
570 if (!cpu_throttle_get_percentage()) {
571 return;
572 }
573 CPU_FOREACH(cpu) {
574 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
575 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
576 }
577 }
578
579 pct = (double)cpu_throttle_get_percentage()/100;
580 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
581 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
582 }
583
584 void cpu_throttle_set(int new_throttle_pct)
585 {
586 /* Ensure throttle percentage is within valid range */
587 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
588 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
589
590 atomic_set(&throttle_percentage, new_throttle_pct);
591
592 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
593 CPU_THROTTLE_TIMESLICE_NS);
594 }
595
596 void cpu_throttle_stop(void)
597 {
598 atomic_set(&throttle_percentage, 0);
599 }
600
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
605
606 int cpu_throttle_get_percentage(void)
607 {
608 return atomic_read(&throttle_percentage);
609 }
610
611 void cpu_ticks_init(void)
612 {
613 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
614 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
615 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
616 cpu_throttle_timer_tick, NULL);
617 }
618
619 void configure_icount(QemuOpts *opts, Error **errp)
620 {
621 const char *option;
622 char *rem_str = NULL;
623
624 option = qemu_opt_get(opts, "shift");
625 if (!option) {
626 if (qemu_opt_get(opts, "align") != NULL) {
627 error_setg(errp, "Please specify shift option when using align");
628 }
629 return;
630 }
631
632 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
633 if (icount_sleep) {
634 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
635 icount_timer_cb, NULL);
636 }
637
638 icount_align_option = qemu_opt_get_bool(opts, "align", false);
639
640 if (icount_align_option && !icount_sleep) {
641 error_setg(errp, "align=on and sleep=off are incompatible");
642 }
643 if (strcmp(option, "auto") != 0) {
644 errno = 0;
645 icount_time_shift = strtol(option, &rem_str, 0);
646 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
647 error_setg(errp, "icount: Invalid shift value");
648 }
649 use_icount = 1;
650 return;
651 } else if (icount_align_option) {
652 error_setg(errp, "shift=auto and align=on are incompatible");
653 } else if (!icount_sleep) {
654 error_setg(errp, "shift=auto and sleep=off are incompatible");
655 }
656
657 use_icount = 2;
658
659 /* 125MIPS seems a reasonable initial guess at the guest speed.
660 It will be corrected fairly quickly anyway. */
661 icount_time_shift = 3;
662
663 /* Have both realtime and virtual time triggers for speed adjustment.
664 The realtime trigger catches emulated time passing too slowly,
665 the virtual time trigger catches emulated time passing too fast.
666 Realtime triggers occur even when idle, so use them less frequently
667 than VM triggers. */
668 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
669 icount_adjust_rt, NULL);
670 timer_mod(icount_rt_timer,
671 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
672 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
673 icount_adjust_vm, NULL);
674 timer_mod(icount_vm_timer,
675 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
676 get_ticks_per_sec() / 10);
677 }
678
679 /***********************************************************/
680 void hw_error(const char *fmt, ...)
681 {
682 va_list ap;
683 CPUState *cpu;
684
685 va_start(ap, fmt);
686 fprintf(stderr, "qemu: hardware error: ");
687 vfprintf(stderr, fmt, ap);
688 fprintf(stderr, "\n");
689 CPU_FOREACH(cpu) {
690 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
691 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
692 }
693 va_end(ap);
694 abort();
695 }
696
697 void cpu_synchronize_all_states(void)
698 {
699 CPUState *cpu;
700
701 CPU_FOREACH(cpu) {
702 cpu_synchronize_state(cpu);
703 }
704 }
705
706 void cpu_synchronize_all_post_reset(void)
707 {
708 CPUState *cpu;
709
710 CPU_FOREACH(cpu) {
711 cpu_synchronize_post_reset(cpu);
712 }
713 }
714
715 void cpu_synchronize_all_post_init(void)
716 {
717 CPUState *cpu;
718
719 CPU_FOREACH(cpu) {
720 cpu_synchronize_post_init(cpu);
721 }
722 }
723
724 static int do_vm_stop(RunState state)
725 {
726 int ret = 0;
727
728 if (runstate_is_running()) {
729 cpu_disable_ticks();
730 pause_all_vcpus();
731 runstate_set(state);
732 vm_state_notify(0, state);
733 qapi_event_send_stop(&error_abort);
734 }
735
736 bdrv_drain_all();
737 ret = bdrv_flush_all();
738
739 return ret;
740 }
741
742 static bool cpu_can_run(CPUState *cpu)
743 {
744 if (cpu->stop) {
745 return false;
746 }
747 if (cpu_is_stopped(cpu)) {
748 return false;
749 }
750 return true;
751 }
752
753 static void cpu_handle_guest_debug(CPUState *cpu)
754 {
755 gdb_set_stop_cpu(cpu);
756 qemu_system_debug_request();
757 cpu->stopped = true;
758 }
759
760 #ifdef CONFIG_LINUX
761 static void sigbus_reraise(void)
762 {
763 sigset_t set;
764 struct sigaction action;
765
766 memset(&action, 0, sizeof(action));
767 action.sa_handler = SIG_DFL;
768 if (!sigaction(SIGBUS, &action, NULL)) {
769 raise(SIGBUS);
770 sigemptyset(&set);
771 sigaddset(&set, SIGBUS);
772 sigprocmask(SIG_UNBLOCK, &set, NULL);
773 }
774 perror("Failed to re-raise SIGBUS!\n");
775 abort();
776 }
777
778 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
779 void *ctx)
780 {
781 if (kvm_on_sigbus(siginfo->ssi_code,
782 (void *)(intptr_t)siginfo->ssi_addr)) {
783 sigbus_reraise();
784 }
785 }
786
787 static void qemu_init_sigbus(void)
788 {
789 struct sigaction action;
790
791 memset(&action, 0, sizeof(action));
792 action.sa_flags = SA_SIGINFO;
793 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
794 sigaction(SIGBUS, &action, NULL);
795
796 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
797 }
798
799 static void qemu_kvm_eat_signals(CPUState *cpu)
800 {
801 struct timespec ts = { 0, 0 };
802 siginfo_t siginfo;
803 sigset_t waitset;
804 sigset_t chkset;
805 int r;
806
807 sigemptyset(&waitset);
808 sigaddset(&waitset, SIG_IPI);
809 sigaddset(&waitset, SIGBUS);
810
811 do {
812 r = sigtimedwait(&waitset, &siginfo, &ts);
813 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
814 perror("sigtimedwait");
815 exit(1);
816 }
817
818 switch (r) {
819 case SIGBUS:
820 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
821 sigbus_reraise();
822 }
823 break;
824 default:
825 break;
826 }
827
828 r = sigpending(&chkset);
829 if (r == -1) {
830 perror("sigpending");
831 exit(1);
832 }
833 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
834 }
835
836 #else /* !CONFIG_LINUX */
837
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
841
842 static void qemu_kvm_eat_signals(CPUState *cpu)
843 {
844 }
845 #endif /* !CONFIG_LINUX */
846
847 #ifndef _WIN32
/* No-op handler, installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
851
852 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
853 {
854 int r;
855 sigset_t set;
856 struct sigaction sigact;
857
858 memset(&sigact, 0, sizeof(sigact));
859 sigact.sa_handler = dummy_signal;
860 sigaction(SIG_IPI, &sigact, NULL);
861
862 pthread_sigmask(SIG_BLOCK, NULL, &set);
863 sigdelset(&set, SIG_IPI);
864 sigdelset(&set, SIGBUS);
865 r = kvm_set_signal_mask(cpu, &set);
866 if (r) {
867 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
868 exit(1);
869 }
870 }
871
872 #else /* _WIN32 */
873 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
874 {
875 abort();
876 }
877 #endif /* _WIN32 */
878
/* The mutex taken by qemu_mutex_lock_iothread() and released by
 * qemu_mutex_unlock_iothread(). */
879 static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the mutex; the TCG
 * thread waits on it while iothread_requesting_mutex is non-zero. */
880 static QemuCond qemu_io_proceeded_cond;
/* Number of threads currently inside qemu_mutex_lock_iothread() that have not
 * acquired the mutex yet (atomic_inc/atomic_dec). */
881 static unsigned iothread_requesting_mutex;
882
/* Thread that called qemu_init_cpu_loop(). */
883 static QemuThread io_thread;
884
885 /* cpu creation */
886 static QemuCond qemu_cpu_cond;
887 /* system init */
/* Broadcast when a vCPU acknowledges a stop request; pause_all_vcpus() waits
 * on it. */
888 static QemuCond qemu_pause_cond;
/* Broadcast after queued work items have run; run_on_cpu() waits on it. */
889 static QemuCond qemu_work_cond;
890
891 void qemu_init_cpu_loop(void)
892 {
893 qemu_init_sigbus();
894 qemu_cond_init(&qemu_cpu_cond);
895 qemu_cond_init(&qemu_pause_cond);
896 qemu_cond_init(&qemu_work_cond);
897 qemu_cond_init(&qemu_io_proceeded_cond);
898 qemu_mutex_init(&qemu_global_mutex);
899
900 qemu_thread_get_self(&io_thread);
901 }
902
903 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
904 {
905 struct qemu_work_item wi;
906
907 if (qemu_cpu_is_self(cpu)) {
908 func(data);
909 return;
910 }
911
912 wi.func = func;
913 wi.data = data;
914 wi.free = false;
915
916 qemu_mutex_lock(&cpu->work_mutex);
917 if (cpu->queued_work_first == NULL) {
918 cpu->queued_work_first = &wi;
919 } else {
920 cpu->queued_work_last->next = &wi;
921 }
922 cpu->queued_work_last = &wi;
923 wi.next = NULL;
924 wi.done = false;
925 qemu_mutex_unlock(&cpu->work_mutex);
926
927 qemu_cpu_kick(cpu);
928 while (!atomic_mb_read(&wi.done)) {
929 CPUState *self_cpu = current_cpu;
930
931 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
932 current_cpu = self_cpu;
933 }
934 }
935
936 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
937 {
938 struct qemu_work_item *wi;
939
940 if (qemu_cpu_is_self(cpu)) {
941 func(data);
942 return;
943 }
944
945 wi = g_malloc0(sizeof(struct qemu_work_item));
946 wi->func = func;
947 wi->data = data;
948 wi->free = true;
949
950 qemu_mutex_lock(&cpu->work_mutex);
951 if (cpu->queued_work_first == NULL) {
952 cpu->queued_work_first = wi;
953 } else {
954 cpu->queued_work_last->next = wi;
955 }
956 cpu->queued_work_last = wi;
957 wi->next = NULL;
958 wi->done = false;
959 qemu_mutex_unlock(&cpu->work_mutex);
960
961 qemu_cpu_kick(cpu);
962 }
963
964 static void flush_queued_work(CPUState *cpu)
965 {
966 struct qemu_work_item *wi;
967
968 if (cpu->queued_work_first == NULL) {
969 return;
970 }
971
972 qemu_mutex_lock(&cpu->work_mutex);
973 while (cpu->queued_work_first != NULL) {
974 wi = cpu->queued_work_first;
975 cpu->queued_work_first = wi->next;
976 if (!cpu->queued_work_first) {
977 cpu->queued_work_last = NULL;
978 }
979 qemu_mutex_unlock(&cpu->work_mutex);
980 wi->func(wi->data);
981 qemu_mutex_lock(&cpu->work_mutex);
982 if (wi->free) {
983 g_free(wi);
984 } else {
985 atomic_mb_set(&wi->done, true);
986 }
987 }
988 qemu_mutex_unlock(&cpu->work_mutex);
989 qemu_cond_broadcast(&qemu_work_cond);
990 }
991
992 static void qemu_wait_io_event_common(CPUState *cpu)
993 {
994 if (cpu->stop) {
995 cpu->stop = false;
996 cpu->stopped = true;
997 qemu_cond_broadcast(&qemu_pause_cond);
998 }
999 flush_queued_work(cpu);
1000 cpu->thread_kicked = false;
1001 }
1002
1003 static void qemu_tcg_wait_io_event(CPUState *cpu)
1004 {
1005 while (all_cpu_threads_idle()) {
1006 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1007 }
1008
1009 while (iothread_requesting_mutex) {
1010 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1011 }
1012
1013 CPU_FOREACH(cpu) {
1014 qemu_wait_io_event_common(cpu);
1015 }
1016 }
1017
1018 static void qemu_kvm_wait_io_event(CPUState *cpu)
1019 {
1020 while (cpu_thread_is_idle(cpu)) {
1021 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1022 }
1023
1024 qemu_kvm_eat_signals(cpu);
1025 qemu_wait_io_event_common(cpu);
1026 }
1027
1028 static void *qemu_kvm_cpu_thread_fn(void *arg)
1029 {
1030 CPUState *cpu = arg;
1031 int r;
1032
1033 rcu_register_thread();
1034
1035 qemu_mutex_lock_iothread();
1036 qemu_thread_get_self(cpu->thread);
1037 cpu->thread_id = qemu_get_thread_id();
1038 cpu->can_do_io = 1;
1039 current_cpu = cpu;
1040
1041 r = kvm_init_vcpu(cpu);
1042 if (r < 0) {
1043 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1044 exit(1);
1045 }
1046
1047 qemu_kvm_init_cpu_signals(cpu);
1048
1049 /* signal CPU creation */
1050 cpu->created = true;
1051 qemu_cond_signal(&qemu_cpu_cond);
1052
1053 while (1) {
1054 if (cpu_can_run(cpu)) {
1055 r = kvm_cpu_exec(cpu);
1056 if (r == EXCP_DEBUG) {
1057 cpu_handle_guest_debug(cpu);
1058 }
1059 }
1060 qemu_kvm_wait_io_event(cpu);
1061 }
1062
1063 return NULL;
1064 }
1065
1066 static void *qemu_dummy_cpu_thread_fn(void *arg)
1067 {
1068 #ifdef _WIN32
1069 fprintf(stderr, "qtest is not supported under Windows\n");
1070 exit(1);
1071 #else
1072 CPUState *cpu = arg;
1073 sigset_t waitset;
1074 int r;
1075
1076 rcu_register_thread();
1077
1078 qemu_mutex_lock_iothread();
1079 qemu_thread_get_self(cpu->thread);
1080 cpu->thread_id = qemu_get_thread_id();
1081 cpu->can_do_io = 1;
1082
1083 sigemptyset(&waitset);
1084 sigaddset(&waitset, SIG_IPI);
1085
1086 /* signal CPU creation */
1087 cpu->created = true;
1088 qemu_cond_signal(&qemu_cpu_cond);
1089
1090 current_cpu = cpu;
1091 while (1) {
1092 current_cpu = NULL;
1093 qemu_mutex_unlock_iothread();
1094 do {
1095 int sig;
1096 r = sigwait(&waitset, &sig);
1097 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1098 if (r == -1) {
1099 perror("sigwait");
1100 exit(1);
1101 }
1102 qemu_mutex_lock_iothread();
1103 current_cpu = cpu;
1104 qemu_wait_io_event_common(cpu);
1105 }
1106
1107 return NULL;
1108 #endif
1109 }
1110
1111 static void tcg_exec_all(void);
1112
1113 static void *qemu_tcg_cpu_thread_fn(void *arg)
1114 {
1115 CPUState *cpu = arg;
1116
1117 rcu_register_thread();
1118
1119 qemu_mutex_lock_iothread();
1120 qemu_thread_get_self(cpu->thread);
1121
1122 CPU_FOREACH(cpu) {
1123 cpu->thread_id = qemu_get_thread_id();
1124 cpu->created = true;
1125 cpu->can_do_io = 1;
1126 }
1127 qemu_cond_signal(&qemu_cpu_cond);
1128
1129 /* wait for initial kick-off after machine start */
1130 while (first_cpu->stopped) {
1131 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1132
1133 /* process any pending work */
1134 CPU_FOREACH(cpu) {
1135 qemu_wait_io_event_common(cpu);
1136 }
1137 }
1138
1139 /* process any pending work */
1140 atomic_mb_set(&exit_request, 1);
1141
1142 while (1) {
1143 tcg_exec_all();
1144
1145 if (use_icount) {
1146 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1147
1148 if (deadline == 0) {
1149 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1150 }
1151 }
1152 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1153 }
1154
1155 return NULL;
1156 }
1157
1158 static void qemu_cpu_kick_thread(CPUState *cpu)
1159 {
1160 #ifndef _WIN32
1161 int err;
1162
1163 if (cpu->thread_kicked) {
1164 return;
1165 }
1166 cpu->thread_kicked = true;
1167 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1168 if (err) {
1169 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1170 exit(1);
1171 }
1172 #else /* _WIN32 */
1173 abort();
1174 #endif
1175 }
1176
1177 static void qemu_cpu_kick_no_halt(void)
1178 {
1179 CPUState *cpu;
1180 /* Ensure whatever caused the exit has reached the CPU threads before
1181 * writing exit_request.
1182 */
1183 atomic_mb_set(&exit_request, 1);
1184 cpu = atomic_mb_read(&tcg_current_cpu);
1185 if (cpu) {
1186 cpu_exit(cpu);
1187 }
1188 }
1189
1190 void qemu_cpu_kick(CPUState *cpu)
1191 {
1192 qemu_cond_broadcast(cpu->halt_cond);
1193 if (tcg_enabled()) {
1194 qemu_cpu_kick_no_halt();
1195 } else {
1196 qemu_cpu_kick_thread(cpu);
1197 }
1198 }
1199
1200 void qemu_cpu_kick_self(void)
1201 {
1202 assert(current_cpu);
1203 qemu_cpu_kick_thread(current_cpu);
1204 }
1205
1206 bool qemu_cpu_is_self(CPUState *cpu)
1207 {
1208 return qemu_thread_is_self(cpu->thread);
1209 }
1210
1211 bool qemu_in_vcpu_thread(void)
1212 {
1213 return current_cpu && qemu_cpu_is_self(current_cpu);
1214 }
1215
/* Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread(). */
static __thread bool iothread_locked = false;

/* Report whether the calling thread took the iothread mutex through
 * qemu_mutex_lock_iothread() and has not released it yet. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1222
1223 void qemu_mutex_lock_iothread(void)
1224 {
1225 atomic_inc(&iothread_requesting_mutex);
1226 /* In the simple case there is no need to bump the VCPU thread out of
1227 * TCG code execution.
1228 */
1229 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1230 !first_cpu || !first_cpu->created) {
1231 qemu_mutex_lock(&qemu_global_mutex);
1232 atomic_dec(&iothread_requesting_mutex);
1233 } else {
1234 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1235 qemu_cpu_kick_no_halt();
1236 qemu_mutex_lock(&qemu_global_mutex);
1237 }
1238 atomic_dec(&iothread_requesting_mutex);
1239 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1240 }
1241 iothread_locked = true;
1242 }
1243
1244 void qemu_mutex_unlock_iothread(void)
1245 {
1246 iothread_locked = false;
1247 qemu_mutex_unlock(&qemu_global_mutex);
1248 }
1249
1250 static int all_vcpus_paused(void)
1251 {
1252 CPUState *cpu;
1253
1254 CPU_FOREACH(cpu) {
1255 if (!cpu->stopped) {
1256 return 0;
1257 }
1258 }
1259
1260 return 1;
1261 }
1262
1263 void pause_all_vcpus(void)
1264 {
1265 CPUState *cpu;
1266
1267 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1268 CPU_FOREACH(cpu) {
1269 cpu->stop = true;
1270 qemu_cpu_kick(cpu);
1271 }
1272
1273 if (qemu_in_vcpu_thread()) {
1274 cpu_stop_current();
1275 if (!kvm_enabled()) {
1276 CPU_FOREACH(cpu) {
1277 cpu->stop = false;
1278 cpu->stopped = true;
1279 }
1280 return;
1281 }
1282 }
1283
1284 while (!all_vcpus_paused()) {
1285 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1286 CPU_FOREACH(cpu) {
1287 qemu_cpu_kick(cpu);
1288 }
1289 }
1290 }
1291
1292 void cpu_resume(CPUState *cpu)
1293 {
1294 cpu->stop = false;
1295 cpu->stopped = false;
1296 qemu_cpu_kick(cpu);
1297 }
1298
1299 void resume_all_vcpus(void)
1300 {
1301 CPUState *cpu;
1302
1303 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1304 CPU_FOREACH(cpu) {
1305 cpu_resume(cpu);
1306 }
1307 }
1308
1309 /* For temporary buffers for forming a name */
1310 #define VCPU_THREAD_NAME_SIZE 16
1311
1312 static void qemu_tcg_init_vcpu(CPUState *cpu)
1313 {
1314 char thread_name[VCPU_THREAD_NAME_SIZE];
1315 static QemuCond *tcg_halt_cond;
1316 static QemuThread *tcg_cpu_thread;
1317
1318 /* share a single thread for all cpus with TCG */
1319 if (!tcg_cpu_thread) {
1320 cpu->thread = g_malloc0(sizeof(QemuThread));
1321 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1322 qemu_cond_init(cpu->halt_cond);
1323 tcg_halt_cond = cpu->halt_cond;
1324 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1325 cpu->cpu_index);
1326 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1327 cpu, QEMU_THREAD_JOINABLE);
1328 #ifdef _WIN32
1329 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1330 #endif
1331 while (!cpu->created) {
1332 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1333 }
1334 tcg_cpu_thread = cpu->thread;
1335 } else {
1336 cpu->thread = tcg_cpu_thread;
1337 cpu->halt_cond = tcg_halt_cond;
1338 }
1339 }
1340
1341 static void qemu_kvm_start_vcpu(CPUState *cpu)
1342 {
1343 char thread_name[VCPU_THREAD_NAME_SIZE];
1344
1345 cpu->thread = g_malloc0(sizeof(QemuThread));
1346 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1347 qemu_cond_init(cpu->halt_cond);
1348 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1349 cpu->cpu_index);
1350 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1351 cpu, QEMU_THREAD_JOINABLE);
1352 while (!cpu->created) {
1353 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1354 }
1355 }
1356
1357 static void qemu_dummy_start_vcpu(CPUState *cpu)
1358 {
1359 char thread_name[VCPU_THREAD_NAME_SIZE];
1360
1361 cpu->thread = g_malloc0(sizeof(QemuThread));
1362 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1363 qemu_cond_init(cpu->halt_cond);
1364 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1365 cpu->cpu_index);
1366 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1367 QEMU_THREAD_JOINABLE);
1368 while (!cpu->created) {
1369 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1370 }
1371 }
1372
1373 void qemu_init_vcpu(CPUState *cpu)
1374 {
1375 cpu->nr_cores = smp_cores;
1376 cpu->nr_threads = smp_threads;
1377 cpu->stopped = true;
1378
1379 if (!cpu->as) {
1380 /* If the target cpu hasn't set up any address spaces itself,
1381 * give it the default one.
1382 */
1383 AddressSpace *as = address_space_init_shareable(cpu->memory,
1384 "cpu-memory");
1385 cpu->num_ases = 1;
1386 cpu_address_space_init(cpu, as, 0);
1387 }
1388
1389 if (kvm_enabled()) {
1390 qemu_kvm_start_vcpu(cpu);
1391 } else if (tcg_enabled()) {
1392 qemu_tcg_init_vcpu(cpu);
1393 } else {
1394 qemu_dummy_start_vcpu(cpu);
1395 }
1396 }
1397
1398 void cpu_stop_current(void)
1399 {
1400 if (current_cpu) {
1401 current_cpu->stop = false;
1402 current_cpu->stopped = true;
1403 cpu_exit(current_cpu);
1404 qemu_cond_broadcast(&qemu_pause_cond);
1405 }
1406 }
1407
1408 int vm_stop(RunState state)
1409 {
1410 if (qemu_in_vcpu_thread()) {
1411 qemu_system_vmstop_request_prepare();
1412 qemu_system_vmstop_request(state);
1413 /*
1414 * FIXME: should not return to device code in case
1415 * vm_stop() has been requested.
1416 */
1417 cpu_stop_current();
1418 return 0;
1419 }
1420
1421 return do_vm_stop(state);
1422 }
1423
1424 /* does a state transition even if the VM is already stopped,
1425 current state is forgotten forever */
1426 int vm_stop_force_state(RunState state)
1427 {
1428 if (runstate_is_running()) {
1429 return vm_stop(state);
1430 } else {
1431 runstate_set(state);
1432
1433 bdrv_drain_all();
1434 /* Make sure to return an error if the flush in a previous vm_stop()
1435 * failed. */
1436 return bdrv_flush_all();
1437 }
1438 }
1439
1440 static int64_t tcg_get_icount_limit(void)
1441 {
1442 int64_t deadline;
1443
1444 if (replay_mode != REPLAY_MODE_PLAY) {
1445 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1446
1447 /* Maintain prior (possibly buggy) behaviour where if no deadline
1448 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1449 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1450 * nanoseconds.
1451 */
1452 if ((deadline < 0) || (deadline > INT32_MAX)) {
1453 deadline = INT32_MAX;
1454 }
1455
1456 return qemu_icount_round(deadline);
1457 } else {
1458 return replay_get_instructions();
1459 }
1460 }
1461
1462 static int tcg_cpu_exec(CPUState *cpu)
1463 {
1464 int ret;
1465 #ifdef CONFIG_PROFILER
1466 int64_t ti;
1467 #endif
1468
1469 #ifdef CONFIG_PROFILER
1470 ti = profile_getclock();
1471 #endif
1472 if (use_icount) {
1473 int64_t count;
1474 int decr;
1475 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1476 + cpu->icount_extra);
1477 cpu->icount_decr.u16.low = 0;
1478 cpu->icount_extra = 0;
1479 count = tcg_get_icount_limit();
1480 timers_state.qemu_icount += count;
1481 decr = (count > 0xffff) ? 0xffff : count;
1482 count -= decr;
1483 cpu->icount_decr.u16.low = decr;
1484 cpu->icount_extra = count;
1485 }
1486 ret = cpu_exec(cpu);
1487 #ifdef CONFIG_PROFILER
1488 tcg_time += profile_getclock() - ti;
1489 #endif
1490 if (use_icount) {
1491 /* Fold pending instructions back into the
1492 instruction counter, and clear the interrupt flag. */
1493 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1494 + cpu->icount_extra);
1495 cpu->icount_decr.u32 = 0;
1496 cpu->icount_extra = 0;
1497 replay_account_executed_instructions();
1498 }
1499 return ret;
1500 }
1501
1502 static void tcg_exec_all(void)
1503 {
1504 int r;
1505
1506 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1507 qemu_account_warp_timer();
1508
1509 if (next_cpu == NULL) {
1510 next_cpu = first_cpu;
1511 }
1512 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1513 CPUState *cpu = next_cpu;
1514
1515 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1516 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1517
1518 if (cpu_can_run(cpu)) {
1519 r = tcg_cpu_exec(cpu);
1520 if (r == EXCP_DEBUG) {
1521 cpu_handle_guest_debug(cpu);
1522 break;
1523 }
1524 } else if (cpu->stop || cpu->stopped) {
1525 break;
1526 }
1527 }
1528
1529 /* Pairs with smp_wmb in qemu_cpu_kick. */
1530 atomic_mb_set(&exit_request, 0);
1531 }
1532
1533 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1534 {
1535 /* XXX: implement xxx_cpu_list for targets that still miss it */
1536 #if defined(cpu_list)
1537 cpu_list(f, cpu_fprintf);
1538 #endif
1539 }
1540
1541 CpuInfoList *qmp_query_cpus(Error **errp)
1542 {
1543 CpuInfoList *head = NULL, *cur_item = NULL;
1544 CPUState *cpu;
1545
1546 CPU_FOREACH(cpu) {
1547 CpuInfoList *info;
1548 #if defined(TARGET_I386)
1549 X86CPU *x86_cpu = X86_CPU(cpu);
1550 CPUX86State *env = &x86_cpu->env;
1551 #elif defined(TARGET_PPC)
1552 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1553 CPUPPCState *env = &ppc_cpu->env;
1554 #elif defined(TARGET_SPARC)
1555 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1556 CPUSPARCState *env = &sparc_cpu->env;
1557 #elif defined(TARGET_MIPS)
1558 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1559 CPUMIPSState *env = &mips_cpu->env;
1560 #elif defined(TARGET_TRICORE)
1561 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1562 CPUTriCoreState *env = &tricore_cpu->env;
1563 #endif
1564
1565 cpu_synchronize_state(cpu);
1566
1567 info = g_malloc0(sizeof(*info));
1568 info->value = g_malloc0(sizeof(*info->value));
1569 info->value->CPU = cpu->cpu_index;
1570 info->value->current = (cpu == first_cpu);
1571 info->value->halted = cpu->halted;
1572 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1573 info->value->thread_id = cpu->thread_id;
1574 #if defined(TARGET_I386)
1575 info->value->arch = CPU_INFO_ARCH_X86;
1576 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1577 #elif defined(TARGET_PPC)
1578 info->value->arch = CPU_INFO_ARCH_PPC;
1579 info->value->u.ppc.nip = env->nip;
1580 #elif defined(TARGET_SPARC)
1581 info->value->arch = CPU_INFO_ARCH_SPARC;
1582 info->value->u.q_sparc.pc = env->pc;
1583 info->value->u.q_sparc.npc = env->npc;
1584 #elif defined(TARGET_MIPS)
1585 info->value->arch = CPU_INFO_ARCH_MIPS;
1586 info->value->u.q_mips.PC = env->active_tc.PC;
1587 #elif defined(TARGET_TRICORE)
1588 info->value->arch = CPU_INFO_ARCH_TRICORE;
1589 info->value->u.tricore.PC = env->PC;
1590 #else
1591 info->value->arch = CPU_INFO_ARCH_OTHER;
1592 #endif
1593
1594 /* XXX: waiting for the qapi to support GSList */
1595 if (!cur_item) {
1596 head = cur_item = info;
1597 } else {
1598 cur_item->next = info;
1599 cur_item = info;
1600 }
1601 }
1602
1603 return head;
1604 }
1605
1606 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1607 bool has_cpu, int64_t cpu_index, Error **errp)
1608 {
1609 FILE *f;
1610 uint32_t l;
1611 CPUState *cpu;
1612 uint8_t buf[1024];
1613 int64_t orig_addr = addr, orig_size = size;
1614
1615 if (!has_cpu) {
1616 cpu_index = 0;
1617 }
1618
1619 cpu = qemu_get_cpu(cpu_index);
1620 if (cpu == NULL) {
1621 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1622 "a CPU number");
1623 return;
1624 }
1625
1626 f = fopen(filename, "wb");
1627 if (!f) {
1628 error_setg_file_open(errp, errno, filename);
1629 return;
1630 }
1631
1632 while (size != 0) {
1633 l = sizeof(buf);
1634 if (l > size)
1635 l = size;
1636 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1637 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1638 " specified", orig_addr, orig_size);
1639 goto exit;
1640 }
1641 if (fwrite(buf, 1, l, f) != l) {
1642 error_setg(errp, QERR_IO_ERROR);
1643 goto exit;
1644 }
1645 addr += l;
1646 size -= l;
1647 }
1648
1649 exit:
1650 fclose(f);
1651 }
1652
1653 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1654 Error **errp)
1655 {
1656 FILE *f;
1657 uint32_t l;
1658 uint8_t buf[1024];
1659
1660 f = fopen(filename, "wb");
1661 if (!f) {
1662 error_setg_file_open(errp, errno, filename);
1663 return;
1664 }
1665
1666 while (size != 0) {
1667 l = sizeof(buf);
1668 if (l > size)
1669 l = size;
1670 cpu_physical_memory_read(addr, buf, l);
1671 if (fwrite(buf, 1, l, f) != l) {
1672 error_setg(errp, QERR_IO_ERROR);
1673 goto exit;
1674 }
1675 addr += l;
1676 size -= l;
1677 }
1678
1679 exit:
1680 fclose(f);
1681 }
1682
1683 void qmp_inject_nmi(Error **errp)
1684 {
1685 #if defined(TARGET_I386)
1686 CPUState *cs;
1687
1688 CPU_FOREACH(cs) {
1689 X86CPU *cpu = X86_CPU(cs);
1690
1691 if (!cpu->apic_state) {
1692 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1693 } else {
1694 apic_deliver_nmi(cpu->apic_state);
1695 }
1696 }
1697 #else
1698 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1699 #endif
1700 }
1701
1702 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1703 {
1704 if (!use_icount) {
1705 return;
1706 }
1707
1708 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1709 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1710 if (icount_align_option) {
1711 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1712 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1713 } else {
1714 cpu_fprintf(f, "Max guest delay NA\n");
1715 cpu_fprintf(f, "Max guest advance NA\n");
1716 }
1717 }