1/*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include "config.h"
20#ifdef _WIN32
21#include <windows.h>
22#else
23#include <sys/types.h>
24#include <sys/mman.h>
25#endif
26
27#include "qemu-common.h"
28#include "cpu.h"
29#include "tcg.h"
30#include "hw/hw.h"
31#include "hw/qdev.h"
32#include "qemu/osdep.h"
33#include "sysemu/kvm.h"
34#include "sysemu/sysemu.h"
35#include "hw/xen/xen.h"
36#include "qemu/timer.h"
37#include "qemu/config-file.h"
38#include "exec/memory.h"
39#include "sysemu/dma.h"
40#include "exec/address-spaces.h"
41#if defined(CONFIG_USER_ONLY)
42#include "qemu.h"
43#else /* !CONFIG_USER_ONLY */
44#include "sysemu/xen-mapcache.h"
45#include "trace.h"
46#endif
47#include "exec/cpu-all.h"
48
49#include "exec/cputlb.h"
50#include "translate-all.h"
51
52#include "exec/memory-internal.h"
53#include "exec/ram_addr.h"
54#include "qemu/cache-utils.h"
55
56#include "qemu/range.h"
57
58//#define DEBUG_SUBPAGE
59
60#if !defined(CONFIG_USER_ONLY)
61static bool in_migration;
62
63RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64
65static MemoryRegion *system_memory;
66static MemoryRegion *system_io;
67
68AddressSpace address_space_io;
69AddressSpace address_space_memory;
70
71MemoryRegion io_mem_rom, io_mem_notdirty;
72static MemoryRegion io_mem_unassigned;
73
74#endif
75
76struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77/* current CPU in the current thread. It is only valid inside
78 cpu_exec() */
79DEFINE_TLS(CPUState *, current_cpu);
80/* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
83int use_icount;
84
85#if !defined(CONFIG_USER_ONLY)
86
87typedef struct PhysPageEntry PhysPageEntry;
88
89struct PhysPageEntry {
90 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
91 uint32_t skip : 6;
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
93 uint32_t ptr : 26;
94};
95
96#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
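/* PHYS_MAP_NODE_NIL is the all-ones value of the 26-bit ptr field;
 * phys_map_node_alloc() asserts that it is never handed out, so it can
 * safely mark an unallocated subtree. */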
97
98/* Size of the L2 (and L3, etc) page tables. */
99#define ADDR_SPACE_BITS 64
100
101#define P_L2_BITS 9
102#define P_L2_SIZE (1 << P_L2_BITS)
103
104#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
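/* For example, with 4 KiB target pages (TARGET_PAGE_BITS == 12) this gives
 * (64 - 12 - 1) / 9 + 1 = 6 trie levels, each indexed by 9 address bits. */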
105
106typedef PhysPageEntry Node[P_L2_SIZE];
107
108typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
111 unsigned nodes_nb;
112 unsigned nodes_nb_alloc;
113 Node *nodes;
114 MemoryRegionSection *sections;
115} PhysPageMap;
116
117struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119 * The bottom level has pointers to MemoryRegionSections.
120 */
121 PhysPageEntry phys_map;
122 PhysPageMap map;
123 AddressSpace *as;
124};
125
126#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127typedef struct subpage_t {
128 MemoryRegion iomem;
129 AddressSpace *as;
130 hwaddr base;
131 uint16_t sub_section[TARGET_PAGE_SIZE];
132} subpage_t;
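/* A subpage covers one target page whose contents are split across several
 * MemoryRegionSections: sub_section[] holds one section index per byte
 * offset within the page (see SUBPAGE_IDX above). */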
133
134#define PHYS_SECTION_UNASSIGNED 0
135#define PHYS_SECTION_NOTDIRTY 1
136#define PHYS_SECTION_ROM 2
137#define PHYS_SECTION_WATCH 3
138
139static void io_mem_init(void);
140static void memory_map_init(void);
141
142static MemoryRegion io_mem_watch;
143#endif
144
145#if !defined(CONFIG_USER_ONLY)
146
147static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148{
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
153 }
154}
155
156static uint32_t phys_map_node_alloc(PhysPageMap *map)
157{
158 unsigned i;
159 uint32_t ret;
160
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
167 }
168 return ret;
169}
170
171static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
173 int level)
174{
175 PhysPageEntry *p;
176 int i;
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
182 if (level == 0) {
183 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].skip = 0;
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
186 }
187 }
188 } else {
189 p = map->nodes[lp->ptr];
190 }
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 lp->skip = 0;
196 lp->ptr = leaf;
197 *index += step;
198 *nb -= step;
199 } else {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
201 }
202 ++lp;
203 }
204}
205
206static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
208 uint16_t leaf)
209{
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
214}
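/* phys_page_set_level() descends the trie and marks whole step-aligned,
 * step-sized runs as leaves at the current level; anything smaller recurses
 * one level down, so large mappings stay shallow in the trie. */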
215
216/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
218 */
219static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220{
221 unsigned valid_ptr = P_L2_SIZE;
222 int valid = 0;
223 PhysPageEntry *p;
224 int i;
225
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
227 return;
228 }
229
230 p = nodes[lp->ptr];
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
233 continue;
234 }
235
236 valid_ptr = i;
237 valid++;
238 if (p[i].skip) {
239 phys_page_compact(&p[i], nodes, compacted);
240 }
241 }
242
243 /* We can only compress if there's only one child. */
244 if (valid != 1) {
245 return;
246 }
247
248 assert(valid_ptr < P_L2_SIZE);
249
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
252 return;
253 }
254
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
261 * change this rule.
262 */
263 lp->skip = 0;
264 } else {
265 lp->skip += p[valid_ptr].skip;
266 }
267}
268
269static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270{
271 DECLARE_BITMAP(compacted, nodes_nb);
272
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
275 }
276}
277
278static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
280{
281 PhysPageEntry *p;
282 hwaddr index = addr >> TARGET_PAGE_BITS;
283 int i;
284
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
288 }
289 p = nodes[lp.ptr];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
291 }
292
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
297 } else {
298 return &sections[PHYS_SECTION_UNASSIGNED];
299 }
300}
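/* The lookup consumes lp.skip levels per node, so a path shortened by
 * phys_page_compact() is walked in a single step; a NIL pointer at any point
 * means the range was never mapped and resolves to the unassigned section. */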
301
302bool memory_region_is_unassigned(MemoryRegion *mr)
303{
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
306}
307
308static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 hwaddr addr,
310 bool resolve_subpage)
311{
312 MemoryRegionSection *section;
313 subpage_t *subpage;
314
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
319 }
320 return section;
321}
322
323static MemoryRegionSection *
324address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
326{
327 MemoryRegionSection *section;
328 Int128 diff, diff_page;
329
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
333
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
336
337 diff_page = int128_make64(((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr);
338 diff = int128_sub(section->mr->size, int128_make64(addr));
339 diff = int128_min(diff, diff_page);
340 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
341 return section;
342}
343
344MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
345 hwaddr *xlat, hwaddr *plen,
346 bool is_write)
347{
348 IOMMUTLBEntry iotlb;
349 MemoryRegionSection *section;
350 MemoryRegion *mr;
351 hwaddr len = *plen;
352
353 for (;;) {
354 section = address_space_translate_internal(as->dispatch, addr, &addr, &len, true);
355 mr = section->mr;
356
357 if (!mr->iommu_ops) {
358 break;
359 }
360
361 iotlb = mr->iommu_ops->translate(mr, addr);
362 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
363 | (addr & iotlb.addr_mask));
364 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
365 if (!(iotlb.perm & (1 << is_write))) {
366 mr = &io_mem_unassigned;
367 break;
368 }
369
370 as = iotlb.target_as;
371 }
372
373 *plen = len;
374 *xlat = addr;
375 return mr;
376}
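/* The loop above follows chained IOMMUs: each translation step may hand back
 * a new target address space, and the returned length is clipped so that the
 * caller never crosses an IOMMU mapping boundary. */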
377
378MemoryRegionSection *
379address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
380 hwaddr *plen)
381{
382 MemoryRegionSection *section;
383 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
384
385 assert(!section->mr->iommu_ops);
386 return section;
387}
388#endif
389
390void cpu_exec_init_all(void)
391{
392#if !defined(CONFIG_USER_ONLY)
393 qemu_mutex_init(&ram_list.mutex);
394 memory_map_init();
395 io_mem_init();
396#endif
397}
398
399#if !defined(CONFIG_USER_ONLY)
400
401static int cpu_common_post_load(void *opaque, int version_id)
402{
403 CPUState *cpu = opaque;
404
405 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
406 version_id is increased. */
407 cpu->interrupt_request &= ~0x01;
408 tlb_flush(cpu->env_ptr, 1);
409
410 return 0;
411}
412
413const VMStateDescription vmstate_cpu_common = {
414 .name = "cpu_common",
415 .version_id = 1,
416 .minimum_version_id = 1,
417 .minimum_version_id_old = 1,
418 .post_load = cpu_common_post_load,
419 .fields = (VMStateField []) {
420 VMSTATE_UINT32(halted, CPUState),
421 VMSTATE_UINT32(interrupt_request, CPUState),
422 VMSTATE_END_OF_LIST()
423 }
424};
425
426#endif
427
428CPUState *qemu_get_cpu(int index)
429{
430 CPUState *cpu;
431
432 CPU_FOREACH(cpu) {
433 if (cpu->cpu_index == index) {
434 return cpu;
435 }
436 }
437
438 return NULL;
439}
440
441void cpu_exec_init(CPUArchState *env)
442{
443 CPUState *cpu = ENV_GET_CPU(env);
444 CPUClass *cc = CPU_GET_CLASS(cpu);
445 CPUState *some_cpu;
446 int cpu_index;
447
448#if defined(CONFIG_USER_ONLY)
449 cpu_list_lock();
450#endif
451 cpu_index = 0;
452 CPU_FOREACH(some_cpu) {
453 cpu_index++;
454 }
455 cpu->cpu_index = cpu_index;
456 cpu->numa_node = 0;
457 QTAILQ_INIT(&env->breakpoints);
458 QTAILQ_INIT(&env->watchpoints);
459#ifndef CONFIG_USER_ONLY
460 cpu->thread_id = qemu_get_thread_id();
461#endif
462 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
463#if defined(CONFIG_USER_ONLY)
464 cpu_list_unlock();
465#endif
466 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
467 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
468 }
469#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
470 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
471 cpu_save, cpu_load, env);
472 assert(cc->vmsd == NULL);
473 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
474#endif
475 if (cc->vmsd != NULL) {
476 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
477 }
478}
479
480#if defined(TARGET_HAS_ICE)
481#if defined(CONFIG_USER_ONLY)
482static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
483{
484 tb_invalidate_phys_page_range(pc, pc + 1, 0);
485}
486#else
487static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
488{
489 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
490 if (phys != -1) {
491 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
492 }
493}
494#endif
495#endif /* TARGET_HAS_ICE */
496
497#if defined(CONFIG_USER_ONLY)
498void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
499
500{
501}
502
503int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
504 int flags, CPUWatchpoint **watchpoint)
505{
506 return -ENOSYS;
507}
508#else
509/* Add a watchpoint. */
510int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
511 int flags, CPUWatchpoint **watchpoint)
512{
513 target_ulong len_mask = ~(len - 1);
514 CPUWatchpoint *wp;
515
516 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
517 if ((len & (len - 1)) || (addr & ~len_mask) ||
518 len == 0 || len > TARGET_PAGE_SIZE) {
519 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
520 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
521 return -EINVAL;
522 }
523 wp = g_malloc(sizeof(*wp));
524
525 wp->vaddr = addr;
526 wp->len_mask = len_mask;
527 wp->flags = flags;
528
529 /* keep all GDB-injected watchpoints in front */
530 if (flags & BP_GDB)
531 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
532 else
533 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
534
535 tlb_flush_page(env, addr);
536
537 if (watchpoint)
538 *watchpoint = wp;
539 return 0;
540}
541
542/* Remove a specific watchpoint. */
543int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
544 int flags)
545{
546 target_ulong len_mask = ~(len - 1);
547 CPUWatchpoint *wp;
548
549 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
550 if (addr == wp->vaddr && len_mask == wp->len_mask
551 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
552 cpu_watchpoint_remove_by_ref(env, wp);
553 return 0;
554 }
555 }
556 return -ENOENT;
557}
558
559/* Remove a specific watchpoint by reference. */
560void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
561{
562 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
563
564 tlb_flush_page(env, watchpoint->vaddr);
565
566 g_free(watchpoint);
567}
568
569/* Remove all matching watchpoints. */
570void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
571{
572 CPUWatchpoint *wp, *next;
573
574 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
575 if (wp->flags & mask)
576 cpu_watchpoint_remove_by_ref(env, wp);
577 }
578}
579#endif
580
581/* Add a breakpoint. */
582int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
583 CPUBreakpoint **breakpoint)
584{
585#if defined(TARGET_HAS_ICE)
586 CPUBreakpoint *bp;
587
588 bp = g_malloc(sizeof(*bp));
589
590 bp->pc = pc;
591 bp->flags = flags;
592
593 /* keep all GDB-injected breakpoints in front */
594 if (flags & BP_GDB) {
595 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
596 } else {
597 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
598 }
599
600 breakpoint_invalidate(ENV_GET_CPU(env), pc);
601
602 if (breakpoint) {
603 *breakpoint = bp;
604 }
605 return 0;
606#else
607 return -ENOSYS;
608#endif
609}
610
611/* Remove a specific breakpoint. */
612int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
613{
614#if defined(TARGET_HAS_ICE)
615 CPUBreakpoint *bp;
616
617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
618 if (bp->pc == pc && bp->flags == flags) {
619 cpu_breakpoint_remove_by_ref(env, bp);
620 return 0;
621 }
622 }
623 return -ENOENT;
624#else
625 return -ENOSYS;
626#endif
627}
628
629/* Remove a specific breakpoint by reference. */
630void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
631{
632#if defined(TARGET_HAS_ICE)
633 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
634
635 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
636
637 g_free(breakpoint);
638#endif
639}
640
641/* Remove all matching breakpoints. */
642void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
643{
644#if defined(TARGET_HAS_ICE)
645 CPUBreakpoint *bp, *next;
646
647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
648 if (bp->flags & mask)
649 cpu_breakpoint_remove_by_ref(env, bp);
650 }
651#endif
652}
653
654/* Enable or disable single-step mode. EXCP_DEBUG is returned by the
655 CPU loop after each instruction. */
656void cpu_single_step(CPUState *cpu, int enabled)
657{
658#if defined(TARGET_HAS_ICE)
659 if (cpu->singlestep_enabled != enabled) {
660 cpu->singlestep_enabled = enabled;
661 if (kvm_enabled()) {
662 kvm_update_guest_debug(cpu, 0);
663 } else {
664 /* must flush all the translated code to avoid inconsistencies */
665 /* XXX: only flush what is necessary */
666 CPUArchState *env = cpu->env_ptr;
667 tb_flush(env);
668 }
669 }
670#endif
671}
672
673void cpu_abort(CPUArchState *env, const char *fmt, ...)
674{
675 CPUState *cpu = ENV_GET_CPU(env);
676 va_list ap;
677 va_list ap2;
678
679 va_start(ap, fmt);
680 va_copy(ap2, ap);
681 fprintf(stderr, "qemu: fatal: ");
682 vfprintf(stderr, fmt, ap);
683 fprintf(stderr, "\n");
684 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
685 if (qemu_log_enabled()) {
686 qemu_log("qemu: fatal: ");
687 qemu_log_vprintf(fmt, ap2);
688 qemu_log("\n");
689 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
690 qemu_log_flush();
691 qemu_log_close();
692 }
693 va_end(ap2);
694 va_end(ap);
695#if defined(CONFIG_USER_ONLY)
696 {
697 struct sigaction act;
698 sigfillset(&act.sa_mask);
699 act.sa_handler = SIG_DFL;
700 sigaction(SIGABRT, &act, NULL);
701 }
702#endif
703 abort();
704}
705
706#if !defined(CONFIG_USER_ONLY)
707static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
708{
709 RAMBlock *block;
710
711 /* The list is protected by the iothread lock here. */
712 block = ram_list.mru_block;
713 if (block && addr - block->offset < block->length) {
714 goto found;
715 }
716 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
717 if (addr - block->offset < block->length) {
718 goto found;
719 }
720 }
721
722 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
723 abort();
724
725found:
726 ram_list.mru_block = block;
727 return block;
728}
729
730static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
731{
732 ram_addr_t start1;
733 RAMBlock *block;
734 ram_addr_t end;
735
736 end = TARGET_PAGE_ALIGN(start + length);
737 start &= TARGET_PAGE_MASK;
738
739 block = qemu_get_ram_block(start);
740 assert(block == qemu_get_ram_block(end - 1));
741 start1 = (uintptr_t)block->host + (start - block->offset);
742 cpu_tlb_reset_dirty_all(start1, length);
743}
744
745/* Note: start and end must be within the same ram block. */
746void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
747 unsigned client)
748{
749 if (length == 0)
750 return;
751 cpu_physical_memory_clear_dirty_range(start, length, client);
752
753 if (tcg_enabled()) {
754 tlb_reset_dirty_range_all(start, length);
755 }
756}
757
758static void cpu_physical_memory_set_dirty_tracking(bool enable)
759{
760 in_migration = enable;
761}
762
763hwaddr memory_region_section_get_iotlb(CPUArchState *env,
764 MemoryRegionSection *section,
765 target_ulong vaddr,
766 hwaddr paddr, hwaddr xlat,
767 int prot,
768 target_ulong *address)
769{
770 hwaddr iotlb;
771 CPUWatchpoint *wp;
772
773 if (memory_region_is_ram(section->mr)) {
774 /* Normal RAM. */
775 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
776 + xlat;
777 if (!section->readonly) {
778 iotlb |= PHYS_SECTION_NOTDIRTY;
779 } else {
780 iotlb |= PHYS_SECTION_ROM;
781 }
782 } else {
783 iotlb = section - address_space_memory.dispatch->map.sections;
784 iotlb += xlat;
785 }
786
787 /* Make accesses to pages with watchpoints go via the
788 watchpoint trap routines. */
789 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
790 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
791 /* Avoid trapping reads of pages with a write breakpoint. */
792 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
793 iotlb = PHYS_SECTION_WATCH + paddr;
794 *address |= TLB_MMIO;
795 break;
796 }
797 }
798 }
799
800 return iotlb;
801}
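/* The physical section number lives in the low TARGET_PAGE_BITS of the
 * returned value (phys_section_add() asserts that section numbers stay below
 * TARGET_PAGE_SIZE); iotlb_to_region() below recovers the MemoryRegion from
 * just those low bits. */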
802#endif /* !defined(CONFIG_USER_ONLY) */
803
804#if !defined(CONFIG_USER_ONLY)
805
806static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
807 uint16_t section);
808static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
809
810static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
811
812/*
813 * Set a custom physical guest memory allocator.
814 * Accelerators with unusual needs may need this. Hopefully, we can
815 * get rid of it eventually.
816 */
817void phys_mem_set_alloc(void *(*alloc)(size_t))
818{
819 phys_mem_alloc = alloc;
820}
821
822static uint16_t phys_section_add(PhysPageMap *map,
823 MemoryRegionSection *section)
824{
825 /* The physical section number is ORed with a page-aligned
826 * pointer to produce the iotlb entries. Thus it should
827 * never overflow into the page-aligned value.
828 */
829 assert(map->sections_nb < TARGET_PAGE_SIZE);
830
831 if (map->sections_nb == map->sections_nb_alloc) {
832 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
833 map->sections = g_renew(MemoryRegionSection, map->sections,
834 map->sections_nb_alloc);
835 }
836 map->sections[map->sections_nb] = *section;
837 memory_region_ref(section->mr);
838 return map->sections_nb++;
839}
840
841static void phys_section_destroy(MemoryRegion *mr)
842{
843 memory_region_unref(mr);
844
845 if (mr->subpage) {
846 subpage_t *subpage = container_of(mr, subpage_t, iomem);
847 memory_region_destroy(&subpage->iomem);
848 g_free(subpage);
849 }
850}
851
852static void phys_sections_free(PhysPageMap *map)
853{
854 while (map->sections_nb > 0) {
855 MemoryRegionSection *section = &map->sections[--map->sections_nb];
856 phys_section_destroy(section->mr);
857 }
858 g_free(map->sections);
859 g_free(map->nodes);
860}
861
862static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
863{
864 subpage_t *subpage;
865 hwaddr base = section->offset_within_address_space
866 & TARGET_PAGE_MASK;
867 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
868 d->map.nodes, d->map.sections);
869 MemoryRegionSection subsection = {
870 .offset_within_address_space = base,
871 .size = int128_make64(TARGET_PAGE_SIZE),
872 };
873 hwaddr start, end;
874
875 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
876
877 if (!(existing->mr->subpage)) {
878 subpage = subpage_init(d->as, base);
879 subsection.mr = &subpage->iomem;
880 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
881 phys_section_add(&d->map, &subsection));
882 } else {
883 subpage = container_of(existing->mr, subpage_t, iomem);
884 }
885 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
886 end = start + int128_get64(section->size) - 1;
887 subpage_register(subpage, start, end,
888 phys_section_add(&d->map, section));
889}
890
891
892static void register_multipage(AddressSpaceDispatch *d,
893 MemoryRegionSection *section)
894{
895 hwaddr start_addr = section->offset_within_address_space;
896 uint16_t section_index = phys_section_add(&d->map, section);
897 uint64_t num_pages = int128_get64(int128_rshift(section->size,
898 TARGET_PAGE_BITS));
899
900 assert(num_pages);
901 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
902}
903
904static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
905{
906 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
907 AddressSpaceDispatch *d = as->next_dispatch;
908 MemoryRegionSection now = *section, remain = *section;
909 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
910
911 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
912 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
913 - now.offset_within_address_space;
914
915 now.size = int128_min(int128_make64(left), now.size);
916 register_subpage(d, &now);
917 } else {
918 now.size = int128_zero();
919 }
920 while (int128_ne(remain.size, now.size)) {
921 remain.size = int128_sub(remain.size, now.size);
922 remain.offset_within_address_space += int128_get64(now.size);
923 remain.offset_within_region += int128_get64(now.size);
924 now = remain;
925 if (int128_lt(remain.size, page_size)) {
926 register_subpage(d, &now);
927 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
928 now.size = page_size;
929 register_subpage(d, &now);
930 } else {
931 now.size = int128_and(now.size, int128_neg(page_size));
932 register_multipage(d, &now);
933 }
934 }
935}
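/* mem_add() splits an incoming section into an unaligned head and tail,
 * registered as subpages, and a page-aligned middle registered as a run of
 * whole pages via register_multipage(). */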
936
937void qemu_flush_coalesced_mmio_buffer(void)
938{
939 if (kvm_enabled())
940 kvm_flush_coalesced_mmio_buffer();
941}
942
943void qemu_mutex_lock_ramlist(void)
944{
945 qemu_mutex_lock(&ram_list.mutex);
946}
947
948void qemu_mutex_unlock_ramlist(void)
949{
950 qemu_mutex_unlock(&ram_list.mutex);
951}
952
953#ifdef __linux__
954
955#include <sys/vfs.h>
956
957#define HUGETLBFS_MAGIC 0x958458f6
958
959static long gethugepagesize(const char *path)
960{
961 struct statfs fs;
962 int ret;
963
964 do {
965 ret = statfs(path, &fs);
966 } while (ret != 0 && errno == EINTR);
967
968 if (ret != 0) {
969 perror(path);
970 return 0;
971 }
972
973 if (fs.f_type != HUGETLBFS_MAGIC)
974 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
975
976 return fs.f_bsize;
977}
978
979static sigjmp_buf sigjump;
980
981static void sigbus_handler(int signal)
982{
983 siglongjmp(sigjump, 1);
984}
985
986static void *file_ram_alloc(RAMBlock *block,
987 ram_addr_t memory,
988 const char *path)
989{
990 char *filename;
991 char *sanitized_name;
992 char *c;
993 void *area;
994 int fd;
995 unsigned long hpagesize;
996
997 hpagesize = gethugepagesize(path);
998 if (!hpagesize) {
999 return NULL;
1000 }
1001
1002 if (memory < hpagesize) {
1003 return NULL;
1004 }
1005
1006 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1007 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1008 return NULL;
1009 }
1010
1011 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1012 sanitized_name = g_strdup(block->mr->name);
1013 for (c = sanitized_name; *c != '\0'; c++) {
1014 if (*c == '/')
1015 *c = '_';
1016 }
1017
1018 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1019 sanitized_name);
1020 g_free(sanitized_name);
1021
1022 fd = mkstemp(filename);
1023 if (fd < 0) {
1024 perror("unable to create backing store for hugepages");
1025 g_free(filename);
1026 return NULL;
1027 }
1028 unlink(filename);
1029 g_free(filename);
1030
1031 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1032
1033 /*
1034 * ftruncate is not supported by hugetlbfs in older
1035 * hosts, so don't bother bailing out on errors.
1036 * If anything goes wrong with it under other filesystems,
1037 * mmap will fail.
1038 */
1039 if (ftruncate(fd, memory))
1040 perror("ftruncate");
1041
1042 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1043 if (area == MAP_FAILED) {
1044 perror("file_ram_alloc: can't mmap RAM pages");
1045 close(fd);
1046 return (NULL);
1047 }
1048
1049 if (mem_prealloc) {
1050 int ret, i;
1051 struct sigaction act, oldact;
1052 sigset_t set, oldset;
1053
1054 memset(&act, 0, sizeof(act));
1055 act.sa_handler = &sigbus_handler;
1056 act.sa_flags = 0;
1057
1058 ret = sigaction(SIGBUS, &act, &oldact);
1059 if (ret) {
1060 perror("file_ram_alloc: failed to install signal handler");
1061 exit(1);
1062 }
1063
1064 /* unblock SIGBUS */
1065 sigemptyset(&set);
1066 sigaddset(&set, SIGBUS);
1067 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1068
1069 if (sigsetjmp(sigjump, 1)) {
1070 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1071 exit(1);
1072 }
1073
1074 /* MAP_POPULATE silently ignores failures */
1075 for (i = 0; i < (memory/hpagesize); i++) {
1076 memset(area + (hpagesize*i), 0, 1);
1077 }
1078
1079 ret = sigaction(SIGBUS, &oldact, NULL);
1080 if (ret) {
1081 perror("file_ram_alloc: failed to reinstall signal handler");
1082 exit(1);
1083 }
1084
1085 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1086 }
1087
1088 block->fd = fd;
1089 return area;
1090}
1091#else
1092static void *file_ram_alloc(RAMBlock *block,
1093 ram_addr_t memory,
1094 const char *path)
1095{
1096 fprintf(stderr, "-mem-path not supported on this host\n");
1097 exit(1);
1098}
1099#endif
1100
1101static ram_addr_t find_ram_offset(ram_addr_t size)
1102{
1103 RAMBlock *block, *next_block;
1104 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1105
1106 assert(size != 0); /* it would hand out same offset multiple times */
1107
1108 if (QTAILQ_EMPTY(&ram_list.blocks))
1109 return 0;
1110
1111 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112 ram_addr_t end, next = RAM_ADDR_MAX;
1113
1114 end = block->offset + block->length;
1115
1116 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1117 if (next_block->offset >= end) {
1118 next = MIN(next, next_block->offset);
1119 }
1120 }
1121 if (next - end >= size && next - end < mingap) {
1122 offset = end;
1123 mingap = next - end;
1124 }
1125 }
1126
1127 if (offset == RAM_ADDR_MAX) {
1128 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1129 (uint64_t)size);
1130 abort();
1131 }
1132
1133 return offset;
1134}
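/* Best-fit search: of all the gaps between existing blocks that are large
 * enough, pick the smallest one, which limits fragmentation of the
 * ram_addr_t space. */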
1135
1136ram_addr_t last_ram_offset(void)
1137{
1138 RAMBlock *block;
1139 ram_addr_t last = 0;
1140
1141 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1142 last = MAX(last, block->offset + block->length);
1143
1144 return last;
1145}
1146
1147static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1148{
1149 int ret;
1150
1151 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1152 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1153 "dump-guest-core", true)) {
1154 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1155 if (ret) {
1156 perror("qemu_madvise");
1157 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1158 "but dump_guest_core=off specified\n");
1159 }
1160 }
1161}
1162
1163void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1164{
1165 RAMBlock *new_block, *block;
1166
1167 new_block = NULL;
1168 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1169 if (block->offset == addr) {
1170 new_block = block;
1171 break;
1172 }
1173 }
1174 assert(new_block);
1175 assert(!new_block->idstr[0]);
1176
1177 if (dev) {
1178 char *id = qdev_get_dev_path(dev);
1179 if (id) {
1180 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1181 g_free(id);
1182 }
1183 }
1184 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1185
1186 /* This assumes the iothread lock is taken here too. */
1187 qemu_mutex_lock_ramlist();
1188 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1189 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1190 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1191 new_block->idstr);
1192 abort();
1193 }
1194 }
1195 qemu_mutex_unlock_ramlist();
1196}
1197
1198static int memory_try_enable_merging(void *addr, size_t len)
1199{
1200 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1201 /* disabled by the user */
1202 return 0;
1203 }
1204
1205 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1206}
1207
1208ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1209 MemoryRegion *mr)
1210{
1211 RAMBlock *block, *new_block;
1212 ram_addr_t old_ram_size, new_ram_size;
1213
1214 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1215
1216 size = TARGET_PAGE_ALIGN(size);
1217 new_block = g_malloc0(sizeof(*new_block));
1218 new_block->fd = -1;
1219
1220 /* This assumes the iothread lock is taken here too. */
1221 qemu_mutex_lock_ramlist();
1222 new_block->mr = mr;
1223 new_block->offset = find_ram_offset(size);
1224 if (host) {
1225 new_block->host = host;
1226 new_block->flags |= RAM_PREALLOC_MASK;
1227 } else if (xen_enabled()) {
1228 if (mem_path) {
1229 fprintf(stderr, "-mem-path not supported with Xen\n");
1230 exit(1);
1231 }
1232 xen_ram_alloc(new_block->offset, size, mr);
1233 } else {
1234 if (mem_path) {
1235 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1236 /*
1237 * file_ram_alloc() needs to allocate just like
1238 * phys_mem_alloc, but we haven't bothered to provide
1239 * a hook there.
1240 */
1241 fprintf(stderr,
1242 "-mem-path not supported with this accelerator\n");
1243 exit(1);
1244 }
1245 new_block->host = file_ram_alloc(new_block, size, mem_path);
1246 }
1247 if (!new_block->host) {
1248 new_block->host = phys_mem_alloc(size);
1249 if (!new_block->host) {
1250 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1251 new_block->mr->name, strerror(errno));
1252 exit(1);
1253 }
1254 memory_try_enable_merging(new_block->host, size);
1255 }
1256 }
1257 new_block->length = size;
1258
1259 /* Keep the list sorted from biggest to smallest block. */
1260 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1261 if (block->length < new_block->length) {
1262 break;
1263 }
1264 }
1265 if (block) {
1266 QTAILQ_INSERT_BEFORE(block, new_block, next);
1267 } else {
1268 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1269 }
1270 ram_list.mru_block = NULL;
1271
1272 ram_list.version++;
1273 qemu_mutex_unlock_ramlist();
1274
1275 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1276
1277 if (new_ram_size > old_ram_size) {
1278 int i;
1279 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1280 ram_list.dirty_memory[i] =
1281 bitmap_zero_extend(ram_list.dirty_memory[i],
1282 old_ram_size, new_ram_size);
1283 }
1284 }
1285 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1286
1287 qemu_ram_setup_dump(new_block->host, size);
1288 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1289 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1290
1291 if (kvm_enabled())
1292 kvm_setup_guest_memory(new_block->host, size);
1293
1294 return new_block->offset;
1295}
1296
1297ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1298{
1299 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1300}
1301
1302void qemu_ram_free_from_ptr(ram_addr_t addr)
1303{
1304 RAMBlock *block;
1305
1306 /* This assumes the iothread lock is taken here too. */
1307 qemu_mutex_lock_ramlist();
1308 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1309 if (addr == block->offset) {
1310 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1311 ram_list.mru_block = NULL;
1312 ram_list.version++;
1313 g_free(block);
1314 break;
1315 }
1316 }
1317 qemu_mutex_unlock_ramlist();
1318}
1319
1320void qemu_ram_free(ram_addr_t addr)
1321{
1322 RAMBlock *block;
1323
1324 /* This assumes the iothread lock is taken here too. */
1325 qemu_mutex_lock_ramlist();
1326 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1327 if (addr == block->offset) {
1328 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1329 ram_list.mru_block = NULL;
1330 ram_list.version++;
1331 if (block->flags & RAM_PREALLOC_MASK) {
1332 ;
1333 } else if (xen_enabled()) {
1334 xen_invalidate_map_cache_entry(block->host);
1335#ifndef _WIN32
1336 } else if (block->fd >= 0) {
1337 munmap(block->host, block->length);
1338 close(block->fd);
1339#endif
1340 } else {
1341 qemu_anon_ram_free(block->host, block->length);
1342 }
1343 g_free(block);
1344 break;
1345 }
1346 }
1347 qemu_mutex_unlock_ramlist();
1348
1349}
1350
1351#ifndef _WIN32
1352void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1353{
1354 RAMBlock *block;
1355 ram_addr_t offset;
1356 int flags;
1357 void *area, *vaddr;
1358
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 offset = addr - block->offset;
1361 if (offset < block->length) {
1362 vaddr = block->host + offset;
1363 if (block->flags & RAM_PREALLOC_MASK) {
1364 ;
1365 } else if (xen_enabled()) {
1366 abort();
1367 } else {
1368 flags = MAP_FIXED;
1369 munmap(vaddr, length);
1370 if (block->fd >= 0) {
1371#ifdef MAP_POPULATE
1372 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1373 MAP_PRIVATE;
1374#else
1375 flags |= MAP_PRIVATE;
1376#endif
1377 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1378 flags, block->fd, offset);
1379 } else {
1380 /*
1381 * Remap needs to match alloc. Accelerators that
1382 * set phys_mem_alloc never remap. If they did,
1383 * we'd need a remap hook here.
1384 */
1385 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1386
1387 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1388 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1389 flags, -1, 0);
1390 }
1391 if (area != vaddr) {
1392 fprintf(stderr, "Could not remap addr: "
1393 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1394 length, addr);
1395 exit(1);
1396 }
1397 memory_try_enable_merging(vaddr, length);
1398 qemu_ram_setup_dump(vaddr, length);
1399 }
1400 return;
1401 }
1402 }
1403}
1404#endif /* !_WIN32 */
1405
1406/* Return a host pointer to ram allocated with qemu_ram_alloc.
1407 With the exception of the softmmu code in this file, this should
1408 only be used for local memory (e.g. video ram) that the device owns,
1409 and knows it isn't going to access beyond the end of the block.
1410
1411 It should not be used for general purpose DMA.
1412 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1413 */
1414void *qemu_get_ram_ptr(ram_addr_t addr)
1415{
1416 RAMBlock *block = qemu_get_ram_block(addr);
1417
1418 if (xen_enabled()) {
1419 /* We need to check whether the requested address is in RAM
1420 * because we don't want to map the entire guest memory in QEMU.
1421 * In that case just map up to the end of the page.
1422 */
1423 if (block->offset == 0) {
1424 return xen_map_cache(addr, 0, 0);
1425 } else if (block->host == NULL) {
1426 block->host =
1427 xen_map_cache(block->offset, block->length, 1);
1428 }
1429 }
1430 return block->host + (addr - block->offset);
1431}
1432
1433/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1434 * but takes a size argument */
1435static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1436{
1437 if (*size == 0) {
1438 return NULL;
1439 }
1440 if (xen_enabled()) {
1441 return xen_map_cache(addr, *size, 1);
1442 } else {
1443 RAMBlock *block;
1444
1445 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1446 if (addr - block->offset < block->length) {
1447 if (addr - block->offset + *size > block->length)
1448 *size = block->length - addr + block->offset;
1449 return block->host + (addr - block->offset);
1450 }
1451 }
1452
1453 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1454 abort();
1455 }
1456}
1457
1458/* Some of the softmmu routines need to translate from a host pointer
1459 (typically a TLB entry) back to a ram offset. */
1460MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1461{
1462 RAMBlock *block;
1463 uint8_t *host = ptr;
1464
1465 if (xen_enabled()) {
1466 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1467 return qemu_get_ram_block(*ram_addr)->mr;
1468 }
1469
1470 block = ram_list.mru_block;
1471 if (block && block->host && host - block->host < block->length) {
1472 goto found;
1473 }
1474
1475 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1476 /* This case happens when the block is not mapped. */
1477 if (block->host == NULL) {
1478 continue;
1479 }
1480 if (host - block->host < block->length) {
1481 goto found;
1482 }
1483 }
1484
1485 return NULL;
1486
1487found:
1488 *ram_addr = block->offset + (host - block->host);
1489 return block->mr;
1490}
1491
1492static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1493 uint64_t val, unsigned size)
1494{
1495 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1496 tb_invalidate_phys_page_fast(ram_addr, size);
1497 }
1498 switch (size) {
1499 case 1:
1500 stb_p(qemu_get_ram_ptr(ram_addr), val);
1501 break;
1502 case 2:
1503 stw_p(qemu_get_ram_ptr(ram_addr), val);
1504 break;
1505 case 4:
1506 stl_p(qemu_get_ram_ptr(ram_addr), val);
1507 break;
1508 default:
1509 abort();
1510 }
1511 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1512 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1513 /* we remove the notdirty callback only if the code has been
1514 flushed */
1515 if (!cpu_physical_memory_is_clean(ram_addr)) {
1516 CPUArchState *env = current_cpu->env_ptr;
1517 tlb_set_dirty(env, env->mem_io_vaddr);
1518 }
1519}
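/* This slow path is only taken for RAM pages whose TLB entries point at
 * io_mem_notdirty: it invalidates any TBs generated from the page, performs
 * the store, updates the migration/VGA dirty bitmaps, and switches the TLB
 * entry back to the fast path once the page is dirty for every client. */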
1520
1521static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1522 unsigned size, bool is_write)
1523{
1524 return is_write;
1525}
1526
1527static const MemoryRegionOps notdirty_mem_ops = {
1528 .write = notdirty_mem_write,
1529 .valid.accepts = notdirty_mem_accepts,
1530 .endianness = DEVICE_NATIVE_ENDIAN,
1531};
1532
1533/* Generate a debug exception if a watchpoint has been hit. */
1534static void check_watchpoint(int offset, int len_mask, int flags)
1535{
1536 CPUArchState *env = current_cpu->env_ptr;
1537 target_ulong pc, cs_base;
1538 target_ulong vaddr;
1539 CPUWatchpoint *wp;
1540 int cpu_flags;
1541
1542 if (env->watchpoint_hit) {
1543 /* We re-entered the check after replacing the TB. Now raise
1544 * the debug interrupt so that it will trigger after the
1545 * current instruction. */
1546 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1547 return;
1548 }
1549 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1550 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1551 if ((vaddr == (wp->vaddr & len_mask) ||
1552 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1553 wp->flags |= BP_WATCHPOINT_HIT;
1554 if (!env->watchpoint_hit) {
1555 env->watchpoint_hit = wp;
1556 tb_check_watchpoint(env);
1557 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1558 env->exception_index = EXCP_DEBUG;
1559 cpu_loop_exit(env);
1560 } else {
1561 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1562 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1563 cpu_resume_from_signal(env, NULL);
1564 }
1565 }
1566 } else {
1567 wp->flags &= ~BP_WATCHPOINT_HIT;
1568 }
1569 }
1570}
1571
1572/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1573 so these check for a hit then pass through to the normal out-of-line
1574 phys routines. */
1575static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1576 unsigned size)
1577{
1578 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1579 switch (size) {
1580 case 1: return ldub_phys(addr);
1581 case 2: return lduw_phys(addr);
1582 case 4: return ldl_phys(addr);
1583 default: abort();
1584 }
1585}
1586
1587static void watch_mem_write(void *opaque, hwaddr addr,
1588 uint64_t val, unsigned size)
1589{
1590 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1591 switch (size) {
1592 case 1:
1593 stb_phys(addr, val);
1594 break;
1595 case 2:
1596 stw_phys(addr, val);
1597 break;
1598 case 4:
1599 stl_phys(addr, val);
1600 break;
1601 default: abort();
1602 }
1603}
1604
1605static const MemoryRegionOps watch_mem_ops = {
1606 .read = watch_mem_read,
1607 .write = watch_mem_write,
1608 .endianness = DEVICE_NATIVE_ENDIAN,
1609};
1610
1611static uint64_t subpage_read(void *opaque, hwaddr addr,
1612 unsigned len)
1613{
1614 subpage_t *subpage = opaque;
1615 uint8_t buf[4];
1616
1617#if defined(DEBUG_SUBPAGE)
1618 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1619 subpage, len, addr);
1620#endif
1621 address_space_read(subpage->as, addr + subpage->base, buf, len);
1622 switch (len) {
1623 case 1:
1624 return ldub_p(buf);
1625 case 2:
1626 return lduw_p(buf);
1627 case 4:
1628 return ldl_p(buf);
1629 default:
1630 abort();
1631 }
1632}
1633
1634static void subpage_write(void *opaque, hwaddr addr,
1635 uint64_t value, unsigned len)
1636{
1637 subpage_t *subpage = opaque;
1638 uint8_t buf[4];
1639
1640#if defined(DEBUG_SUBPAGE)
1641 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1642 " value %"PRIx64"\n",
1643 __func__, subpage, len, addr, value);
1644#endif
1645 switch (len) {
1646 case 1:
1647 stb_p(buf, value);
1648 break;
1649 case 2:
1650 stw_p(buf, value);
1651 break;
1652 case 4:
1653 stl_p(buf, value);
1654 break;
1655 default:
1656 abort();
1657 }
1658 address_space_write(subpage->as, addr + subpage->base, buf, len);
1659}
1660
1661static bool subpage_accepts(void *opaque, hwaddr addr,
1662 unsigned len, bool is_write)
1663{
1664 subpage_t *subpage = opaque;
1665#if defined(DEBUG_SUBPAGE)
1666 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1667 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1668#endif
1669
1670 return address_space_access_valid(subpage->as, addr + subpage->base,
1671 len, is_write);
1672}
1673
1674static const MemoryRegionOps subpage_ops = {
1675 .read = subpage_read,
1676 .write = subpage_write,
1677 .valid.accepts = subpage_accepts,
1678 .endianness = DEVICE_NATIVE_ENDIAN,
1679};
1680
1681static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1682 uint16_t section)
1683{
1684 int idx, eidx;
1685
1686 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1687 return -1;
1688 idx = SUBPAGE_IDX(start);
1689 eidx = SUBPAGE_IDX(end);
1690#if defined(DEBUG_SUBPAGE)
1691 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1692 __func__, mmio, start, end, idx, eidx, section);
1693#endif
1694 for (; idx <= eidx; idx++) {
1695 mmio->sub_section[idx] = section;
1696 }
1697
1698 return 0;
1699}
1700
1701static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1702{
1703 subpage_t *mmio;
1704
1705 mmio = g_malloc0(sizeof(subpage_t));
1706
1707 mmio->as = as;
1708 mmio->base = base;
1709 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1710 "subpage", TARGET_PAGE_SIZE);
1711 mmio->iomem.subpage = true;
1712#if defined(DEBUG_SUBPAGE)
1713 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1714 mmio, base, TARGET_PAGE_SIZE);
1715#endif
1716 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1717
1718 return mmio;
1719}
1720
1721static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1722{
1723 MemoryRegionSection section = {
1724 .mr = mr,
1725 .offset_within_address_space = 0,
1726 .offset_within_region = 0,
1727 .size = int128_2_64(),
1728 };
1729
1730 return phys_section_add(map, &section);
1731}
1732
1733MemoryRegion *iotlb_to_region(hwaddr index)
1734{
1735 return address_space_memory.dispatch->map.sections[
1736 index & ~TARGET_PAGE_MASK].mr;
1737}
1738
1739static void io_mem_init(void)
1740{
1741 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1742 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1743 "unassigned", UINT64_MAX);
1744 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1745 "notdirty", UINT64_MAX);
1746 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1747 "watch", UINT64_MAX);
1748}
1749
1750static void mem_begin(MemoryListener *listener)
1751{
1752 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1753 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1754 uint16_t n;
1755
1756 n = dummy_section(&d->map, &io_mem_unassigned);
1757 assert(n == PHYS_SECTION_UNASSIGNED);
1758 n = dummy_section(&d->map, &io_mem_notdirty);
1759 assert(n == PHYS_SECTION_NOTDIRTY);
1760 n = dummy_section(&d->map, &io_mem_rom);
1761 assert(n == PHYS_SECTION_ROM);
1762 n = dummy_section(&d->map, &io_mem_watch);
1763 assert(n == PHYS_SECTION_WATCH);
1764
1765 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1766 d->as = as;
1767 as->next_dispatch = d;
1768}
1769
1770static void mem_commit(MemoryListener *listener)
1771{
1772 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1773 AddressSpaceDispatch *cur = as->dispatch;
1774 AddressSpaceDispatch *next = as->next_dispatch;
1775
1776 phys_page_compact_all(next, next->map.nodes_nb);
1777
1778 as->dispatch = next;
1779
1780 if (cur) {
1781 phys_sections_free(&cur->map);
1782 g_free(cur);
1783 }
1784}
1785
1786static void tcg_commit(MemoryListener *listener)
1787{
1788 CPUState *cpu;
1789
1790 /* since each CPU stores ram addresses in its TLB cache, we must
1791 reset the modified entries */
1792 /* XXX: slow ! */
1793 CPU_FOREACH(cpu) {
1794 CPUArchState *env = cpu->env_ptr;
1795
1796 tlb_flush(env, 1);
1797 }
1798}
1799
1800static void core_log_global_start(MemoryListener *listener)
1801{
1802 cpu_physical_memory_set_dirty_tracking(true);
1803}
1804
1805static void core_log_global_stop(MemoryListener *listener)
1806{
1807 cpu_physical_memory_set_dirty_tracking(false);
1808}
1809
1810static MemoryListener core_memory_listener = {
1811 .log_global_start = core_log_global_start,
1812 .log_global_stop = core_log_global_stop,
1813 .priority = 1,
1814};
1815
1816static MemoryListener tcg_memory_listener = {
1817 .commit = tcg_commit,
1818};
1819
1820void address_space_init_dispatch(AddressSpace *as)
1821{
1822 as->dispatch = NULL;
1823 as->dispatch_listener = (MemoryListener) {
1824 .begin = mem_begin,
1825 .commit = mem_commit,
1826 .region_add = mem_add,
1827 .region_nop = mem_add,
1828 .priority = 0,
1829 };
1830 memory_listener_register(&as->dispatch_listener, as);
1831}
1832
1833void address_space_destroy_dispatch(AddressSpace *as)
1834{
1835 AddressSpaceDispatch *d = as->dispatch;
1836
1837 memory_listener_unregister(&as->dispatch_listener);
1838 g_free(d);
1839 as->dispatch = NULL;
1840}
1841
1842static void memory_map_init(void)
1843{
1844 system_memory = g_malloc(sizeof(*system_memory));
1845
1846 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1847 address_space_init(&address_space_memory, system_memory, "memory");
1848
1849 system_io = g_malloc(sizeof(*system_io));
1850 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1851 65536);
1852 address_space_init(&address_space_io, system_io, "I/O");
1853
1854 memory_listener_register(&core_memory_listener, &address_space_memory);
1855 if (tcg_enabled()) {
1856 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1857 }
1858}
1859
1860MemoryRegion *get_system_memory(void)
1861{
1862 return system_memory;
1863}
1864
1865MemoryRegion *get_system_io(void)
1866{
1867 return system_io;
1868}
1869
1870#endif /* !defined(CONFIG_USER_ONLY) */
1871
1872/* physical memory access (slow version, mainly for debug) */
1873#if defined(CONFIG_USER_ONLY)
1874int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1875 uint8_t *buf, int len, int is_write)
1876{
1877 int l, flags;
1878 target_ulong page;
1879 void * p;
1880
1881 while (len > 0) {
1882 page = addr & TARGET_PAGE_MASK;
1883 l = (page + TARGET_PAGE_SIZE) - addr;
1884 if (l > len)
1885 l = len;
1886 flags = page_get_flags(page);
1887 if (!(flags & PAGE_VALID))
1888 return -1;
1889 if (is_write) {
1890 if (!(flags & PAGE_WRITE))
1891 return -1;
1892 /* XXX: this code should not depend on lock_user */
1893 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1894 return -1;
1895 memcpy(p, buf, l);
1896 unlock_user(p, addr, l);
1897 } else {
1898 if (!(flags & PAGE_READ))
1899 return -1;
1900 /* XXX: this code should not depend on lock_user */
1901 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1902 return -1;
1903 memcpy(buf, p, l);
1904 unlock_user(p, addr, 0);
1905 }
1906 len -= l;
1907 buf += l;
1908 addr += l;
1909 }
1910 return 0;
1911}
1912
1913#else
1914
1915static void invalidate_and_set_dirty(hwaddr addr,
1916 hwaddr length)
1917{
1918 if (cpu_physical_memory_is_clean(addr)) {
1919 /* invalidate code */
1920 tb_invalidate_phys_page_range(addr, addr + length, 0);
1921 /* set dirty bit */
1922 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1923 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1924 }
1925 xen_modified_memory(addr, length);
1926}
1927
1928static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1929{
1930 if (memory_region_is_ram(mr)) {
1931 return !(is_write && mr->readonly);
1932 }
1933 if (memory_region_is_romd(mr)) {
1934 return !is_write;
1935 }
1936
1937 return false;
1938}
1939
1940static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1941{
1942 unsigned access_size_max = mr->ops->valid.max_access_size;
1943
1944 /* Regions are assumed to support 1-4 byte accesses unless
1945 otherwise specified. */
1946 if (access_size_max == 0) {
1947 access_size_max = 4;
1948 }
1949
1950 /* Bound the maximum access by the alignment of the address. */
1951 if (!mr->ops->impl.unaligned) {
1952 unsigned align_size_max = addr & -addr;
1953 if (align_size_max != 0 && align_size_max < access_size_max) {
1954 access_size_max = align_size_max;
1955 }
1956 }
1957
1958 /* Don't attempt accesses larger than the maximum. */
1959 if (l > access_size_max) {
1960 l = access_size_max;
1961 }
1962 if (l & (l - 1)) {
1963 l = 1 << (qemu_fls(l) - 1);
1964 }
1965
1966 return l;
1967}
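/* Note: (addr & -addr) above isolates the lowest set bit of the address,
 * i.e. its natural alignment, so regions that cannot handle unaligned
 * accesses are never asked for one; the final adjustment also rounds odd
 * lengths down to a power of two. */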
1968
1969bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1970 int len, bool is_write)
1971{
1972 hwaddr l;
1973 uint8_t *ptr;
1974 uint64_t val;
1975 hwaddr addr1;
1976 MemoryRegion *mr;
1977 bool error = false;
1978
1979 while (len > 0) {
1980 l = len;
1981 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1982
1983 if (is_write) {
1984 if (!memory_access_is_direct(mr, is_write)) {
1985 l = memory_access_size(mr, l, addr1);
1986 /* XXX: could force current_cpu to NULL to avoid
1987 potential bugs */
1988 switch (l) {
1989 case 8:
1990 /* 64 bit write access */
1991 val = ldq_p(buf);
1992 error |= io_mem_write(mr, addr1, val, 8);
1993 break;
1994 case 4:
1995 /* 32 bit write access */
1996 val = ldl_p(buf);
1997 error |= io_mem_write(mr, addr1, val, 4);
1998 break;
1999 case 2:
2000 /* 16 bit write access */
2001 val = lduw_p(buf);
2002 error |= io_mem_write(mr, addr1, val, 2);
2003 break;
2004 case 1:
2005 /* 8 bit write access */
2006 val = ldub_p(buf);
2007 error |= io_mem_write(mr, addr1, val, 1);
2008 break;
2009 default:
2010 abort();
2011 }
2012 } else {
2013 addr1 += memory_region_get_ram_addr(mr);
2014 /* RAM case */
2015 ptr = qemu_get_ram_ptr(addr1);
2016 memcpy(ptr, buf, l);
2017 invalidate_and_set_dirty(addr1, l);
2018 }
2019 } else {
2020 if (!memory_access_is_direct(mr, is_write)) {
2021 /* I/O case */
2022 l = memory_access_size(mr, l, addr1);
2023 switch (l) {
2024 case 8:
2025 /* 64 bit read access */
2026 error |= io_mem_read(mr, addr1, &val, 8);
2027 stq_p(buf, val);
2028 break;
2029 case 4:
2030 /* 32 bit read access */
2031 error |= io_mem_read(mr, addr1, &val, 4);
2032 stl_p(buf, val);
2033 break;
2034 case 2:
2035 /* 16 bit read access */
2036 error |= io_mem_read(mr, addr1, &val, 2);
2037 stw_p(buf, val);
2038 break;
2039 case 1:
2040 /* 8 bit read access */
2041 error |= io_mem_read(mr, addr1, &val, 1);
2042 stb_p(buf, val);
2043 break;
2044 default:
2045 abort();
2046 }
2047 } else {
2048 /* RAM case */
2049 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2050 memcpy(buf, ptr, l);
2051 }
2052 }
2053 len -= l;
2054 buf += l;
2055 addr += l;
2056 }
2057
2058 return error;
2059}
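/* The return value accumulates error flags from the MMIO reads and writes
 * above; direct RAM copies never set it. */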
2060
2061bool address_space_write(AddressSpace *as, hwaddr addr,
2062 const uint8_t *buf, int len)
2063{
2064 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2065}
2066
2067bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2068{
2069 return address_space_rw(as, addr, buf, len, false);
2070}
2071
2072
2073void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2074 int len, int is_write)
2075{
2076 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2077}
2078
2079enum write_rom_type {
2080 WRITE_DATA,
2081 FLUSH_CACHE,
2082};
2083
2084static inline void cpu_physical_memory_write_rom_internal(
2085 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2086{
2087 hwaddr l;
2088 uint8_t *ptr;
2089 hwaddr addr1;
2090 MemoryRegion *mr;
2091
2092 while (len > 0) {
2093 l = len;
2094 mr = address_space_translate(&address_space_memory,
2095 addr, &addr1, &l, true);
2096
2097 if (!(memory_region_is_ram(mr) ||
2098 memory_region_is_romd(mr))) {
2099 /* do nothing */
2100 } else {
2101 addr1 += memory_region_get_ram_addr(mr);
2102 /* ROM/RAM case */
2103 ptr = qemu_get_ram_ptr(addr1);
2104 switch (type) {
2105 case WRITE_DATA:
2106 memcpy(ptr, buf, l);
2107 invalidate_and_set_dirty(addr1, l);
2108 break;
2109 case FLUSH_CACHE:
2110 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2111 break;
2112 }
2113 }
2114 len -= l;
2115 buf += l;
2116 addr += l;
2117 }
2118}
2119
2120/* used for ROM loading: can write to RAM and ROM */
2121void cpu_physical_memory_write_rom(hwaddr addr,
2122 const uint8_t *buf, int len)
2123{
2124 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2125}
2126
2127void cpu_flush_icache_range(hwaddr start, int len)
2128{
2129 /*
2130 * This function should do the same thing as an icache flush that was
2131 * triggered from within the guest. For TCG we are always cache coherent,
2132 * so there is no need to flush anything. For KVM/Xen we need to flush at
2133 * least the host's instruction cache.
2134 */
2135 if (tcg_enabled()) {
2136 return;
2137 }
2138
2139 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
2140}
2141
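/* When address_space_map() cannot hand out a direct pointer (e.g. for
 * MMIO), it falls back to a single global bounce buffer ('bounce' below),
 * whose storage is allocated on demand. While that buffer is in use,
 * further bounce-buffered mappings fail; callers may register a MapClient
 * callback to learn when it has been released. */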
2142typedef struct {
2143 MemoryRegion *mr;
2144 void *buffer;
2145 hwaddr addr;
2146 hwaddr len;
2147} BounceBuffer;
2148
2149static BounceBuffer bounce;
2150
2151typedef struct MapClient {
2152 void *opaque;
2153 void (*callback)(void *opaque);
2154 QLIST_ENTRY(MapClient) link;
2155} MapClient;
2156
2157static QLIST_HEAD(map_client_list, MapClient) map_client_list
2158 = QLIST_HEAD_INITIALIZER(map_client_list);
2159
2160void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2161{
2162 MapClient *client = g_malloc(sizeof(*client));
2163
2164 client->opaque = opaque;
2165 client->callback = callback;
2166 QLIST_INSERT_HEAD(&map_client_list, client, link);
2167 return client;
2168}
2169
2170static void cpu_unregister_map_client(void *_client)
2171{
2172 MapClient *client = (MapClient *)_client;
2173
2174 QLIST_REMOVE(client, link);
2175 g_free(client);
2176}
2177
2178static void cpu_notify_map_clients(void)
2179{
2180 MapClient *client;
2181
2182 while (!QLIST_EMPTY(&map_client_list)) {
2183 client = QLIST_FIRST(&map_client_list);
2184 client->callback(client->opaque);
2185 cpu_unregister_map_client(client);
2186 }
2187}
2188
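/* Return true if the whole [addr, addr + len) range can be accessed for
 * reading (is_write == false) or writing (is_write == true). I/O regions
 * are checked with memory_region_access_valid(); directly accessible RAM
 * is always considered valid. */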
2189bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2190{
2191 MemoryRegion *mr;
2192 hwaddr l, xlat;
2193
2194 while (len > 0) {
2195 l = len;
2196 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2197 if (!memory_access_is_direct(mr, is_write)) {
2198 l = memory_access_size(mr, l, addr);
2199 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2200 return false;
2201 }
2202 }
2203
2204 len -= l;
2205 addr += l;
2206 }
2207 return true;
2208}
2209
2210/* Map a physical memory region into a host virtual address.
2211 * May map a subset of the requested range, given by and returned in *plen.
2212 * May return NULL if resources needed to perform the mapping are exhausted.
2213 * Use only for reads OR writes - not for read-modify-write operations.
2214 * Use cpu_register_map_client() to know when retrying the map operation is
2215 * likely to succeed.
2216 */
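/* Minimal usage sketch (illustrative only; "as", "addr" and "size" are
 * assumed to be supplied by the caller):
 *
 *     hwaddr plen = size;
 *     void *p = address_space_map(as, addr, &plen, false);
 *     if (p) {
 *         ... consume up to plen bytes from p ...
 *         address_space_unmap(as, p, plen, false, plen);
 *     }
 */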
2217void *address_space_map(AddressSpace *as,
2218 hwaddr addr,
2219 hwaddr *plen,
2220 bool is_write)
2221{
2222 hwaddr len = *plen;
2223 hwaddr done = 0;
2224 hwaddr l, xlat, base;
2225 MemoryRegion *mr, *this_mr;
2226 ram_addr_t raddr;
2227
2228 if (len == 0) {
2229 return NULL;
2230 }
2231
2232 l = len;
2233 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2234 if (!memory_access_is_direct(mr, is_write)) {
2235 if (bounce.buffer) {
2236 return NULL;
2237 }
2238 /* Avoid unbounded allocations */
2239 l = MIN(l, TARGET_PAGE_SIZE);
2240 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2241 bounce.addr = addr;
2242 bounce.len = l;
2243
2244 memory_region_ref(mr);
2245 bounce.mr = mr;
2246 if (!is_write) {
2247 address_space_read(as, addr, bounce.buffer, l);
2248 }
2249
2250 *plen = l;
2251 return bounce.buffer;
2252 }
2253
2254 base = xlat;
2255 raddr = memory_region_get_ram_addr(mr);
2256
2257 for (;;) {
2258 len -= l;
2259 addr += l;
2260 done += l;
2261 if (len == 0) {
2262 break;
2263 }
2264
2265 l = len;
2266 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2267 if (this_mr != mr || xlat != base + done) {
2268 break;
2269 }
2270 }
2271
2272 memory_region_ref(mr);
2273 *plen = done;
2274 return qemu_ram_ptr_length(raddr + base, plen);
2275}
2276
2277/* Unmaps a memory region previously mapped by address_space_map().
2278 * Will also mark the memory as dirty if is_write == 1. access_len gives
2279 * the amount of memory that was actually read or written by the caller.
2280 */
2281void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2282 int is_write, hwaddr access_len)
2283{
2284 if (buffer != bounce.buffer) {
2285 MemoryRegion *mr;
2286 ram_addr_t addr1;
2287
2288 mr = qemu_ram_addr_from_host(buffer, &addr1);
2289 assert(mr != NULL);
2290 if (is_write) {
2291 while (access_len) {
2292 unsigned l;
2293 l = TARGET_PAGE_SIZE;
2294 if (l > access_len)
2295 l = access_len;
2296 invalidate_and_set_dirty(addr1, l);
2297 addr1 += l;
2298 access_len -= l;
2299 }
2300 }
2301 if (xen_enabled()) {
2302 xen_invalidate_map_cache_entry(buffer);
2303 }
2304 memory_region_unref(mr);
2305 return;
2306 }
2307 if (is_write) {
2308 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2309 }
2310 qemu_vfree(bounce.buffer);
2311 bounce.buffer = NULL;
2312 memory_region_unref(bounce.mr);
2313 cpu_notify_map_clients();
2314}
2315
2316void *cpu_physical_memory_map(hwaddr addr,
2317 hwaddr *plen,
2318 int is_write)
2319{
2320 return address_space_map(&address_space_memory, addr, plen, is_write);
2321}
2322
2323void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2324 int is_write, hwaddr access_len)
2325{
2326 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2327}
2328
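/* The ld*_phys()/st*_phys() helpers below access guest physical memory
 * through address_space_memory one element at a time, byte-swapping as
 * needed when the requested device endianness differs from the target's. */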
2329/* warning: addr must be aligned */
2330static inline uint32_t ldl_phys_internal(hwaddr addr,
2331 enum device_endian endian)
2332{
2333 uint8_t *ptr;
2334 uint64_t val;
2335 MemoryRegion *mr;
2336 hwaddr l = 4;
2337 hwaddr addr1;
2338
2339 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2340 false);
2341 if (l < 4 || !memory_access_is_direct(mr, false)) {
2342 /* I/O case */
2343 io_mem_read(mr, addr1, &val, 4);
2344#if defined(TARGET_WORDS_BIGENDIAN)
2345 if (endian == DEVICE_LITTLE_ENDIAN) {
2346 val = bswap32(val);
2347 }
2348#else
2349 if (endian == DEVICE_BIG_ENDIAN) {
2350 val = bswap32(val);
2351 }
2352#endif
2353 } else {
2354 /* RAM case */
2355 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2356 & TARGET_PAGE_MASK)
2357 + addr1);
2358 switch (endian) {
2359 case DEVICE_LITTLE_ENDIAN:
2360 val = ldl_le_p(ptr);
2361 break;
2362 case DEVICE_BIG_ENDIAN:
2363 val = ldl_be_p(ptr);
2364 break;
2365 default:
2366 val = ldl_p(ptr);
2367 break;
2368 }
2369 }
2370 return val;
2371}
2372
2373uint32_t ldl_phys(hwaddr addr)
2374{
2375 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2376}
2377
2378uint32_t ldl_le_phys(hwaddr addr)
2379{
2380 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2381}
2382
2383uint32_t ldl_be_phys(hwaddr addr)
2384{
2385 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2386}
2387
2388/* warning: addr must be aligned */
2389static inline uint64_t ldq_phys_internal(hwaddr addr,
2390 enum device_endian endian)
2391{
2392 uint8_t *ptr;
2393 uint64_t val;
2394 MemoryRegion *mr;
2395 hwaddr l = 8;
2396 hwaddr addr1;
2397
2398 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2399 false);
2400 if (l < 8 || !memory_access_is_direct(mr, false)) {
2401 /* I/O case */
2402 io_mem_read(mr, addr1, &val, 8);
2403#if defined(TARGET_WORDS_BIGENDIAN)
2404 if (endian == DEVICE_LITTLE_ENDIAN) {
2405 val = bswap64(val);
2406 }
2407#else
2408 if (endian == DEVICE_BIG_ENDIAN) {
2409 val = bswap64(val);
2410 }
2411#endif
2412 } else {
2413 /* RAM case */
2414 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2415 & TARGET_PAGE_MASK)
2416 + addr1);
2417 switch (endian) {
2418 case DEVICE_LITTLE_ENDIAN:
2419 val = ldq_le_p(ptr);
2420 break;
2421 case DEVICE_BIG_ENDIAN:
2422 val = ldq_be_p(ptr);
2423 break;
2424 default:
2425 val = ldq_p(ptr);
2426 break;
2427 }
2428 }
2429 return val;
2430}
2431
2432uint64_t ldq_phys(hwaddr addr)
2433{
2434 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2435}
2436
2437uint64_t ldq_le_phys(hwaddr addr)
2438{
2439 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2440}
2441
2442uint64_t ldq_be_phys(hwaddr addr)
2443{
2444 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2445}
2446
2447/* XXX: optimize */
2448uint32_t ldub_phys(hwaddr addr)
2449{
2450 uint8_t val;
2451 cpu_physical_memory_read(addr, &val, 1);
2452 return val;
2453}
2454
2455/* warning: addr must be aligned */
2456static inline uint32_t lduw_phys_internal(hwaddr addr,
2457 enum device_endian endian)
2458{
2459 uint8_t *ptr;
2460 uint64_t val;
2461 MemoryRegion *mr;
2462 hwaddr l = 2;
2463 hwaddr addr1;
2464
2465 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2466 false);
2467 if (l < 2 || !memory_access_is_direct(mr, false)) {
2468 /* I/O case */
2469 io_mem_read(mr, addr1, &val, 2);
2470#if defined(TARGET_WORDS_BIGENDIAN)
2471 if (endian == DEVICE_LITTLE_ENDIAN) {
2472 val = bswap16(val);
2473 }
2474#else
2475 if (endian == DEVICE_BIG_ENDIAN) {
2476 val = bswap16(val);
2477 }
2478#endif
2479 } else {
2480 /* RAM case */
2481 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2482 & TARGET_PAGE_MASK)
2483 + addr1);
2484 switch (endian) {
2485 case DEVICE_LITTLE_ENDIAN:
2486 val = lduw_le_p(ptr);
2487 break;
2488 case DEVICE_BIG_ENDIAN:
2489 val = lduw_be_p(ptr);
2490 break;
2491 default:
2492 val = lduw_p(ptr);
2493 break;
2494 }
2495 }
2496 return val;
2497}
2498
2499uint32_t lduw_phys(hwaddr addr)
2500{
2501 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2502}
2503
2504uint32_t lduw_le_phys(hwaddr addr)
2505{
2506 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2507}
2508
2509uint32_t lduw_be_phys(hwaddr addr)
2510{
2511 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2512}
2513
2514/* warning: addr must be aligned. The RAM page is not marked as dirty
2515 and the code inside is not invalidated. This is useful when the dirty
2516 bits are used to track modified PTEs. */
2517void stl_phys_notdirty(hwaddr addr, uint32_t val)
2518{
2519 uint8_t *ptr;
2520 MemoryRegion *mr;
2521 hwaddr l = 4;
2522 hwaddr addr1;
2523
2524 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2525 true);
2526 if (l < 4 || !memory_access_is_direct(mr, true)) {
2527 io_mem_write(mr, addr1, val, 4);
2528 } else {
2529 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2530 ptr = qemu_get_ram_ptr(addr1);
2531 stl_p(ptr, val);
2532
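 /* During live migration the store must still be tracked so the page is
  * re-sent: if the page was clean, drop any TBs covering this word and
  * set the MIGRATION and VGA dirty bits (but not CODE). */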
2533 if (unlikely(in_migration)) {
2534 if (cpu_physical_memory_is_clean(addr1)) {
2535 /* invalidate code */
2536 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2537 /* set dirty bit */
2538 cpu_physical_memory_set_dirty_flag(addr1,
2539 DIRTY_MEMORY_MIGRATION);
2540 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2541 }
2542 }
2543 }
2544}
2545
2546/* warning: addr must be aligned */
2547static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2548 enum device_endian endian)
2549{
2550 uint8_t *ptr;
2551 MemoryRegion *mr;
2552 hwaddr l = 4;
2553 hwaddr addr1;
2554
2555 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2556 true);
2557 if (l < 4 || !memory_access_is_direct(mr, true)) {
2558#if defined(TARGET_WORDS_BIGENDIAN)
2559 if (endian == DEVICE_LITTLE_ENDIAN) {
2560 val = bswap32(val);
2561 }
2562#else
2563 if (endian == DEVICE_BIG_ENDIAN) {
2564 val = bswap32(val);
2565 }
2566#endif
2567 io_mem_write(mr, addr1, val, 4);
2568 } else {
2569 /* RAM case */
2570 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2571 ptr = qemu_get_ram_ptr(addr1);
2572 switch (endian) {
2573 case DEVICE_LITTLE_ENDIAN:
2574 stl_le_p(ptr, val);
2575 break;
2576 case DEVICE_BIG_ENDIAN:
2577 stl_be_p(ptr, val);
2578 break;
2579 default:
2580 stl_p(ptr, val);
2581 break;
2582 }
2583 invalidate_and_set_dirty(addr1, 4);
2584 }
2585}
2586
2587void stl_phys(hwaddr addr, uint32_t val)
2588{
2589 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2590}
2591
2592void stl_le_phys(hwaddr addr, uint32_t val)
2593{
2594 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2595}
2596
2597void stl_be_phys(hwaddr addr, uint32_t val)
2598{
2599 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2600}
2601
2602/* XXX: optimize */
2603void stb_phys(hwaddr addr, uint32_t val)
2604{
2605 uint8_t v = val;
2606 cpu_physical_memory_write(addr, &v, 1);
2607}
2608
2609/* warning: addr must be aligned */
2610static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2611 enum device_endian endian)
2612{
2613 uint8_t *ptr;
2614 MemoryRegion *mr;
2615 hwaddr l = 2;
2616 hwaddr addr1;
2617
2618 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2619 true);
2620 if (l < 2 || !memory_access_is_direct(mr, true)) {
2621#if defined(TARGET_WORDS_BIGENDIAN)
2622 if (endian == DEVICE_LITTLE_ENDIAN) {
2623 val = bswap16(val);
2624 }
2625#else
2626 if (endian == DEVICE_BIG_ENDIAN) {
2627 val = bswap16(val);
2628 }
2629#endif
2630 io_mem_write(mr, addr1, val, 2);
2631 } else {
2632 /* RAM case */
2633 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2634 ptr = qemu_get_ram_ptr(addr1);
2635 switch (endian) {
2636 case DEVICE_LITTLE_ENDIAN:
2637 stw_le_p(ptr, val);
2638 break;
2639 case DEVICE_BIG_ENDIAN:
2640 stw_be_p(ptr, val);
2641 break;
2642 default:
2643 stw_p(ptr, val);
2644 break;
2645 }
2646 invalidate_and_set_dirty(addr1, 2);
2647 }
2648}
2649
2650void stw_phys(hwaddr addr, uint32_t val)
2651{
2652 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2653}
2654
2655void stw_le_phys(hwaddr addr, uint32_t val)
2656{
2657 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2658}
2659
2660void stw_be_phys(hwaddr addr, uint32_t val)
2661{
2662 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2663}
2664
2665/* XXX: optimize */
2666void stq_phys(hwaddr addr, uint64_t val)
2667{
2668 val = tswap64(val);
2669 cpu_physical_memory_write(addr, &val, 8);
2670}
2671
2672void stq_le_phys(hwaddr addr, uint64_t val)
2673{
2674 val = cpu_to_le64(val);
2675 cpu_physical_memory_write(addr, &val, 8);
2676}
2677
2678void stq_be_phys(hwaddr addr, uint64_t val)
2679{
2680 val = cpu_to_be64(val);
2681 cpu_physical_memory_write(addr, &val, 8);
2682}
2683
2684/* virtual memory access for debug (includes writing to ROM) */
2685int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2686 uint8_t *buf, int len, int is_write)
2687{
2688 int l;
2689 hwaddr phys_addr;
2690 target_ulong page;
2691
2692 while (len > 0) {
2693 page = addr & TARGET_PAGE_MASK;
2694 phys_addr = cpu_get_phys_page_debug(cpu, page);
2695 /* if no physical page is mapped, return an error */
2696 if (phys_addr == -1)
2697 return -1;
2698 l = (page + TARGET_PAGE_SIZE) - addr;
2699 if (l > len)
2700 l = len;
2701 phys_addr += (addr & ~TARGET_PAGE_MASK);
2702 if (is_write)
2703 cpu_physical_memory_write_rom(phys_addr, buf, l);
2704 else
2705 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2706 len -= l;
2707 buf += l;
2708 addr += l;
2709 }
2710 return 0;
2711}
2712#endif
2713
2714#if !defined(CONFIG_USER_ONLY)
2715
2716/*
2717 * A helper function for the _utterly broken_ virtio device model to find
2718 * out if it's running on a big-endian machine. Don't do this at home, kids!
2719 */
2720bool virtio_is_big_endian(void);
2721bool virtio_is_big_endian(void)
2722{
2723#if defined(TARGET_WORDS_BIGENDIAN)
2724 return true;
2725#else
2726 return false;
2727#endif
2728}
2729
2730#endif
2731
2732#ifndef CONFIG_USER_ONLY
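/* Return true if the guest physical address is not backed by RAM or a ROM
 * device (ROMD), i.e. accesses to it go through I/O handlers. */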
2733bool cpu_physical_memory_is_io(hwaddr phys_addr)
2734{
2735 MemoryRegion *mr;
2736 hwaddr l = 1;
2737
2738 mr = address_space_translate(&address_space_memory,
2739 phys_addr, &phys_addr, &l, false);
2740
2741 return !(memory_region_is_ram(mr) ||
2742 memory_region_is_romd(mr));
2743}
2744
2745void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2746{
2747 RAMBlock *block;
2748
2749 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2750 func(block->host, block->offset, block->length, opaque);
2751 }
2752}
2753#endif