1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
63 * are protected by the ramlist lock.
64 */
65 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
66
67 static MemoryRegion *system_memory;
68 static MemoryRegion *system_io;
69
70 AddressSpace address_space_io;
71 AddressSpace address_space_memory;
72
73 MemoryRegion io_mem_rom, io_mem_notdirty;
74 static MemoryRegion io_mem_unassigned;
75
76 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
77 #define RAM_PREALLOC (1 << 0)
78
79 /* RAM is mmap-ed with MAP_SHARED */
80 #define RAM_SHARED (1 << 1)
81
82 /* Only a portion of RAM (used_length) is actually used and migrated.
83  * This used_length can change across reboots.
84 */
85 #define RAM_RESIZEABLE (1 << 2)
86
87 #endif
88
89 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
90 /* current CPU in the current thread. It is only valid inside
91 cpu_exec() */
92 DEFINE_TLS(CPUState *, current_cpu);
93 /* 0 = Do not count executed instructions.
94 1 = Precise instruction counting.
95 2 = Adaptive rate instruction counting. */
96 int use_icount;
97
98 #if !defined(CONFIG_USER_ONLY)
99
100 typedef struct PhysPageEntry PhysPageEntry;
101
102 struct PhysPageEntry {
103     /* How many bits to skip to the next level (in units of P_L2_BITS). 0 for a leaf. */
104 uint32_t skip : 6;
105 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
106 uint32_t ptr : 26;
107 };
108
109 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
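/* Roughly: an entry with skip == 0 is a leaf and its ptr indexes phys_sections;
 * an entry with skip > 0 is an interior node whose ptr indexes phys_map_nodes
 * and whose skip says how many radix-tree levels the lookup may jump over at
 * once.  PHYS_MAP_NODE_NIL (all ones in the 26-bit ptr field) marks an entry
 * whose child node has not been allocated yet.
 */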
110
111 /* Size of the L2 (and L3, etc) page tables. */
112 #define ADDR_SPACE_BITS 64
113
114 #define P_L2_BITS 9
115 #define P_L2_SIZE (1 << P_L2_BITS)
116
117 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
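/* For example, assuming 4K target pages (TARGET_PAGE_BITS == 12) and the
 * 64-bit ADDR_SPACE_BITS above, this works out to (64 - 12 - 1) / 9 + 1 = 6
 * levels of 512-entry tables covering the whole physical address space.
 */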
118
119 typedef PhysPageEntry Node[P_L2_SIZE];
120
121 typedef struct PhysPageMap {
122 struct rcu_head rcu;
123
124 unsigned sections_nb;
125 unsigned sections_nb_alloc;
126 unsigned nodes_nb;
127 unsigned nodes_nb_alloc;
128 Node *nodes;
129 MemoryRegionSection *sections;
130 } PhysPageMap;
131
132 struct AddressSpaceDispatch {
133 struct rcu_head rcu;
134
135 /* This is a multi-level map on the physical address space.
136 * The bottom level has pointers to MemoryRegionSections.
137 */
138 PhysPageEntry phys_map;
139 PhysPageMap map;
140 AddressSpace *as;
141 };
142
143 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
144 typedef struct subpage_t {
145 MemoryRegion iomem;
146 AddressSpace *as;
147 hwaddr base;
148 uint16_t sub_section[TARGET_PAGE_SIZE];
149 } subpage_t;
150
151 #define PHYS_SECTION_UNASSIGNED 0
152 #define PHYS_SECTION_NOTDIRTY 1
153 #define PHYS_SECTION_ROM 2
154 #define PHYS_SECTION_WATCH 3
155
156 static void io_mem_init(void);
157 static void memory_map_init(void);
158 static void tcg_commit(MemoryListener *listener);
159
160 static MemoryRegion io_mem_watch;
161 #endif
162
163 #if !defined(CONFIG_USER_ONLY)
164
165 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
166 {
167 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
168 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
169 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
170 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
171 }
172 }
173
174 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
175 {
176 unsigned i;
177 uint32_t ret;
178 PhysPageEntry e;
179 PhysPageEntry *p;
180
181 ret = map->nodes_nb++;
182 p = map->nodes[ret];
183 assert(ret != PHYS_MAP_NODE_NIL);
184 assert(ret != map->nodes_nb_alloc);
185
186 e.skip = leaf ? 0 : 1;
187 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
188 for (i = 0; i < P_L2_SIZE; ++i) {
189 memcpy(&p[i], &e, sizeof(e));
190 }
191 return ret;
192 }
193
194 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
195 hwaddr *index, hwaddr *nb, uint16_t leaf,
196 int level)
197 {
198 PhysPageEntry *p;
199 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
200
201 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
202 lp->ptr = phys_map_node_alloc(map, level == 0);
203 }
204 p = map->nodes[lp->ptr];
205 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
206
207 while (*nb && lp < &p[P_L2_SIZE]) {
208 if ((*index & (step - 1)) == 0 && *nb >= step) {
209 lp->skip = 0;
210 lp->ptr = leaf;
211 *index += step;
212 *nb -= step;
213 } else {
214 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
215 }
216 ++lp;
217 }
218 }
219
220 static void phys_page_set(AddressSpaceDispatch *d,
221 hwaddr index, hwaddr nb,
222 uint16_t leaf)
223 {
224 /* Wildly overreserve - it doesn't matter much. */
225 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
226
227 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
228 }
229
230 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
231 * and update our entry so we can skip it and go directly to the destination.
232 */
233 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
234 {
235 unsigned valid_ptr = P_L2_SIZE;
236 int valid = 0;
237 PhysPageEntry *p;
238 int i;
239
240 if (lp->ptr == PHYS_MAP_NODE_NIL) {
241 return;
242 }
243
244 p = nodes[lp->ptr];
245 for (i = 0; i < P_L2_SIZE; i++) {
246 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
247 continue;
248 }
249
250 valid_ptr = i;
251 valid++;
252 if (p[i].skip) {
253 phys_page_compact(&p[i], nodes, compacted);
254 }
255 }
256
257 /* We can only compress if there's only one child. */
258 if (valid != 1) {
259 return;
260 }
261
262 assert(valid_ptr < P_L2_SIZE);
263
264 /* Don't compress if it won't fit in the # of bits we have. */
265 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
266 return;
267 }
268
269 lp->ptr = p[valid_ptr].ptr;
270 if (!p[valid_ptr].skip) {
271 /* If our only child is a leaf, make this a leaf. */
272 /* By design, we should have made this node a leaf to begin with so we
273 * should never reach here.
274 * But since it's so simple to handle this, let's do it just in case we
275 * change this rule.
276 */
277 lp->skip = 0;
278 } else {
279 lp->skip += p[valid_ptr].skip;
280 }
281 }
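/* As a sketch of the effect: when an interior node has exactly one populated
 * slot, the parent entry absorbs that child's skip count (or becomes a leaf),
 * so a later phys_page_find() can jump straight past the single-child chain
 * instead of loading one node per level.
 */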
282
283 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
284 {
285 DECLARE_BITMAP(compacted, nodes_nb);
286
287 if (d->phys_map.skip) {
288 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
289 }
290 }
291
292 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
293 Node *nodes, MemoryRegionSection *sections)
294 {
295 PhysPageEntry *p;
296 hwaddr index = addr >> TARGET_PAGE_BITS;
297 int i;
298
299 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
300 if (lp.ptr == PHYS_MAP_NODE_NIL) {
301 return &sections[PHYS_SECTION_UNASSIGNED];
302 }
303 p = nodes[lp.ptr];
304 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
305 }
306
307 if (sections[lp.ptr].size.hi ||
308 range_covers_byte(sections[lp.ptr].offset_within_address_space,
309 sections[lp.ptr].size.lo, addr)) {
310 return &sections[lp.ptr];
311 } else {
312 return &sections[PHYS_SECTION_UNASSIGNED];
313 }
314 }
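/* The walk above starts at i = P_L2_LEVELS and subtracts each entry's skip
 * count, so a fully compacted path reaches its leaf in very few iterations;
 * an address that falls outside the leaf section's byte range still resolves
 * to the unassigned section.
 */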
315
316 bool memory_region_is_unassigned(MemoryRegion *mr)
317 {
318 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
319 && mr != &io_mem_watch;
320 }
321
322 /* Called from RCU critical section */
323 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
324 hwaddr addr,
325 bool resolve_subpage)
326 {
327 MemoryRegionSection *section;
328 subpage_t *subpage;
329
330 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
331 if (resolve_subpage && section->mr->subpage) {
332 subpage = container_of(section->mr, subpage_t, iomem);
333 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
334 }
335 return section;
336 }
337
338 /* Called from RCU critical section */
339 static MemoryRegionSection *
340 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
341 hwaddr *plen, bool resolve_subpage)
342 {
343 MemoryRegionSection *section;
344 Int128 diff;
345
346 section = address_space_lookup_region(d, addr, resolve_subpage);
347 /* Compute offset within MemoryRegionSection */
348 addr -= section->offset_within_address_space;
349
350 /* Compute offset within MemoryRegion */
351 *xlat = addr + section->offset_within_region;
352
353 diff = int128_sub(section->mr->size, int128_make64(addr));
354 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
355 return section;
356 }
357
358 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
359 {
360 if (memory_region_is_ram(mr)) {
361 return !(is_write && mr->readonly);
362 }
363 if (memory_region_is_romd(mr)) {
364 return !is_write;
365 }
366
367 return false;
368 }
369
370 /* Called from RCU critical section */
371 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
372 hwaddr *xlat, hwaddr *plen,
373 bool is_write)
374 {
375 IOMMUTLBEntry iotlb;
376 MemoryRegionSection *section;
377 MemoryRegion *mr;
378
379 for (;;) {
380 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
381 section = address_space_translate_internal(d, addr, &addr, plen, true);
382 mr = section->mr;
383
384 if (!mr->iommu_ops) {
385 break;
386 }
387
388 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
389 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
390 | (addr & iotlb.addr_mask));
391 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
392 if (!(iotlb.perm & (1 << is_write))) {
393 mr = &io_mem_unassigned;
394 break;
395 }
396
397 as = iotlb.target_as;
398 }
399
400 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
401 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
402 *plen = MIN(page, *plen);
403 }
404
405 *xlat = addr;
406 return mr;
407 }
408
409 /* Called from RCU critical section */
410 MemoryRegionSection *
411 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
412 hwaddr *xlat, hwaddr *plen)
413 {
414 MemoryRegionSection *section;
415 section = address_space_translate_internal(cpu->memory_dispatch,
416 addr, xlat, plen, false);
417
418 assert(!section->mr->iommu_ops);
419 return section;
420 }
421 #endif
422
423 #if !defined(CONFIG_USER_ONLY)
424
425 static int cpu_common_post_load(void *opaque, int version_id)
426 {
427 CPUState *cpu = opaque;
428
429 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
430 version_id is increased. */
431 cpu->interrupt_request &= ~0x01;
432 tlb_flush(cpu, 1);
433
434 return 0;
435 }
436
437 static int cpu_common_pre_load(void *opaque)
438 {
439 CPUState *cpu = opaque;
440
441 cpu->exception_index = -1;
442
443 return 0;
444 }
445
446 static bool cpu_common_exception_index_needed(void *opaque)
447 {
448 CPUState *cpu = opaque;
449
450 return tcg_enabled() && cpu->exception_index != -1;
451 }
452
453 static const VMStateDescription vmstate_cpu_common_exception_index = {
454 .name = "cpu_common/exception_index",
455 .version_id = 1,
456 .minimum_version_id = 1,
457 .needed = cpu_common_exception_index_needed,
458 .fields = (VMStateField[]) {
459 VMSTATE_INT32(exception_index, CPUState),
460 VMSTATE_END_OF_LIST()
461 }
462 };
463
464 const VMStateDescription vmstate_cpu_common = {
465 .name = "cpu_common",
466 .version_id = 1,
467 .minimum_version_id = 1,
468 .pre_load = cpu_common_pre_load,
469 .post_load = cpu_common_post_load,
470 .fields = (VMStateField[]) {
471 VMSTATE_UINT32(halted, CPUState),
472 VMSTATE_UINT32(interrupt_request, CPUState),
473 VMSTATE_END_OF_LIST()
474 },
475 .subsections = (const VMStateDescription*[]) {
476 &vmstate_cpu_common_exception_index,
477 NULL
478 }
479 };
480
481 #endif
482
483 CPUState *qemu_get_cpu(int index)
484 {
485 CPUState *cpu;
486
487 CPU_FOREACH(cpu) {
488 if (cpu->cpu_index == index) {
489 return cpu;
490 }
491 }
492
493 return NULL;
494 }
495
496 #if !defined(CONFIG_USER_ONLY)
497 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 {
499 /* We only support one address space per cpu at the moment. */
500 assert(cpu->as == as);
501
502 if (cpu->tcg_as_listener) {
503 memory_listener_unregister(cpu->tcg_as_listener);
504 } else {
505 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 }
507 cpu->tcg_as_listener->commit = tcg_commit;
508 memory_listener_register(cpu->tcg_as_listener, as);
509 }
510 #endif
511
512 void cpu_exec_init(CPUArchState *env)
513 {
514 CPUState *cpu = ENV_GET_CPU(env);
515 CPUClass *cc = CPU_GET_CLASS(cpu);
516 CPUState *some_cpu;
517 int cpu_index;
518
519 #if defined(CONFIG_USER_ONLY)
520 cpu_list_lock();
521 #endif
522 cpu_index = 0;
523 CPU_FOREACH(some_cpu) {
524 cpu_index++;
525 }
526 cpu->cpu_index = cpu_index;
527 cpu->numa_node = 0;
528 QTAILQ_INIT(&cpu->breakpoints);
529 QTAILQ_INIT(&cpu->watchpoints);
530 #ifndef CONFIG_USER_ONLY
531 cpu->as = &address_space_memory;
532 cpu->thread_id = qemu_get_thread_id();
533 cpu_reload_memory_map(cpu);
534 #endif
535 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
536 #if defined(CONFIG_USER_ONLY)
537 cpu_list_unlock();
538 #endif
539 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
540 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
541 }
542 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
543 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
544 cpu_save, cpu_load, env);
545 assert(cc->vmsd == NULL);
546 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
547 #endif
548 if (cc->vmsd != NULL) {
549 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
550 }
551 }
552
553 #if defined(CONFIG_USER_ONLY)
554 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
555 {
556 tb_invalidate_phys_page_range(pc, pc + 1, 0);
557 }
558 #else
559 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 {
561 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
562 if (phys != -1) {
563 tb_invalidate_phys_addr(cpu->as,
564 phys | (pc & ~TARGET_PAGE_MASK));
565 }
566 }
567 #endif
568
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
571
572 {
573 }
574
575 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
576 int flags)
577 {
578 return -ENOSYS;
579 }
580
581 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
582 {
583 }
584
585 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
586 int flags, CPUWatchpoint **watchpoint)
587 {
588 return -ENOSYS;
589 }
590 #else
591 /* Add a watchpoint. */
592 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
593 int flags, CPUWatchpoint **watchpoint)
594 {
595 CPUWatchpoint *wp;
596
597 /* forbid ranges which are empty or run off the end of the address space */
598 if (len == 0 || (addr + len - 1) < addr) {
599 error_report("tried to set invalid watchpoint at %"
600 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
601 return -EINVAL;
602 }
603 wp = g_malloc(sizeof(*wp));
604
605 wp->vaddr = addr;
606 wp->len = len;
607 wp->flags = flags;
608
609 /* keep all GDB-injected watchpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
612 } else {
613 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
614 }
615
616 tlb_flush_page(cpu, addr);
617
618 if (watchpoint)
619 *watchpoint = wp;
620 return 0;
621 }
622
623 /* Remove a specific watchpoint. */
624 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
625 int flags)
626 {
627 CPUWatchpoint *wp;
628
629 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
630 if (addr == wp->vaddr && len == wp->len
631 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
632 cpu_watchpoint_remove_by_ref(cpu, wp);
633 return 0;
634 }
635 }
636 return -ENOENT;
637 }
638
639 /* Remove a specific watchpoint by reference. */
640 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
641 {
642 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
643
644 tlb_flush_page(cpu, watchpoint->vaddr);
645
646 g_free(watchpoint);
647 }
648
649 /* Remove all matching watchpoints. */
650 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
651 {
652 CPUWatchpoint *wp, *next;
653
654 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
655 if (wp->flags & mask) {
656 cpu_watchpoint_remove_by_ref(cpu, wp);
657 }
658 }
659 }
660
661 /* Return true if this watchpoint address matches the specified
662 * access (ie the address range covered by the watchpoint overlaps
663 * partially or completely with the address range covered by the
664 * access).
665 */
666 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
667 vaddr addr,
668 vaddr len)
669 {
670 /* We know the lengths are non-zero, but a little caution is
671 * required to avoid errors in the case where the range ends
672 * exactly at the top of the address space and so addr + len
673 * wraps round to zero.
674 */
675 vaddr wpend = wp->vaddr + wp->len - 1;
676 vaddr addrend = addr + len - 1;
677
678 return !(addr > wpend || wp->vaddr > addrend);
679 }
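/* For instance, a watchpoint at vaddr 0x1000 with len 4 covers 0x1000-0x1003;
 * an 8-byte access at 0x1002 covers 0x1002-0x1009, so the ranges overlap and
 * this returns true.  Working with inclusive end addresses is what keeps the
 * comparison safe when a range ends at the very top of the address space.
 */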
680
681 #endif
682
683 /* Add a breakpoint. */
684 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
685 CPUBreakpoint **breakpoint)
686 {
687 CPUBreakpoint *bp;
688
689 bp = g_malloc(sizeof(*bp));
690
691 bp->pc = pc;
692 bp->flags = flags;
693
694 /* keep all GDB-injected breakpoints in front */
695 if (flags & BP_GDB) {
696 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
697 } else {
698 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
699 }
700
701 breakpoint_invalidate(cpu, pc);
702
703 if (breakpoint) {
704 *breakpoint = bp;
705 }
706 return 0;
707 }
708
709 /* Remove a specific breakpoint. */
710 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
711 {
712 CPUBreakpoint *bp;
713
714 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
715 if (bp->pc == pc && bp->flags == flags) {
716 cpu_breakpoint_remove_by_ref(cpu, bp);
717 return 0;
718 }
719 }
720 return -ENOENT;
721 }
722
723 /* Remove a specific breakpoint by reference. */
724 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
725 {
726 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
727
728 breakpoint_invalidate(cpu, breakpoint->pc);
729
730 g_free(breakpoint);
731 }
732
733 /* Remove all matching breakpoints. */
734 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
735 {
736 CPUBreakpoint *bp, *next;
737
738 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
739 if (bp->flags & mask) {
740 cpu_breakpoint_remove_by_ref(cpu, bp);
741 }
742 }
743 }
744
745 /* enable or disable single step mode. EXCP_DEBUG is returned by the
746 CPU loop after each instruction */
747 void cpu_single_step(CPUState *cpu, int enabled)
748 {
749 if (cpu->singlestep_enabled != enabled) {
750 cpu->singlestep_enabled = enabled;
751 if (kvm_enabled()) {
752 kvm_update_guest_debug(cpu, 0);
753 } else {
754 /* must flush all the translated code to avoid inconsistencies */
755 /* XXX: only flush what is necessary */
756 CPUArchState *env = cpu->env_ptr;
757 tb_flush(env);
758 }
759 }
760 }
761
762 void cpu_abort(CPUState *cpu, const char *fmt, ...)
763 {
764 va_list ap;
765 va_list ap2;
766
767 va_start(ap, fmt);
768 va_copy(ap2, ap);
769 fprintf(stderr, "qemu: fatal: ");
770 vfprintf(stderr, fmt, ap);
771 fprintf(stderr, "\n");
772 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
773 if (qemu_log_enabled()) {
774 qemu_log("qemu: fatal: ");
775 qemu_log_vprintf(fmt, ap2);
776 qemu_log("\n");
777 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
778 qemu_log_flush();
779 qemu_log_close();
780 }
781 va_end(ap2);
782 va_end(ap);
783 #if defined(CONFIG_USER_ONLY)
784 {
785 struct sigaction act;
786 sigfillset(&act.sa_mask);
787 act.sa_handler = SIG_DFL;
788 sigaction(SIGABRT, &act, NULL);
789 }
790 #endif
791 abort();
792 }
793
794 #if !defined(CONFIG_USER_ONLY)
795 /* Called from RCU critical section */
796 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
797 {
798 RAMBlock *block;
799
800 block = atomic_rcu_read(&ram_list.mru_block);
801 if (block && addr - block->offset < block->max_length) {
802 goto found;
803 }
804 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
805 if (addr - block->offset < block->max_length) {
806 goto found;
807 }
808 }
809
810 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
811 abort();
812
813 found:
814 /* It is safe to write mru_block outside the iothread lock. This
815 * is what happens:
816 *
817 * mru_block = xxx
818 * rcu_read_unlock()
819 * xxx removed from list
820 * rcu_read_lock()
821 * read mru_block
822 * mru_block = NULL;
823 * call_rcu(reclaim_ramblock, xxx);
824 * rcu_read_unlock()
825 *
826 * atomic_rcu_set is not needed here. The block was already published
827 * when it was placed into the list. Here we're just making an extra
828 * copy of the pointer.
829 */
830 ram_list.mru_block = block;
831 return block;
832 }
833
834 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
835 {
836 ram_addr_t start1;
837 RAMBlock *block;
838 ram_addr_t end;
839
840 end = TARGET_PAGE_ALIGN(start + length);
841 start &= TARGET_PAGE_MASK;
842
843 rcu_read_lock();
844 block = qemu_get_ram_block(start);
845 assert(block == qemu_get_ram_block(end - 1));
846 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
847 cpu_tlb_reset_dirty_all(start1, length);
848 rcu_read_unlock();
849 }
850
851 /* Note: start and end must be within the same ram block. */
852 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
853 ram_addr_t length,
854 unsigned client)
855 {
856 unsigned long end, page;
857 bool dirty;
858
859 if (length == 0) {
860 return false;
861 }
862
863 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
864 page = start >> TARGET_PAGE_BITS;
865 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
866 page, end - page);
867
868 if (dirty && tcg_enabled()) {
869 tlb_reset_dirty_range_all(start, length);
870 }
871
872 return dirty;
873 }
874
875 /* Called from RCU critical section */
876 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
877 MemoryRegionSection *section,
878 target_ulong vaddr,
879 hwaddr paddr, hwaddr xlat,
880 int prot,
881 target_ulong *address)
882 {
883 hwaddr iotlb;
884 CPUWatchpoint *wp;
885
886 if (memory_region_is_ram(section->mr)) {
887 /* Normal RAM. */
888 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
889 + xlat;
890 if (!section->readonly) {
891 iotlb |= PHYS_SECTION_NOTDIRTY;
892 } else {
893 iotlb |= PHYS_SECTION_ROM;
894 }
895 } else {
896 iotlb = section - section->address_space->dispatch->map.sections;
897 iotlb += xlat;
898 }
899
900 /* Make accesses to pages with watchpoints go via the
901 watchpoint trap routines. */
902 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
903 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
904 /* Avoid trapping reads of pages with a write breakpoint. */
905 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
906 iotlb = PHYS_SECTION_WATCH + paddr;
907 *address |= TLB_MMIO;
908 break;
909 }
910 }
911 }
912
913 return iotlb;
914 }
915 #endif /* defined(CONFIG_USER_ONLY) */
916
917 #if !defined(CONFIG_USER_ONLY)
918
919 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
920 uint16_t section);
921 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
922
923 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
924 qemu_anon_ram_alloc;
925
926 /*
927  * Set a custom physical guest memory allocator.
928  * Accelerators with unusual requirements may need this. Hopefully, we can
929 * get rid of it eventually.
930 */
931 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
932 {
933 phys_mem_alloc = alloc;
934 }
935
936 static uint16_t phys_section_add(PhysPageMap *map,
937 MemoryRegionSection *section)
938 {
939 /* The physical section number is ORed with a page-aligned
940 * pointer to produce the iotlb entries. Thus it should
941 * never overflow into the page-aligned value.
942 */
943 assert(map->sections_nb < TARGET_PAGE_SIZE);
944
945 if (map->sections_nb == map->sections_nb_alloc) {
946 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
947 map->sections = g_renew(MemoryRegionSection, map->sections,
948 map->sections_nb_alloc);
949 }
950 map->sections[map->sections_nb] = *section;
951 memory_region_ref(section->mr);
952 return map->sections_nb++;
953 }
954
955 static void phys_section_destroy(MemoryRegion *mr)
956 {
957 memory_region_unref(mr);
958
959 if (mr->subpage) {
960 subpage_t *subpage = container_of(mr, subpage_t, iomem);
961 object_unref(OBJECT(&subpage->iomem));
962 g_free(subpage);
963 }
964 }
965
966 static void phys_sections_free(PhysPageMap *map)
967 {
968 while (map->sections_nb > 0) {
969 MemoryRegionSection *section = &map->sections[--map->sections_nb];
970 phys_section_destroy(section->mr);
971 }
972 g_free(map->sections);
973 g_free(map->nodes);
974 }
975
976 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
977 {
978 subpage_t *subpage;
979 hwaddr base = section->offset_within_address_space
980 & TARGET_PAGE_MASK;
981 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
982 d->map.nodes, d->map.sections);
983 MemoryRegionSection subsection = {
984 .offset_within_address_space = base,
985 .size = int128_make64(TARGET_PAGE_SIZE),
986 };
987 hwaddr start, end;
988
989 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
990
991 if (!(existing->mr->subpage)) {
992 subpage = subpage_init(d->as, base);
993 subsection.address_space = d->as;
994 subsection.mr = &subpage->iomem;
995 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
996 phys_section_add(&d->map, &subsection));
997 } else {
998 subpage = container_of(existing->mr, subpage_t, iomem);
999 }
1000 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1001 end = start + int128_get64(section->size) - 1;
1002 subpage_register(subpage, start, end,
1003 phys_section_add(&d->map, section));
1004 }
1005
1006
1007 static void register_multipage(AddressSpaceDispatch *d,
1008 MemoryRegionSection *section)
1009 {
1010 hwaddr start_addr = section->offset_within_address_space;
1011 uint16_t section_index = phys_section_add(&d->map, section);
1012 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1013 TARGET_PAGE_BITS));
1014
1015 assert(num_pages);
1016 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1017 }
1018
1019 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1020 {
1021 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1022 AddressSpaceDispatch *d = as->next_dispatch;
1023 MemoryRegionSection now = *section, remain = *section;
1024 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1025
1026 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1027 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1028 - now.offset_within_address_space;
1029
1030 now.size = int128_min(int128_make64(left), now.size);
1031 register_subpage(d, &now);
1032 } else {
1033 now.size = int128_zero();
1034 }
1035 while (int128_ne(remain.size, now.size)) {
1036 remain.size = int128_sub(remain.size, now.size);
1037 remain.offset_within_address_space += int128_get64(now.size);
1038 remain.offset_within_region += int128_get64(now.size);
1039 now = remain;
1040 if (int128_lt(remain.size, page_size)) {
1041 register_subpage(d, &now);
1042 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1043 now.size = page_size;
1044 register_subpage(d, &now);
1045 } else {
1046 now.size = int128_and(now.size, int128_neg(page_size));
1047 register_multipage(d, &now);
1048 }
1049 }
1050 }
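/* In short, a section whose edges are not page-aligned is split into an
 * unaligned head and tail that go through the subpage machinery, while the
 * page-aligned middle is registered in one go as a multipage entry.
 */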
1051
1052 void qemu_flush_coalesced_mmio_buffer(void)
1053 {
1054 if (kvm_enabled())
1055 kvm_flush_coalesced_mmio_buffer();
1056 }
1057
1058 void qemu_mutex_lock_ramlist(void)
1059 {
1060 qemu_mutex_lock(&ram_list.mutex);
1061 }
1062
1063 void qemu_mutex_unlock_ramlist(void)
1064 {
1065 qemu_mutex_unlock(&ram_list.mutex);
1066 }
1067
1068 #ifdef __linux__
1069
1070 #include <sys/vfs.h>
1071
1072 #define HUGETLBFS_MAGIC 0x958458f6
1073
1074 static long gethugepagesize(const char *path, Error **errp)
1075 {
1076 struct statfs fs;
1077 int ret;
1078
1079 do {
1080 ret = statfs(path, &fs);
1081 } while (ret != 0 && errno == EINTR);
1082
1083 if (ret != 0) {
1084 error_setg_errno(errp, errno, "failed to get page size of file %s",
1085 path);
1086 return 0;
1087 }
1088
1089 if (fs.f_type != HUGETLBFS_MAGIC)
1090 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1091
1092 return fs.f_bsize;
1093 }
1094
1095 static void *file_ram_alloc(RAMBlock *block,
1096 ram_addr_t memory,
1097 const char *path,
1098 Error **errp)
1099 {
1100 char *filename;
1101 char *sanitized_name;
1102 char *c;
1103 void *area = NULL;
1104 int fd;
1105 uint64_t hpagesize;
1106 Error *local_err = NULL;
1107
1108 hpagesize = gethugepagesize(path, &local_err);
1109 if (local_err) {
1110 error_propagate(errp, local_err);
1111 goto error;
1112 }
1113 block->mr->align = hpagesize;
1114
1115 if (memory < hpagesize) {
1116 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1117 "or larger than huge page size 0x%" PRIx64,
1118 memory, hpagesize);
1119 goto error;
1120 }
1121
1122 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1123 error_setg(errp,
1124 "host lacks kvm mmu notifiers, -mem-path unsupported");
1125 goto error;
1126 }
1127
1128 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1129 sanitized_name = g_strdup(memory_region_name(block->mr));
1130 for (c = sanitized_name; *c != '\0'; c++) {
1131 if (*c == '/')
1132 *c = '_';
1133 }
1134
1135 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1136 sanitized_name);
1137 g_free(sanitized_name);
1138
1139 fd = mkstemp(filename);
1140 if (fd < 0) {
1141 error_setg_errno(errp, errno,
1142 "unable to create backing store for hugepages");
1143 g_free(filename);
1144 goto error;
1145 }
1146 unlink(filename);
1147 g_free(filename);
1148
1149 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1150
1151 /*
1152 * ftruncate is not supported by hugetlbfs in older
1153 * hosts, so don't bother bailing out on errors.
1154 * If anything goes wrong with it under other filesystems,
1155 * mmap will fail.
1156 */
1157 if (ftruncate(fd, memory)) {
1158 perror("ftruncate");
1159 }
1160
1161 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1162 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1163 fd, 0);
1164 if (area == MAP_FAILED) {
1165 error_setg_errno(errp, errno,
1166 "unable to map backing store for hugepages");
1167 close(fd);
1168 goto error;
1169 }
1170
1171 if (mem_prealloc) {
1172 os_mem_prealloc(fd, area, memory);
1173 }
1174
1175 block->fd = fd;
1176 return area;
1177
1178 error:
1179 if (mem_prealloc) {
1180 error_report("%s", error_get_pretty(*errp));
1181 exit(1);
1182 }
1183 return NULL;
1184 }
1185 #endif
1186
1187 /* Called with the ramlist lock held. */
1188 static ram_addr_t find_ram_offset(ram_addr_t size)
1189 {
1190 RAMBlock *block, *next_block;
1191 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1192
1193 assert(size != 0); /* it would hand out same offset multiple times */
1194
1195 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1196 return 0;
1197 }
1198
1199 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1200 ram_addr_t end, next = RAM_ADDR_MAX;
1201
1202 end = block->offset + block->max_length;
1203
1204 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1205 if (next_block->offset >= end) {
1206 next = MIN(next, next_block->offset);
1207 }
1208 }
1209 if (next - end >= size && next - end < mingap) {
1210 offset = end;
1211 mingap = next - end;
1212 }
1213 }
1214
1215 if (offset == RAM_ADDR_MAX) {
1216 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1217 (uint64_t)size);
1218 abort();
1219 }
1220
1221 return offset;
1222 }
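/* This is a simple best-fit search: for each block it measures the gap up to
 * the next block above it and remembers the smallest gap that still fits the
 * requested size, which keeps the ram_addr_t space reasonably dense.
 */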
1223
1224 ram_addr_t last_ram_offset(void)
1225 {
1226 RAMBlock *block;
1227 ram_addr_t last = 0;
1228
1229 rcu_read_lock();
1230 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1231 last = MAX(last, block->offset + block->max_length);
1232 }
1233 rcu_read_unlock();
1234 return last;
1235 }
1236
1237 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1238 {
1239 int ret;
1240
1241     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1242 if (!machine_dump_guest_core(current_machine)) {
1243 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1244 if (ret) {
1245 perror("qemu_madvise");
1246 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1247 "but dump_guest_core=off specified\n");
1248 }
1249 }
1250 }
1251
1252 /* Called within an RCU critical section, or while the ramlist lock
1253 * is held.
1254 */
1255 static RAMBlock *find_ram_block(ram_addr_t addr)
1256 {
1257 RAMBlock *block;
1258
1259 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1260 if (block->offset == addr) {
1261 return block;
1262 }
1263 }
1264
1265 return NULL;
1266 }
1267
1268 /* Called with iothread lock held. */
1269 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1270 {
1271 RAMBlock *new_block, *block;
1272
1273 rcu_read_lock();
1274 new_block = find_ram_block(addr);
1275 assert(new_block);
1276 assert(!new_block->idstr[0]);
1277
1278 if (dev) {
1279 char *id = qdev_get_dev_path(dev);
1280 if (id) {
1281 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1282 g_free(id);
1283 }
1284 }
1285 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1286
1287 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1288 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1289 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1290 new_block->idstr);
1291 abort();
1292 }
1293 }
1294 rcu_read_unlock();
1295 }
1296
1297 /* Called with iothread lock held. */
1298 void qemu_ram_unset_idstr(ram_addr_t addr)
1299 {
1300 RAMBlock *block;
1301
1302 /* FIXME: arch_init.c assumes that this is not called throughout
1303 * migration. Ignore the problem since hot-unplug during migration
1304 * does not work anyway.
1305 */
1306
1307 rcu_read_lock();
1308 block = find_ram_block(addr);
1309 if (block) {
1310 memset(block->idstr, 0, sizeof(block->idstr));
1311 }
1312 rcu_read_unlock();
1313 }
1314
1315 static int memory_try_enable_merging(void *addr, size_t len)
1316 {
1317 if (!machine_mem_merge(current_machine)) {
1318 /* disabled by the user */
1319 return 0;
1320 }
1321
1322 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1323 }
1324
1325 /* Only legal before guest might have detected the memory size: e.g. on
1326 * incoming migration, or right after reset.
1327 *
1328 * As memory core doesn't know how is memory accessed, it is up to
1329 * resize callback to update device state and/or add assertions to detect
1330 * misuse, if necessary.
1331 */
1332 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1333 {
1334 RAMBlock *block = find_ram_block(base);
1335
1336 assert(block);
1337
1338 newsize = TARGET_PAGE_ALIGN(newsize);
1339
1340 if (block->used_length == newsize) {
1341 return 0;
1342 }
1343
1344 if (!(block->flags & RAM_RESIZEABLE)) {
1345 error_setg_errno(errp, EINVAL,
1346 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1347 " in != 0x" RAM_ADDR_FMT, block->idstr,
1348 newsize, block->used_length);
1349 return -EINVAL;
1350 }
1351
1352 if (block->max_length < newsize) {
1353 error_setg_errno(errp, EINVAL,
1354 "Length too large: %s: 0x" RAM_ADDR_FMT
1355 " > 0x" RAM_ADDR_FMT, block->idstr,
1356 newsize, block->max_length);
1357 return -EINVAL;
1358 }
1359
1360 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1361 block->used_length = newsize;
1362 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1363 DIRTY_CLIENTS_ALL);
1364 memory_region_set_size(block->mr, newsize);
1365 if (block->resized) {
1366 block->resized(block->idstr, newsize, block->host);
1367 }
1368 return 0;
1369 }
1370
1371 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1372 {
1373 RAMBlock *block;
1374 RAMBlock *last_block = NULL;
1375 ram_addr_t old_ram_size, new_ram_size;
1376
1377 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1378
1379 qemu_mutex_lock_ramlist();
1380 new_block->offset = find_ram_offset(new_block->max_length);
1381
1382 if (!new_block->host) {
1383 if (xen_enabled()) {
1384 xen_ram_alloc(new_block->offset, new_block->max_length,
1385 new_block->mr);
1386 } else {
1387 new_block->host = phys_mem_alloc(new_block->max_length,
1388 &new_block->mr->align);
1389 if (!new_block->host) {
1390 error_setg_errno(errp, errno,
1391 "cannot set up guest memory '%s'",
1392 memory_region_name(new_block->mr));
1393 qemu_mutex_unlock_ramlist();
1394 return -1;
1395 }
1396 memory_try_enable_merging(new_block->host, new_block->max_length);
1397 }
1398 }
1399
1400 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1401 * QLIST (which has an RCU-friendly variant) does not have insertion at
1402 * tail, so save the last element in last_block.
1403 */
1404 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1405 last_block = block;
1406 if (block->max_length < new_block->max_length) {
1407 break;
1408 }
1409 }
1410 if (block) {
1411 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1412 } else if (last_block) {
1413 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1414 } else { /* list is empty */
1415 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1416 }
1417 ram_list.mru_block = NULL;
1418
1419 /* Write list before version */
1420 smp_wmb();
1421 ram_list.version++;
1422 qemu_mutex_unlock_ramlist();
1423
1424 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1425
1426 if (new_ram_size > old_ram_size) {
1427 int i;
1428
1429 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1430 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1431 ram_list.dirty_memory[i] =
1432 bitmap_zero_extend(ram_list.dirty_memory[i],
1433 old_ram_size, new_ram_size);
1434 }
1435 }
1436 cpu_physical_memory_set_dirty_range(new_block->offset,
1437 new_block->used_length,
1438 DIRTY_CLIENTS_ALL);
1439
1440 if (new_block->host) {
1441 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1442 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1443 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1444 if (kvm_enabled()) {
1445 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1446 }
1447 }
1448
1449 return new_block->offset;
1450 }
1451
1452 #ifdef __linux__
1453 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1454 bool share, const char *mem_path,
1455 Error **errp)
1456 {
1457 RAMBlock *new_block;
1458 ram_addr_t addr;
1459 Error *local_err = NULL;
1460
1461 if (xen_enabled()) {
1462 error_setg(errp, "-mem-path not supported with Xen");
1463 return -1;
1464 }
1465
1466 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1467 /*
1468 * file_ram_alloc() needs to allocate just like
1469 * phys_mem_alloc, but we haven't bothered to provide
1470 * a hook there.
1471 */
1472 error_setg(errp,
1473 "-mem-path not supported with this accelerator");
1474 return -1;
1475 }
1476
1477 size = TARGET_PAGE_ALIGN(size);
1478 new_block = g_malloc0(sizeof(*new_block));
1479 new_block->mr = mr;
1480 new_block->used_length = size;
1481 new_block->max_length = size;
1482 new_block->flags = share ? RAM_SHARED : 0;
1483 new_block->host = file_ram_alloc(new_block, size,
1484 mem_path, errp);
1485 if (!new_block->host) {
1486 g_free(new_block);
1487 return -1;
1488 }
1489
1490 addr = ram_block_add(new_block, &local_err);
1491 if (local_err) {
1492 g_free(new_block);
1493 error_propagate(errp, local_err);
1494 return -1;
1495 }
1496 return addr;
1497 }
1498 #endif
1499
1500 static
1501 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1502 void (*resized)(const char*,
1503 uint64_t length,
1504 void *host),
1505 void *host, bool resizeable,
1506 MemoryRegion *mr, Error **errp)
1507 {
1508 RAMBlock *new_block;
1509 ram_addr_t addr;
1510 Error *local_err = NULL;
1511
1512 size = TARGET_PAGE_ALIGN(size);
1513 max_size = TARGET_PAGE_ALIGN(max_size);
1514 new_block = g_malloc0(sizeof(*new_block));
1515 new_block->mr = mr;
1516 new_block->resized = resized;
1517 new_block->used_length = size;
1518 new_block->max_length = max_size;
1519 assert(max_size >= size);
1520 new_block->fd = -1;
1521 new_block->host = host;
1522 if (host) {
1523 new_block->flags |= RAM_PREALLOC;
1524 }
1525 if (resizeable) {
1526 new_block->flags |= RAM_RESIZEABLE;
1527 }
1528 addr = ram_block_add(new_block, &local_err);
1529 if (local_err) {
1530 g_free(new_block);
1531 error_propagate(errp, local_err);
1532 return -1;
1533 }
1534 return addr;
1535 }
1536
1537 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1538 MemoryRegion *mr, Error **errp)
1539 {
1540 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1541 }
1542
1543 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1544 {
1545 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1546 }
1547
1548 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1549 void (*resized)(const char*,
1550 uint64_t length,
1551 void *host),
1552 MemoryRegion *mr, Error **errp)
1553 {
1554 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1555 }
1556
1557 void qemu_ram_free_from_ptr(ram_addr_t addr)
1558 {
1559 RAMBlock *block;
1560
1561 qemu_mutex_lock_ramlist();
1562 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1563 if (addr == block->offset) {
1564 QLIST_REMOVE_RCU(block, next);
1565 ram_list.mru_block = NULL;
1566 /* Write list before version */
1567 smp_wmb();
1568 ram_list.version++;
1569 g_free_rcu(block, rcu);
1570 break;
1571 }
1572 }
1573 qemu_mutex_unlock_ramlist();
1574 }
1575
1576 static void reclaim_ramblock(RAMBlock *block)
1577 {
1578 if (block->flags & RAM_PREALLOC) {
1579 ;
1580 } else if (xen_enabled()) {
1581 xen_invalidate_map_cache_entry(block->host);
1582 #ifndef _WIN32
1583 } else if (block->fd >= 0) {
1584 munmap(block->host, block->max_length);
1585 close(block->fd);
1586 #endif
1587 } else {
1588 qemu_anon_ram_free(block->host, block->max_length);
1589 }
1590 g_free(block);
1591 }
1592
1593 void qemu_ram_free(ram_addr_t addr)
1594 {
1595 RAMBlock *block;
1596
1597 qemu_mutex_lock_ramlist();
1598 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1599 if (addr == block->offset) {
1600 QLIST_REMOVE_RCU(block, next);
1601 ram_list.mru_block = NULL;
1602 /* Write list before version */
1603 smp_wmb();
1604 ram_list.version++;
1605 call_rcu(block, reclaim_ramblock, rcu);
1606 break;
1607 }
1608 }
1609 qemu_mutex_unlock_ramlist();
1610 }
1611
1612 #ifndef _WIN32
1613 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1614 {
1615 RAMBlock *block;
1616 ram_addr_t offset;
1617 int flags;
1618 void *area, *vaddr;
1619
1620 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1621 offset = addr - block->offset;
1622 if (offset < block->max_length) {
1623 vaddr = ramblock_ptr(block, offset);
1624 if (block->flags & RAM_PREALLOC) {
1625 ;
1626 } else if (xen_enabled()) {
1627 abort();
1628 } else {
1629 flags = MAP_FIXED;
1630 if (block->fd >= 0) {
1631 flags |= (block->flags & RAM_SHARED ?
1632 MAP_SHARED : MAP_PRIVATE);
1633 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1634 flags, block->fd, offset);
1635 } else {
1636 /*
1637 * Remap needs to match alloc. Accelerators that
1638 * set phys_mem_alloc never remap. If they did,
1639 * we'd need a remap hook here.
1640 */
1641 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1642
1643 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1644 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1645 flags, -1, 0);
1646 }
1647 if (area != vaddr) {
1648 fprintf(stderr, "Could not remap addr: "
1649 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1650 length, addr);
1651 exit(1);
1652 }
1653 memory_try_enable_merging(vaddr, length);
1654 qemu_ram_setup_dump(vaddr, length);
1655 }
1656 }
1657 }
1658 }
1659 #endif /* !_WIN32 */
1660
1661 int qemu_get_ram_fd(ram_addr_t addr)
1662 {
1663 RAMBlock *block;
1664 int fd;
1665
1666 rcu_read_lock();
1667 block = qemu_get_ram_block(addr);
1668 fd = block->fd;
1669 rcu_read_unlock();
1670 return fd;
1671 }
1672
1673 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1674 {
1675 RAMBlock *block;
1676 void *ptr;
1677
1678 rcu_read_lock();
1679 block = qemu_get_ram_block(addr);
1680 ptr = ramblock_ptr(block, 0);
1681 rcu_read_unlock();
1682 return ptr;
1683 }
1684
1685 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1686 * This should not be used for general purpose DMA. Use address_space_map
1687 * or address_space_rw instead. For local memory (e.g. video ram) that the
1688 * device owns, use memory_region_get_ram_ptr.
1689 *
1690 * By the time this function returns, the returned pointer is not protected
1691 * by RCU anymore. If the caller is not within an RCU critical section and
1692 * does not hold the iothread lock, it must have other means of protecting the
1693 * pointer, such as a reference to the region that includes the incoming
1694 * ram_addr_t.
1695 */
1696 void *qemu_get_ram_ptr(ram_addr_t addr)
1697 {
1698 RAMBlock *block;
1699 void *ptr;
1700
1701 rcu_read_lock();
1702 block = qemu_get_ram_block(addr);
1703
1704 if (xen_enabled() && block->host == NULL) {
1705         /* We need to check whether the requested address is in RAM
1706          * because we don't want to map the entire guest memory in QEMU.
1707          * In that case just map until the end of the page.
1708 */
1709 if (block->offset == 0) {
1710 ptr = xen_map_cache(addr, 0, 0);
1711 goto unlock;
1712 }
1713
1714 block->host = xen_map_cache(block->offset, block->max_length, 1);
1715 }
1716 ptr = ramblock_ptr(block, addr - block->offset);
1717
1718 unlock:
1719 rcu_read_unlock();
1720 return ptr;
1721 }
1722
1723 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1724 * but takes a size argument.
1725 *
1726 * By the time this function returns, the returned pointer is not protected
1727 * by RCU anymore. If the caller is not within an RCU critical section and
1728 * does not hold the iothread lock, it must have other means of protecting the
1729 * pointer, such as a reference to the region that includes the incoming
1730 * ram_addr_t.
1731 */
1732 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1733 {
1734 void *ptr;
1735 if (*size == 0) {
1736 return NULL;
1737 }
1738 if (xen_enabled()) {
1739 return xen_map_cache(addr, *size, 1);
1740 } else {
1741 RAMBlock *block;
1742 rcu_read_lock();
1743 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1744 if (addr - block->offset < block->max_length) {
1745 if (addr - block->offset + *size > block->max_length)
1746 *size = block->max_length - addr + block->offset;
1747 ptr = ramblock_ptr(block, addr - block->offset);
1748 rcu_read_unlock();
1749 return ptr;
1750 }
1751 }
1752
1753 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1754 abort();
1755 }
1756 }
1757
1758 /* Some of the softmmu routines need to translate from a host pointer
1759 * (typically a TLB entry) back to a ram offset.
1760 *
1761 * By the time this function returns, the returned pointer is not protected
1762 * by RCU anymore. If the caller is not within an RCU critical section and
1763 * does not hold the iothread lock, it must have other means of protecting the
1764 * pointer, such as a reference to the region that includes the incoming
1765 * ram_addr_t.
1766 */
1767 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1768 {
1769 RAMBlock *block;
1770 uint8_t *host = ptr;
1771 MemoryRegion *mr;
1772
1773 if (xen_enabled()) {
1774 rcu_read_lock();
1775 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1776 mr = qemu_get_ram_block(*ram_addr)->mr;
1777 rcu_read_unlock();
1778 return mr;
1779 }
1780
1781 rcu_read_lock();
1782 block = atomic_rcu_read(&ram_list.mru_block);
1783 if (block && block->host && host - block->host < block->max_length) {
1784 goto found;
1785 }
1786
1787 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1788         /* This case appears when the block is not mapped. */
1789 if (block->host == NULL) {
1790 continue;
1791 }
1792 if (host - block->host < block->max_length) {
1793 goto found;
1794 }
1795 }
1796
1797 rcu_read_unlock();
1798 return NULL;
1799
1800 found:
1801 *ram_addr = block->offset + (host - block->host);
1802 mr = block->mr;
1803 rcu_read_unlock();
1804 return mr;
1805 }
1806
1807 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1808 uint64_t val, unsigned size)
1809 {
1810 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1811 tb_invalidate_phys_page_fast(ram_addr, size);
1812 }
1813 switch (size) {
1814 case 1:
1815 stb_p(qemu_get_ram_ptr(ram_addr), val);
1816 break;
1817 case 2:
1818 stw_p(qemu_get_ram_ptr(ram_addr), val);
1819 break;
1820 case 4:
1821 stl_p(qemu_get_ram_ptr(ram_addr), val);
1822 break;
1823 default:
1824 abort();
1825 }
1826 /* Set both VGA and migration bits for simplicity and to remove
1827 * the notdirty callback faster.
1828 */
1829 cpu_physical_memory_set_dirty_range(ram_addr, size,
1830 DIRTY_CLIENTS_NOCODE);
1831 /* we remove the notdirty callback only if the code has been
1832 flushed */
1833 if (!cpu_physical_memory_is_clean(ram_addr)) {
1834 CPUArchState *env = current_cpu->env_ptr;
1835 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1836 }
1837 }
1838
1839 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1840 unsigned size, bool is_write)
1841 {
1842 return is_write;
1843 }
1844
1845 static const MemoryRegionOps notdirty_mem_ops = {
1846 .write = notdirty_mem_write,
1847 .valid.accepts = notdirty_mem_accepts,
1848 .endianness = DEVICE_NATIVE_ENDIAN,
1849 };
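/* A rough picture of the notdirty path: writes that hit a page whose code
 * dirty flag is still clean are routed through this region so any translated
 * blocks on the page can be invalidated first; the store is then performed on
 * the RAM backing, the page is marked dirty, and the TLB entry is flipped back
 * to a plain RAM mapping so later writes take the fast path.
 */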
1850
1851 /* Generate a debug exception if a watchpoint has been hit. */
1852 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1853 {
1854 CPUState *cpu = current_cpu;
1855 CPUArchState *env = cpu->env_ptr;
1856 target_ulong pc, cs_base;
1857 target_ulong vaddr;
1858 CPUWatchpoint *wp;
1859 int cpu_flags;
1860
1861 if (cpu->watchpoint_hit) {
1862 /* We re-entered the check after replacing the TB. Now raise
1863          * the debug interrupt so that it will trigger after the
1864 * current instruction. */
1865 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1866 return;
1867 }
1868 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1869 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1870 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1871 && (wp->flags & flags)) {
1872 if (flags == BP_MEM_READ) {
1873 wp->flags |= BP_WATCHPOINT_HIT_READ;
1874 } else {
1875 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1876 }
1877 wp->hitaddr = vaddr;
1878 wp->hitattrs = attrs;
1879 if (!cpu->watchpoint_hit) {
1880 cpu->watchpoint_hit = wp;
1881 tb_check_watchpoint(cpu);
1882 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1883 cpu->exception_index = EXCP_DEBUG;
1884 cpu_loop_exit(cpu);
1885 } else {
1886 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1887 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1888 cpu_resume_from_signal(cpu, NULL);
1889 }
1890 }
1891 } else {
1892 wp->flags &= ~BP_WATCHPOINT_HIT;
1893 }
1894 }
1895 }
1896
1897 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1898 so these check for a hit then pass through to the normal out-of-line
1899 phys routines. */
1900 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1901 unsigned size, MemTxAttrs attrs)
1902 {
1903 MemTxResult res;
1904 uint64_t data;
1905
1906 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1907 switch (size) {
1908 case 1:
1909 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1910 break;
1911 case 2:
1912 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1913 break;
1914 case 4:
1915 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1916 break;
1917 default: abort();
1918 }
1919 *pdata = data;
1920 return res;
1921 }
1922
1923 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1924 uint64_t val, unsigned size,
1925 MemTxAttrs attrs)
1926 {
1927 MemTxResult res;
1928
1929 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1930 switch (size) {
1931 case 1:
1932 address_space_stb(&address_space_memory, addr, val, attrs, &res);
1933 break;
1934 case 2:
1935 address_space_stw(&address_space_memory, addr, val, attrs, &res);
1936 break;
1937 case 4:
1938 address_space_stl(&address_space_memory, addr, val, attrs, &res);
1939 break;
1940 default: abort();
1941 }
1942 return res;
1943 }
1944
1945 static const MemoryRegionOps watch_mem_ops = {
1946 .read_with_attrs = watch_mem_read,
1947 .write_with_attrs = watch_mem_write,
1948 .endianness = DEVICE_NATIVE_ENDIAN,
1949 };
1950
1951 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1952 unsigned len, MemTxAttrs attrs)
1953 {
1954 subpage_t *subpage = opaque;
1955 uint8_t buf[8];
1956 MemTxResult res;
1957
1958 #if defined(DEBUG_SUBPAGE)
1959 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1960 subpage, len, addr);
1961 #endif
1962 res = address_space_read(subpage->as, addr + subpage->base,
1963 attrs, buf, len);
1964 if (res) {
1965 return res;
1966 }
1967 switch (len) {
1968 case 1:
1969 *data = ldub_p(buf);
1970 return MEMTX_OK;
1971 case 2:
1972 *data = lduw_p(buf);
1973 return MEMTX_OK;
1974 case 4:
1975 *data = ldl_p(buf);
1976 return MEMTX_OK;
1977 case 8:
1978 *data = ldq_p(buf);
1979 return MEMTX_OK;
1980 default:
1981 abort();
1982 }
1983 }
1984
1985 static MemTxResult subpage_write(void *opaque, hwaddr addr,
1986 uint64_t value, unsigned len, MemTxAttrs attrs)
1987 {
1988 subpage_t *subpage = opaque;
1989 uint8_t buf[8];
1990
1991 #if defined(DEBUG_SUBPAGE)
1992 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1993 " value %"PRIx64"\n",
1994 __func__, subpage, len, addr, value);
1995 #endif
1996 switch (len) {
1997 case 1:
1998 stb_p(buf, value);
1999 break;
2000 case 2:
2001 stw_p(buf, value);
2002 break;
2003 case 4:
2004 stl_p(buf, value);
2005 break;
2006 case 8:
2007 stq_p(buf, value);
2008 break;
2009 default:
2010 abort();
2011 }
2012 return address_space_write(subpage->as, addr + subpage->base,
2013 attrs, buf, len);
2014 }
2015
2016 static bool subpage_accepts(void *opaque, hwaddr addr,
2017 unsigned len, bool is_write)
2018 {
2019 subpage_t *subpage = opaque;
2020 #if defined(DEBUG_SUBPAGE)
2021 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2022 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2023 #endif
2024
2025 return address_space_access_valid(subpage->as, addr + subpage->base,
2026 len, is_write);
2027 }
2028
2029 static const MemoryRegionOps subpage_ops = {
2030 .read_with_attrs = subpage_read,
2031 .write_with_attrs = subpage_write,
2032 .impl.min_access_size = 1,
2033 .impl.max_access_size = 8,
2034 .valid.min_access_size = 1,
2035 .valid.max_access_size = 8,
2036 .valid.accepts = subpage_accepts,
2037 .endianness = DEVICE_NATIVE_ENDIAN,
2038 };
2039
2040 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2041 uint16_t section)
2042 {
2043 int idx, eidx;
2044
2045 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2046 return -1;
2047 idx = SUBPAGE_IDX(start);
2048 eidx = SUBPAGE_IDX(end);
2049 #if defined(DEBUG_SUBPAGE)
2050 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2051 __func__, mmio, start, end, idx, eidx, section);
2052 #endif
2053 for (; idx <= eidx; idx++) {
2054 mmio->sub_section[idx] = section;
2055 }
2056
2057 return 0;
2058 }
2059
2060 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2061 {
2062 subpage_t *mmio;
2063
2064 mmio = g_malloc0(sizeof(subpage_t));
2065
2066 mmio->as = as;
2067 mmio->base = base;
2068 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2069 NULL, TARGET_PAGE_SIZE);
2070 mmio->iomem.subpage = true;
2071 #if defined(DEBUG_SUBPAGE)
2072 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2073 mmio, base, TARGET_PAGE_SIZE);
2074 #endif
2075 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2076
2077 return mmio;
2078 }
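
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * carve one guest page into differently-dispatched sub-ranges with the
 * helpers above.  The real caller in this file is register_subpage();
 * "example_section" stands in for an index returned by phys_section_add().
 */
static void subpage_example(AddressSpace *as, hwaddr page_base,
                            uint16_t example_section)
{
    /* Every sub-range initially points at PHYS_SECTION_UNASSIGNED. */
    subpage_t *sp = subpage_init(as, page_base);

    /* Route the first half of the page to example_section.  Offsets are
     * relative to page_base and inclusive of the end byte. */
    subpage_register(sp, 0, TARGET_PAGE_SIZE / 2 - 1, example_section);
}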
2079
2080 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2081 MemoryRegion *mr)
2082 {
2083 assert(as);
2084 MemoryRegionSection section = {
2085 .address_space = as,
2086 .mr = mr,
2087 .offset_within_address_space = 0,
2088 .offset_within_region = 0,
2089 .size = int128_2_64(),
2090 };
2091
2092 return phys_section_add(map, &section);
2093 }
2094
2095 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2096 {
2097 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2098 MemoryRegionSection *sections = d->map.sections;
2099
2100 return sections[index & ~TARGET_PAGE_MASK].mr;
2101 }
2102
2103 static void io_mem_init(void)
2104 {
2105 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2106 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2107 NULL, UINT64_MAX);
2108 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2109 NULL, UINT64_MAX);
2110 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2111 NULL, UINT64_MAX);
2112 }
2113
2114 static void mem_begin(MemoryListener *listener)
2115 {
2116 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2117 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2118 uint16_t n;
2119
2120 n = dummy_section(&d->map, as, &io_mem_unassigned);
2121 assert(n == PHYS_SECTION_UNASSIGNED);
2122 n = dummy_section(&d->map, as, &io_mem_notdirty);
2123 assert(n == PHYS_SECTION_NOTDIRTY);
2124 n = dummy_section(&d->map, as, &io_mem_rom);
2125 assert(n == PHYS_SECTION_ROM);
2126 n = dummy_section(&d->map, as, &io_mem_watch);
2127 assert(n == PHYS_SECTION_WATCH);
2128
2129 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2130 d->as = as;
2131 as->next_dispatch = d;
2132 }
2133
2134 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2135 {
2136 phys_sections_free(&d->map);
2137 g_free(d);
2138 }
2139
2140 static void mem_commit(MemoryListener *listener)
2141 {
2142 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2143 AddressSpaceDispatch *cur = as->dispatch;
2144 AddressSpaceDispatch *next = as->next_dispatch;
2145
2146 phys_page_compact_all(next, next->map.nodes_nb);
2147
2148 atomic_rcu_set(&as->dispatch, next);
2149 if (cur) {
2150 call_rcu(cur, address_space_dispatch_free, rcu);
2151 }
2152 }
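
/*
 * Illustrative sketch (not part of the original file): the RCU read side
 * that mem_commit() above pairs with.  A dispatch pointer published with
 * atomic_rcu_set() must be loaded with atomic_rcu_read() and only used
 * between rcu_read_lock() and rcu_read_unlock(); once the read section
 * ends, call_rcu() is free to release the old table.
 */
static bool example_section_is_unassigned(AddressSpace *as, unsigned idx)
{
    AddressSpaceDispatch *d;
    bool res;

    rcu_read_lock();
    d = atomic_rcu_read(&as->dispatch);
    res = (d->map.sections[idx].mr == &io_mem_unassigned);
    rcu_read_unlock();
    return res;
}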
2153
2154 static void tcg_commit(MemoryListener *listener)
2155 {
2156 CPUState *cpu;
2157
2158 /* since each CPU stores ram addresses in its TLB cache, we must
2159 reset the modified entries */
2160 /* XXX: slow! */
2161 CPU_FOREACH(cpu) {
2162 /* FIXME: Disentangle the cpu.h circular files deps so we can
2163 directly get the right CPU from listener. */
2164 if (cpu->tcg_as_listener != listener) {
2165 continue;
2166 }
2167 cpu_reload_memory_map(cpu);
2168 }
2169 }
2170
2171 void address_space_init_dispatch(AddressSpace *as)
2172 {
2173 as->dispatch = NULL;
2174 as->dispatch_listener = (MemoryListener) {
2175 .begin = mem_begin,
2176 .commit = mem_commit,
2177 .region_add = mem_add,
2178 .region_nop = mem_add,
2179 .priority = 0,
2180 };
2181 memory_listener_register(&as->dispatch_listener, as);
2182 }
2183
2184 void address_space_unregister(AddressSpace *as)
2185 {
2186 memory_listener_unregister(&as->dispatch_listener);
2187 }
2188
2189 void address_space_destroy_dispatch(AddressSpace *as)
2190 {
2191 AddressSpaceDispatch *d = as->dispatch;
2192
2193 atomic_rcu_set(&as->dispatch, NULL);
2194 if (d) {
2195 call_rcu(d, address_space_dispatch_free, rcu);
2196 }
2197 }
2198
2199 static void memory_map_init(void)
2200 {
2201 system_memory = g_malloc(sizeof(*system_memory));
2202
2203 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2204 address_space_init(&address_space_memory, system_memory, "memory");
2205
2206 system_io = g_malloc(sizeof(*system_io));
2207 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2208 65536);
2209 address_space_init(&address_space_io, system_io, "I/O");
2210 }
2211
2212 MemoryRegion *get_system_memory(void)
2213 {
2214 return system_memory;
2215 }
2216
2217 MemoryRegion *get_system_io(void)
2218 {
2219 return system_io;
2220 }
2221
2222 #endif /* !defined(CONFIG_USER_ONLY) */
2223
2224 /* physical memory access (slow version, mainly for debug) */
2225 #if defined(CONFIG_USER_ONLY)
2226 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2227 uint8_t *buf, int len, int is_write)
2228 {
2229 int l, flags;
2230 target_ulong page;
2231 void * p;
2232
2233 while (len > 0) {
2234 page = addr & TARGET_PAGE_MASK;
2235 l = (page + TARGET_PAGE_SIZE) - addr;
2236 if (l > len)
2237 l = len;
2238 flags = page_get_flags(page);
2239 if (!(flags & PAGE_VALID))
2240 return -1;
2241 if (is_write) {
2242 if (!(flags & PAGE_WRITE))
2243 return -1;
2244 /* XXX: this code should not depend on lock_user */
2245 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2246 return -1;
2247 memcpy(p, buf, l);
2248 unlock_user(p, addr, l);
2249 } else {
2250 if (!(flags & PAGE_READ))
2251 return -1;
2252 /* XXX: this code should not depend on lock_user */
2253 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2254 return -1;
2255 memcpy(buf, p, l);
2256 unlock_user(p, addr, 0);
2257 }
2258 len -= l;
2259 buf += l;
2260 addr += l;
2261 }
2262 return 0;
2263 }
2264
2265 #else
2266
2267 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2268 hwaddr length)
2269 {
2270 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2271 /* No early return if dirty_log_mask is or becomes 0, because
2272 * cpu_physical_memory_set_dirty_range will still call
2273 * xen_modified_memory.
2274 */
2275 if (dirty_log_mask) {
2276 dirty_log_mask =
2277 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2278 }
2279 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2280 tb_invalidate_phys_range(addr, addr + length);
2281 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2282 }
2283 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2284 }
2285
2286 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2287 {
2288 unsigned access_size_max = mr->ops->valid.max_access_size;
2289
2290 /* Regions are assumed to support 1-4 byte accesses unless
2291 otherwise specified. */
2292 if (access_size_max == 0) {
2293 access_size_max = 4;
2294 }
2295
2296 /* Bound the maximum access by the alignment of the address. */
2297 if (!mr->ops->impl.unaligned) {
2298 unsigned align_size_max = addr & -addr;
2299 if (align_size_max != 0 && align_size_max < access_size_max) {
2300 access_size_max = align_size_max;
2301 }
2302 }
2303
2304 /* Don't attempt accesses larger than the maximum. */
2305 if (l > access_size_max) {
2306 l = access_size_max;
2307 }
2308 if (l & (l - 1)) {
2309 l = 1 << (qemu_fls(l) - 1);
2310 }
2311
2312 return l;
2313 }
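
/*
 * Worked example for memory_access_size() (illustrative; assumes a region
 * whose ops declare valid.max_access_size == 8 and no unaligned support):
 *
 *   l = 8, addr1 = 0x1002: addr & -addr == 2, so the alignment step clamps
 *                          the access to 2 bytes.
 *   l = 7, addr1 = 0x1000: 0x1000 & -0x1000 == 0x1000 >= 8, so alignment
 *                          does not reduce it; 7 is not a power of two, so
 *                          the final step rounds it down to
 *                          1 << (qemu_fls(7) - 1) == 4.
 */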
2314
2315 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2316 uint8_t *buf, int len, bool is_write)
2317 {
2318 hwaddr l;
2319 uint8_t *ptr;
2320 uint64_t val;
2321 hwaddr addr1;
2322 MemoryRegion *mr;
2323 MemTxResult result = MEMTX_OK;
2324
2325 rcu_read_lock();
2326 while (len > 0) {
2327 l = len;
2328 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2329
2330 if (is_write) {
2331 if (!memory_access_is_direct(mr, is_write)) {
2332 l = memory_access_size(mr, l, addr1);
2333 /* XXX: could force current_cpu to NULL to avoid
2334 potential bugs */
2335 switch (l) {
2336 case 8:
2337 /* 64 bit write access */
2338 val = ldq_p(buf);
2339 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2340 attrs);
2341 break;
2342 case 4:
2343 /* 32 bit write access */
2344 val = ldl_p(buf);
2345 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2346 attrs);
2347 break;
2348 case 2:
2349 /* 16 bit write access */
2350 val = lduw_p(buf);
2351 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2352 attrs);
2353 break;
2354 case 1:
2355 /* 8 bit write access */
2356 val = ldub_p(buf);
2357 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2358 attrs);
2359 break;
2360 default:
2361 abort();
2362 }
2363 } else {
2364 addr1 += memory_region_get_ram_addr(mr);
2365 /* RAM case */
2366 ptr = qemu_get_ram_ptr(addr1);
2367 memcpy(ptr, buf, l);
2368 invalidate_and_set_dirty(mr, addr1, l);
2369 }
2370 } else {
2371 if (!memory_access_is_direct(mr, is_write)) {
2372 /* I/O case */
2373 l = memory_access_size(mr, l, addr1);
2374 switch (l) {
2375 case 8:
2376 /* 64 bit read access */
2377 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2378 attrs);
2379 stq_p(buf, val);
2380 break;
2381 case 4:
2382 /* 32 bit read access */
2383 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2384 attrs);
2385 stl_p(buf, val);
2386 break;
2387 case 2:
2388 /* 16 bit read access */
2389 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2390 attrs);
2391 stw_p(buf, val);
2392 break;
2393 case 1:
2394 /* 8 bit read access */
2395 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2396 attrs);
2397 stb_p(buf, val);
2398 break;
2399 default:
2400 abort();
2401 }
2402 } else {
2403 /* RAM case */
2404 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2405 memcpy(buf, ptr, l);
2406 }
2407 }
2408 len -= l;
2409 buf += l;
2410 addr += l;
2411 }
2412 rcu_read_unlock();
2413
2414 return result;
2415 }
2416
2417 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2418 const uint8_t *buf, int len)
2419 {
2420 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2421 }
2422
2423 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2424 uint8_t *buf, int len)
2425 {
2426 return address_space_rw(as, addr, attrs, buf, len, false);
2427 }
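
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * use the checked accessors above.  The descriptor layout and address are
 * placeholders; real bus masters pass their own MemTxAttrs instead of
 * MEMTXATTRS_UNSPECIFIED when they have them.
 */
static bool example_copy_descriptor(AddressSpace *as, hwaddr desc_addr)
{
    uint8_t desc[16];
    MemTxResult res;

    res = address_space_read(as, desc_addr, MEMTXATTRS_UNSPECIFIED,
                             desc, sizeof(desc));
    if (res != MEMTX_OK) {
        return false;   /* decode error or bus error on some byte */
    }
    /* ... interpret or modify desc ..., then write it back. */
    res = address_space_write(as, desc_addr, MEMTXATTRS_UNSPECIFIED,
                              desc, sizeof(desc));
    return res == MEMTX_OK;
}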
2428
2429
2430 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2431 int len, int is_write)
2432 {
2433 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2434 buf, len, is_write);
2435 }
2436
2437 enum write_rom_type {
2438 WRITE_DATA,
2439 FLUSH_CACHE,
2440 };
2441
2442 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2443 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2444 {
2445 hwaddr l;
2446 uint8_t *ptr;
2447 hwaddr addr1;
2448 MemoryRegion *mr;
2449
2450 rcu_read_lock();
2451 while (len > 0) {
2452 l = len;
2453 mr = address_space_translate(as, addr, &addr1, &l, true);
2454
2455 if (!(memory_region_is_ram(mr) ||
2456 memory_region_is_romd(mr))) {
2457 /* do nothing */
2458 } else {
2459 addr1 += memory_region_get_ram_addr(mr);
2460 /* ROM/RAM case */
2461 ptr = qemu_get_ram_ptr(addr1);
2462 switch (type) {
2463 case WRITE_DATA:
2464 memcpy(ptr, buf, l);
2465 invalidate_and_set_dirty(mr, addr1, l);
2466 break;
2467 case FLUSH_CACHE:
2468 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2469 break;
2470 }
2471 }
2472 len -= l;
2473 buf += l;
2474 addr += l;
2475 }
2476 rcu_read_unlock();
2477 }
2478
2479 /* used for ROM loading: can write to both RAM and ROM */
2480 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2481 const uint8_t *buf, int len)
2482 {
2483 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2484 }
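
/*
 * Illustrative sketch (not an actual caller in this file): a firmware
 * loader writing a blob into a region that the guest may only see as ROM.
 * The blob pointer, size and load address are placeholders.
 */
static void example_load_firmware(AddressSpace *as, const uint8_t *blob,
                                  int blob_size, hwaddr load_addr)
{
    /* Unlike address_space_write(), this also lands in ROM/ROMD regions
     * and keeps dirty tracking and TB invalidation in sync. */
    cpu_physical_memory_write_rom(as, load_addr, blob, blob_size);
}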
2485
2486 void cpu_flush_icache_range(hwaddr start, int len)
2487 {
2488 /*
2489 * This function should do the same thing as an icache flush that was
2490 * triggered from within the guest. For TCG we are always cache coherent,
2491 * so there is no need to flush anything. For KVM / Xen we need to flush
2492 * the host's instruction cache at least.
2493 */
2494 if (tcg_enabled()) {
2495 return;
2496 }
2497
2498 cpu_physical_memory_write_rom_internal(&address_space_memory,
2499 start, NULL, len, FLUSH_CACHE);
2500 }
2501
2502 typedef struct {
2503 MemoryRegion *mr;
2504 void *buffer;
2505 hwaddr addr;
2506 hwaddr len;
2507 bool in_use;
2508 } BounceBuffer;
2509
2510 static BounceBuffer bounce;
2511
2512 typedef struct MapClient {
2513 QEMUBH *bh;
2514 QLIST_ENTRY(MapClient) link;
2515 } MapClient;
2516
2517 QemuMutex map_client_list_lock;
2518 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2519 = QLIST_HEAD_INITIALIZER(map_client_list);
2520
2521 static void cpu_unregister_map_client_do(MapClient *client)
2522 {
2523 QLIST_REMOVE(client, link);
2524 g_free(client);
2525 }
2526
2527 static void cpu_notify_map_clients_locked(void)
2528 {
2529 MapClient *client;
2530
2531 while (!QLIST_EMPTY(&map_client_list)) {
2532 client = QLIST_FIRST(&map_client_list);
2533 qemu_bh_schedule(client->bh);
2534 cpu_unregister_map_client_do(client);
2535 }
2536 }
2537
2538 void cpu_register_map_client(QEMUBH *bh)
2539 {
2540 MapClient *client = g_malloc(sizeof(*client));
2541
2542 qemu_mutex_lock(&map_client_list_lock);
2543 client->bh = bh;
2544 QLIST_INSERT_HEAD(&map_client_list, client, link);
2545 if (!atomic_read(&bounce.in_use)) {
2546 cpu_notify_map_clients_locked();
2547 }
2548 qemu_mutex_unlock(&map_client_list_lock);
2549 }
2550
2551 void cpu_exec_init_all(void)
2552 {
2553 qemu_mutex_init(&ram_list.mutex);
2554 memory_map_init();
2555 io_mem_init();
2556 qemu_mutex_init(&map_client_list_lock);
2557 }
2558
2559 void cpu_unregister_map_client(QEMUBH *bh)
2560 {
2561 MapClient *client;
2562
2563 qemu_mutex_lock(&map_client_list_lock);
2564 QLIST_FOREACH(client, &map_client_list, link) {
2565 if (client->bh == bh) {
2566 cpu_unregister_map_client_do(client);
2567 break;
2568 }
2569 }
2570 qemu_mutex_unlock(&map_client_list_lock);
2571 }
2572
2573 static void cpu_notify_map_clients(void)
2574 {
2575 qemu_mutex_lock(&map_client_list_lock);
2576 cpu_notify_map_clients_locked();
2577 qemu_mutex_unlock(&map_client_list_lock);
2578 }
2579
2580 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2581 {
2582 MemoryRegion *mr;
2583 hwaddr l, xlat;
2584
2585 rcu_read_lock();
2586 while (len > 0) {
2587 l = len;
2588 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2589 if (!memory_access_is_direct(mr, is_write)) {
2590 l = memory_access_size(mr, l, addr);
2591 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
rcu_read_unlock();
2592 return false;
2593 }
2594 }
2595
2596 len -= l;
2597 addr += l;
2598 }
2599 rcu_read_unlock();
2600 return true;
2601 }
2602
2603 /* Map a physical memory region into a host virtual address.
2604 * May map a subset of the requested range, given by and returned in *plen.
2605 * May return NULL if resources needed to perform the mapping are exhausted.
2606 * Use only for reads OR writes - not for read-modify-write operations.
2607 * Use cpu_register_map_client() to know when retrying the map operation is
2608 * likely to succeed.
2609 */
2610 void *address_space_map(AddressSpace *as,
2611 hwaddr addr,
2612 hwaddr *plen,
2613 bool is_write)
2614 {
2615 hwaddr len = *plen;
2616 hwaddr done = 0;
2617 hwaddr l, xlat, base;
2618 MemoryRegion *mr, *this_mr;
2619 ram_addr_t raddr;
2620
2621 if (len == 0) {
2622 return NULL;
2623 }
2624
2625 l = len;
2626 rcu_read_lock();
2627 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2628
2629 if (!memory_access_is_direct(mr, is_write)) {
2630 if (atomic_xchg(&bounce.in_use, true)) {
2631 rcu_read_unlock();
2632 return NULL;
2633 }
2634 /* Avoid unbounded allocations */
2635 l = MIN(l, TARGET_PAGE_SIZE);
2636 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2637 bounce.addr = addr;
2638 bounce.len = l;
2639
2640 memory_region_ref(mr);
2641 bounce.mr = mr;
2642 if (!is_write) {
2643 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2644 bounce.buffer, l);
2645 }
2646
2647 rcu_read_unlock();
2648 *plen = l;
2649 return bounce.buffer;
2650 }
2651
2652 base = xlat;
2653 raddr = memory_region_get_ram_addr(mr);
2654
2655 for (;;) {
2656 len -= l;
2657 addr += l;
2658 done += l;
2659 if (len == 0) {
2660 break;
2661 }
2662
2663 l = len;
2664 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2665 if (this_mr != mr || xlat != base + done) {
2666 break;
2667 }
2668 }
2669
2670 memory_region_ref(mr);
2671 rcu_read_unlock();
2672 *plen = done;
2673 return qemu_ram_ptr_length(raddr + base, plen);
2674 }
2675
2676 /* Unmaps a memory region previously mapped by address_space_map().
2677 * Will also mark the memory as dirty if is_write == 1. access_len gives
2678 * the amount of memory that was actually read or written by the caller.
2679 */
2680 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2681 int is_write, hwaddr access_len)
2682 {
2683 if (buffer != bounce.buffer) {
2684 MemoryRegion *mr;
2685 ram_addr_t addr1;
2686
2687 mr = qemu_ram_addr_from_host(buffer, &addr1);
2688 assert(mr != NULL);
2689 if (is_write) {
2690 invalidate_and_set_dirty(mr, addr1, access_len);
2691 }
2692 if (xen_enabled()) {
2693 xen_invalidate_map_cache_entry(buffer);
2694 }
2695 memory_region_unref(mr);
2696 return;
2697 }
2698 if (is_write) {
2699 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2700 bounce.buffer, access_len);
2701 }
2702 qemu_vfree(bounce.buffer);
2703 bounce.buffer = NULL;
2704 memory_region_unref(bounce.mr);
2705 atomic_mb_set(&bounce.in_use, false);
2706 cpu_notify_map_clients();
2707 }
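
/*
 * Illustrative sketch (not part of the original file): the map / use /
 * unmap pattern the two functions above are designed for.  If the target
 * is not direct RAM, address_space_map() may hand out the single bounce
 * buffer or fail with NULL; cpu_register_map_client() with a caller-owned
 * QEMUBH (assumed to exist here) is the documented way to retry later.
 */
static bool example_dma_write(AddressSpace *as, hwaddr addr, hwaddr len,
                              const uint8_t *src, QEMUBH *retry_bh)
{
    hwaddr mapped_len = len;
    void *host = address_space_map(as, addr, &mapped_len, true);

    if (!host) {
        /* Bounce buffer busy or resources exhausted: ask to be notified. */
        cpu_register_map_client(retry_bh);
        return false;
    }
    /* Only mapped_len bytes are guaranteed to be contiguous. */
    memcpy(host, src, MIN(len, mapped_len));
    address_space_unmap(as, host, mapped_len, true, MIN(len, mapped_len));
    return true;
}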
2708
2709 void *cpu_physical_memory_map(hwaddr addr,
2710 hwaddr *plen,
2711 int is_write)
2712 {
2713 return address_space_map(&address_space_memory, addr, plen, is_write);
2714 }
2715
2716 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2717 int is_write, hwaddr access_len)
2718 {
2719 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2720 }
2721
2722 /* warning: addr must be aligned */
2723 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2724 MemTxAttrs attrs,
2725 MemTxResult *result,
2726 enum device_endian endian)
2727 {
2728 uint8_t *ptr;
2729 uint64_t val;
2730 MemoryRegion *mr;
2731 hwaddr l = 4;
2732 hwaddr addr1;
2733 MemTxResult r;
2734
2735 rcu_read_lock();
2736 mr = address_space_translate(as, addr, &addr1, &l, false);
2737 if (l < 4 || !memory_access_is_direct(mr, false)) {
2738 /* I/O case */
2739 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2740 #if defined(TARGET_WORDS_BIGENDIAN)
2741 if (endian == DEVICE_LITTLE_ENDIAN) {
2742 val = bswap32(val);
2743 }
2744 #else
2745 if (endian == DEVICE_BIG_ENDIAN) {
2746 val = bswap32(val);
2747 }
2748 #endif
2749 } else {
2750 /* RAM case */
2751 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2752 & TARGET_PAGE_MASK)
2753 + addr1);
2754 switch (endian) {
2755 case DEVICE_LITTLE_ENDIAN:
2756 val = ldl_le_p(ptr);
2757 break;
2758 case DEVICE_BIG_ENDIAN:
2759 val = ldl_be_p(ptr);
2760 break;
2761 default:
2762 val = ldl_p(ptr);
2763 break;
2764 }
2765 r = MEMTX_OK;
2766 }
2767 if (result) {
2768 *result = r;
2769 }
2770 rcu_read_unlock();
2771 return val;
2772 }
2773
2774 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2775 MemTxAttrs attrs, MemTxResult *result)
2776 {
2777 return address_space_ldl_internal(as, addr, attrs, result,
2778 DEVICE_NATIVE_ENDIAN);
2779 }
2780
2781 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2782 MemTxAttrs attrs, MemTxResult *result)
2783 {
2784 return address_space_ldl_internal(as, addr, attrs, result,
2785 DEVICE_LITTLE_ENDIAN);
2786 }
2787
2788 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2789 MemTxAttrs attrs, MemTxResult *result)
2790 {
2791 return address_space_ldl_internal(as, addr, attrs, result,
2792 DEVICE_BIG_ENDIAN);
2793 }
2794
2795 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2796 {
2797 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2798 }
2799
2800 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2801 {
2802 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2803 }
2804
2805 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2806 {
2807 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2808 }
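
/*
 * Illustrative sketch (not part of the original file): reading a 32-bit
 * big-endian field from guest memory independently of host and target
 * byte order, with the transaction result checked.  The address is a
 * placeholder.
 */
static uint32_t example_read_be32_field(AddressSpace *as, hwaddr field_addr)
{
    MemTxResult res;
    uint32_t v = address_space_ldl_be(as, field_addr,
                                      MEMTXATTRS_UNSPECIFIED, &res);

    return res == MEMTX_OK ? v : 0;
}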
2809
2810 /* warning: addr must be aligned */
2811 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2812 MemTxAttrs attrs,
2813 MemTxResult *result,
2814 enum device_endian endian)
2815 {
2816 uint8_t *ptr;
2817 uint64_t val;
2818 MemoryRegion *mr;
2819 hwaddr l = 8;
2820 hwaddr addr1;
2821 MemTxResult r;
2822
2823 rcu_read_lock();
2824 mr = address_space_translate(as, addr, &addr1, &l,
2825 false);
2826 if (l < 8 || !memory_access_is_direct(mr, false)) {
2827 /* I/O case */
2828 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2829 #if defined(TARGET_WORDS_BIGENDIAN)
2830 if (endian == DEVICE_LITTLE_ENDIAN) {
2831 val = bswap64(val);
2832 }
2833 #else
2834 if (endian == DEVICE_BIG_ENDIAN) {
2835 val = bswap64(val);
2836 }
2837 #endif
2838 } else {
2839 /* RAM case */
2840 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2841 & TARGET_PAGE_MASK)
2842 + addr1);
2843 switch (endian) {
2844 case DEVICE_LITTLE_ENDIAN:
2845 val = ldq_le_p(ptr);
2846 break;
2847 case DEVICE_BIG_ENDIAN:
2848 val = ldq_be_p(ptr);
2849 break;
2850 default:
2851 val = ldq_p(ptr);
2852 break;
2853 }
2854 r = MEMTX_OK;
2855 }
2856 if (result) {
2857 *result = r;
2858 }
2859 rcu_read_unlock();
2860 return val;
2861 }
2862
2863 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2864 MemTxAttrs attrs, MemTxResult *result)
2865 {
2866 return address_space_ldq_internal(as, addr, attrs, result,
2867 DEVICE_NATIVE_ENDIAN);
2868 }
2869
2870 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2871 MemTxAttrs attrs, MemTxResult *result)
2872 {
2873 return address_space_ldq_internal(as, addr, attrs, result,
2874 DEVICE_LITTLE_ENDIAN);
2875 }
2876
2877 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2878 MemTxAttrs attrs, MemTxResult *result)
2879 {
2880 return address_space_ldq_internal(as, addr, attrs, result,
2881 DEVICE_BIG_ENDIAN);
2882 }
2883
2884 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2885 {
2886 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2887 }
2888
2889 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2890 {
2891 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2892 }
2893
2894 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2895 {
2896 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2897 }
2898
2899 /* XXX: optimize */
2900 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2901 MemTxAttrs attrs, MemTxResult *result)
2902 {
2903 uint8_t val;
2904 MemTxResult r;
2905
2906 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2907 if (result) {
2908 *result = r;
2909 }
2910 return val;
2911 }
2912
2913 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2914 {
2915 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2916 }
2917
2918 /* warning: addr must be aligned */
2919 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2920 hwaddr addr,
2921 MemTxAttrs attrs,
2922 MemTxResult *result,
2923 enum device_endian endian)
2924 {
2925 uint8_t *ptr;
2926 uint64_t val;
2927 MemoryRegion *mr;
2928 hwaddr l = 2;
2929 hwaddr addr1;
2930 MemTxResult r;
2931
2932 rcu_read_lock();
2933 mr = address_space_translate(as, addr, &addr1, &l,
2934 false);
2935 if (l < 2 || !memory_access_is_direct(mr, false)) {
2936 /* I/O case */
2937 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
2938 #if defined(TARGET_WORDS_BIGENDIAN)
2939 if (endian == DEVICE_LITTLE_ENDIAN) {
2940 val = bswap16(val);
2941 }
2942 #else
2943 if (endian == DEVICE_BIG_ENDIAN) {
2944 val = bswap16(val);
2945 }
2946 #endif
2947 } else {
2948 /* RAM case */
2949 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2950 & TARGET_PAGE_MASK)
2951 + addr1);
2952 switch (endian) {
2953 case DEVICE_LITTLE_ENDIAN:
2954 val = lduw_le_p(ptr);
2955 break;
2956 case DEVICE_BIG_ENDIAN:
2957 val = lduw_be_p(ptr);
2958 break;
2959 default:
2960 val = lduw_p(ptr);
2961 break;
2962 }
2963 r = MEMTX_OK;
2964 }
2965 if (result) {
2966 *result = r;
2967 }
2968 rcu_read_unlock();
2969 return val;
2970 }
2971
2972 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
2973 MemTxAttrs attrs, MemTxResult *result)
2974 {
2975 return address_space_lduw_internal(as, addr, attrs, result,
2976 DEVICE_NATIVE_ENDIAN);
2977 }
2978
2979 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
2980 MemTxAttrs attrs, MemTxResult *result)
2981 {
2982 return address_space_lduw_internal(as, addr, attrs, result,
2983 DEVICE_LITTLE_ENDIAN);
2984 }
2985
2986 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
2987 MemTxAttrs attrs, MemTxResult *result)
2988 {
2989 return address_space_lduw_internal(as, addr, attrs, result,
2990 DEVICE_BIG_ENDIAN);
2991 }
2992
2993 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2994 {
2995 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2996 }
2997
2998 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2999 {
3000 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3001 }
3002
3003 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3004 {
3005 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3006 }
3007
3008 /* warning: addr must be aligned. The ram page is not marked as dirty
3009 and the code inside is not invalidated. It is useful if the dirty
3010 bits are used to track modified PTEs */
3011 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3012 MemTxAttrs attrs, MemTxResult *result)
3013 {
3014 uint8_t *ptr;
3015 MemoryRegion *mr;
3016 hwaddr l = 4;
3017 hwaddr addr1;
3018 MemTxResult r;
3019 uint8_t dirty_log_mask;
3020
3021 rcu_read_lock();
3022 mr = address_space_translate(as, addr, &addr1, &l,
3023 true);
3024 if (l < 4 || !memory_access_is_direct(mr, true)) {
3025 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3026 } else {
3027 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3028 ptr = qemu_get_ram_ptr(addr1);
3029 stl_p(ptr, val);
3030
3031 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3032 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3033 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3034 r = MEMTX_OK;
3035 }
3036 if (result) {
3037 *result = r;
3038 }
3039 rcu_read_unlock();
3040 }
3041
3042 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3043 {
3044 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3045 }
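
/*
 * Illustrative sketch (not code from this file): the typical user of the
 * "notdirty" store is a target MMU helper updating accessed/dirty bits of
 * a guest page-table entry in place.  The bit masks below are placeholders
 * for whatever the target architecture defines.
 */
#define EXAMPLE_PTE_ACCESSED 0x20
#define EXAMPLE_PTE_DIRTY    0x40

static void example_update_pte(AddressSpace *as, hwaddr pte_addr,
                               uint32_t pte, bool page_was_written)
{
    pte |= EXAMPLE_PTE_ACCESSED | (page_was_written ? EXAMPLE_PTE_DIRTY : 0);
    /* The notdirty variant skips the code-dirty tracking, so translated
     * code sharing the page-table page is not needlessly invalidated. */
    stl_phys_notdirty(as, pte_addr, pte);
}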
3046
3047 /* warning: addr must be aligned */
3048 static inline void address_space_stl_internal(AddressSpace *as,
3049 hwaddr addr, uint32_t val,
3050 MemTxAttrs attrs,
3051 MemTxResult *result,
3052 enum device_endian endian)
3053 {
3054 uint8_t *ptr;
3055 MemoryRegion *mr;
3056 hwaddr l = 4;
3057 hwaddr addr1;
3058 MemTxResult r;
3059
3060 rcu_read_lock();
3061 mr = address_space_translate(as, addr, &addr1, &l,
3062 true);
3063 if (l < 4 || !memory_access_is_direct(mr, true)) {
3064 #if defined(TARGET_WORDS_BIGENDIAN)
3065 if (endian == DEVICE_LITTLE_ENDIAN) {
3066 val = bswap32(val);
3067 }
3068 #else
3069 if (endian == DEVICE_BIG_ENDIAN) {
3070 val = bswap32(val);
3071 }
3072 #endif
3073 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3074 } else {
3075 /* RAM case */
3076 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3077 ptr = qemu_get_ram_ptr(addr1);
3078 switch (endian) {
3079 case DEVICE_LITTLE_ENDIAN:
3080 stl_le_p(ptr, val);
3081 break;
3082 case DEVICE_BIG_ENDIAN:
3083 stl_be_p(ptr, val);
3084 break;
3085 default:
3086 stl_p(ptr, val);
3087 break;
3088 }
3089 invalidate_and_set_dirty(mr, addr1, 4);
3090 r = MEMTX_OK;
3091 }
3092 if (result) {
3093 *result = r;
3094 }
3095 rcu_read_unlock();
3096 }
3097
3098 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3099 MemTxAttrs attrs, MemTxResult *result)
3100 {
3101 address_space_stl_internal(as, addr, val, attrs, result,
3102 DEVICE_NATIVE_ENDIAN);
3103 }
3104
3105 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3106 MemTxAttrs attrs, MemTxResult *result)
3107 {
3108 address_space_stl_internal(as, addr, val, attrs, result,
3109 DEVICE_LITTLE_ENDIAN);
3110 }
3111
3112 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3113 MemTxAttrs attrs, MemTxResult *result)
3114 {
3115 address_space_stl_internal(as, addr, val, attrs, result,
3116 DEVICE_BIG_ENDIAN);
3117 }
3118
3119 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3120 {
3121 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3122 }
3123
3124 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3125 {
3126 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3127 }
3128
3129 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3130 {
3131 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3132 }
3133
3134 /* XXX: optimize */
3135 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3136 MemTxAttrs attrs, MemTxResult *result)
3137 {
3138 uint8_t v = val;
3139 MemTxResult r;
3140
3141 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3142 if (result) {
3143 *result = r;
3144 }
3145 }
3146
3147 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3148 {
3149 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3150 }
3151
3152 /* warning: addr must be aligned */
3153 static inline void address_space_stw_internal(AddressSpace *as,
3154 hwaddr addr, uint32_t val,
3155 MemTxAttrs attrs,
3156 MemTxResult *result,
3157 enum device_endian endian)
3158 {
3159 uint8_t *ptr;
3160 MemoryRegion *mr;
3161 hwaddr l = 2;
3162 hwaddr addr1;
3163 MemTxResult r;
3164
3165 rcu_read_lock();
3166 mr = address_space_translate(as, addr, &addr1, &l, true);
3167 if (l < 2 || !memory_access_is_direct(mr, true)) {
3168 #if defined(TARGET_WORDS_BIGENDIAN)
3169 if (endian == DEVICE_LITTLE_ENDIAN) {
3170 val = bswap16(val);
3171 }
3172 #else
3173 if (endian == DEVICE_BIG_ENDIAN) {
3174 val = bswap16(val);
3175 }
3176 #endif
3177 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3178 } else {
3179 /* RAM case */
3180 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3181 ptr = qemu_get_ram_ptr(addr1);
3182 switch (endian) {
3183 case DEVICE_LITTLE_ENDIAN:
3184 stw_le_p(ptr, val);
3185 break;
3186 case DEVICE_BIG_ENDIAN:
3187 stw_be_p(ptr, val);
3188 break;
3189 default:
3190 stw_p(ptr, val);
3191 break;
3192 }
3193 invalidate_and_set_dirty(mr, addr1, 2);
3194 r = MEMTX_OK;
3195 }
3196 if (result) {
3197 *result = r;
3198 }
3199 rcu_read_unlock();
3200 }
3201
3202 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3203 MemTxAttrs attrs, MemTxResult *result)
3204 {
3205 address_space_stw_internal(as, addr, val, attrs, result,
3206 DEVICE_NATIVE_ENDIAN);
3207 }
3208
3209 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3210 MemTxAttrs attrs, MemTxResult *result)
3211 {
3212 address_space_stw_internal(as, addr, val, attrs, result,
3213 DEVICE_LITTLE_ENDIAN);
3214 }
3215
3216 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3217 MemTxAttrs attrs, MemTxResult *result)
3218 {
3219 address_space_stw_internal(as, addr, val, attrs, result,
3220 DEVICE_BIG_ENDIAN);
3221 }
3222
3223 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3224 {
3225 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3226 }
3227
3228 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3229 {
3230 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3231 }
3232
3233 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3234 {
3235 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3236 }
3237
3238 /* XXX: optimize */
3239 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3240 MemTxAttrs attrs, MemTxResult *result)
3241 {
3242 MemTxResult r;
3243 val = tswap64(val);
3244 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3245 if (result) {
3246 *result = r;
3247 }
3248 }
3249
3250 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3251 MemTxAttrs attrs, MemTxResult *result)
3252 {
3253 MemTxResult r;
3254 val = cpu_to_le64(val);
3255 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3256 if (result) {
3257 *result = r;
3258 }
3259 }
3260 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3261 MemTxAttrs attrs, MemTxResult *result)
3262 {
3263 MemTxResult r;
3264 val = cpu_to_be64(val);
3265 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3266 if (result) {
3267 *result = r;
3268 }
3269 }
3270
3271 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3272 {
3273 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3274 }
3275
3276 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3277 {
3278 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3279 }
3280
3281 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3282 {
3283 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3284 }
3285
3286 /* virtual memory access for debug (includes writing to ROM) */
3287 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3288 uint8_t *buf, int len, int is_write)
3289 {
3290 int l;
3291 hwaddr phys_addr;
3292 target_ulong page;
3293
3294 while (len > 0) {
3295 page = addr & TARGET_PAGE_MASK;
3296 phys_addr = cpu_get_phys_page_debug(cpu, page);
3297 /* if no physical page mapped, return an error */
3298 if (phys_addr == -1)
3299 return -1;
3300 l = (page + TARGET_PAGE_SIZE) - addr;
3301 if (l > len)
3302 l = len;
3303 phys_addr += (addr & ~TARGET_PAGE_MASK);
3304 if (is_write) {
3305 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3306 } else {
3307 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3308 buf, l, 0);
3309 }
3310 len -= l;
3311 buf += l;
3312 addr += l;
3313 }
3314 return 0;
3315 }
3316 #endif
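
/*
 * Illustrative sketch (not part of the original file): how a debugger front
 * end such as the gdbstub might use cpu_memory_rw_debug() to peek at guest
 * virtual memory.  The CPU pointer and address come from the caller.
 */
static bool example_read_guest_bytes(CPUState *cpu, target_ulong vaddr,
                                     uint8_t *out, int out_len)
{
    /* is_write == 0 reads; the call fails if any page in range is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, out, out_len, 0) == 0;
}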
3317
3318 /*
3319 * A helper function for the _utterly broken_ virtio device model to find out if
3320 * it's running on a big endian machine. Don't do this at home kids!
3321 */
3322 bool target_words_bigendian(void);
3323 bool target_words_bigendian(void)
3324 {
3325 #if defined(TARGET_WORDS_BIGENDIAN)
3326 return true;
3327 #else
3328 return false;
3329 #endif
3330 }
3331
3332 #ifndef CONFIG_USER_ONLY
3333 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3334 {
3335 MemoryRegion *mr;
3336 hwaddr l = 1;
3337 bool res;
3338
3339 rcu_read_lock();
3340 mr = address_space_translate(&address_space_memory,
3341 phys_addr, &phys_addr, &l, false);
3342
3343 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3344 rcu_read_unlock();
3345 return res;
3346 }
3347
3348 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3349 {
3350 RAMBlock *block;
3351 int ret = 0;
3352
3353 rcu_read_lock();
3354 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3355 ret = func(block->idstr, block->host, block->offset,
3356 block->used_length, opaque);
3357 if (ret) {
3358 break;
3359 }
3360 }
3361 rcu_read_unlock();
3362 return ret;
3363 }
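
/*
 * Illustrative sketch (not part of the original file): a RAMBlockIterFunc
 * callback and its invocation.  The callback's parameter types are inferred
 * from the call inside qemu_ram_foreach_block() above; the output format is
 * just an example.
 */
static int example_print_block(const char *idstr, void *host_addr,
                               ram_addr_t offset, ram_addr_t length,
                               void *opaque)
{
    printf("block %s: host %p offset 0x%" PRIx64 " length 0x%" PRIx64 "\n",
           idstr, host_addr, (uint64_t)offset, (uint64_t)length);
    return 0;   /* returning non-zero stops the iteration */
}

static void example_dump_ram_blocks(void)
{
    qemu_ram_foreach_block(example_print_block, NULL);
}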
3364 #endif