[thirdparty/qemu.git] / exec.c
54936004 1/*
5b6dd868 2 * Virtual page mapping
5fafdf24 3 *
54936004 4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
8167ee88 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
54936004 18 */
67b915a5 19#include "config.h"
777872e5 20#ifndef _WIN32
a98d49b1 21#include <sys/types.h>
d5a8f07c 22#include <sys/mman.h>
23#endif
54936004 24
055403b2 25#include "qemu-common.h"
6180a181 26#include "cpu.h"
b67d9a52 27#include "tcg.h"
b3c7724c 28#include "hw/hw.h"
4485bd26 29#if !defined(CONFIG_USER_ONLY)
47c8ca53 30#include "hw/boards.h"
4485bd26 31#endif
cc9e98cb 32#include "hw/qdev.h"
1de7afc9 33#include "qemu/osdep.h"
9c17d615 34#include "sysemu/kvm.h"
2ff3de68 35#include "sysemu/sysemu.h"
0d09e41a 36#include "hw/xen/xen.h"
1de7afc9 37#include "qemu/timer.h"
38#include "qemu/config-file.h"
75a34036 39#include "qemu/error-report.h"
022c62cb 40#include "exec/memory.h"
9c17d615 41#include "sysemu/dma.h"
022c62cb 42#include "exec/address-spaces.h"
53a5960a 43#if defined(CONFIG_USER_ONLY)
44#include "qemu.h"
432d268c 45#else /* !CONFIG_USER_ONLY */
9c17d615 46#include "sysemu/xen-mapcache.h"
6506e4f9 47#include "trace.h"
53a5960a 48#endif
0d6d3c87 49#include "exec/cpu-all.h"
0dc3f44a 50#include "qemu/rcu_queue.h"
022c62cb 51#include "exec/cputlb.h"
5b6dd868 52#include "translate-all.h"
0cac1b66 53
022c62cb 54#include "exec/memory-internal.h"
220c3ebd 55#include "exec/ram_addr.h"
67d95c15 56
b35ba30f 57#include "qemu/range.h"
58
db7b5426 59//#define DEBUG_SUBPAGE
1196be37 60
e2eef170 61#if !defined(CONFIG_USER_ONLY)
981fdf23 62static bool in_migration;
94a6b54f 63
0dc3f44a 64/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
0d53d9fe 67RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
62152b8a 68
69static MemoryRegion *system_memory;
309cb471 70static MemoryRegion *system_io;
62152b8a 71
f6790af6 72AddressSpace address_space_io;
73AddressSpace address_space_memory;
2673a5da 74
0844e007 75MemoryRegion io_mem_rom, io_mem_notdirty;
acc9d80b 76static MemoryRegion io_mem_unassigned;
0e0df1e2 77
7bd4f430 78/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79#define RAM_PREALLOC (1 << 0)
80
dbcb8981 81/* RAM is mmap-ed with MAP_SHARED */
82#define RAM_SHARED (1 << 1)
83
62be4e3a 84/* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
86 */
87#define RAM_RESIZEABLE (1 << 2)
88
e2eef170 89#endif
9fa3e853 90
bdc44640 91struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
6a00d601 92/* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
4917cf44 94DEFINE_TLS(CPUState *, current_cpu);
2e70f6ef 95/* 0 = Do not count executed instructions.
bf20dc07 96 1 = Precise instruction counting.
2e70f6ef 97 2 = Adaptive rate instruction counting. */
5708fc66 98int use_icount;
6a00d601 99
e2eef170 100#if !defined(CONFIG_USER_ONLY)
4346ae3e 101
1db8abb1 102typedef struct PhysPageEntry PhysPageEntry;
103
104struct PhysPageEntry {
9736e55b 105 /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
8b795765 106 uint32_t skip : 6;
9736e55b 107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
8b795765 108 uint32_t ptr : 26;
1db8abb1 109};
110
8b795765 111#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
03f49957 113/* Size of the L2 (and L3, etc) page tables. */
57271d63 114#define ADDR_SPACE_BITS 64
03f49957 115
026736ce 116#define P_L2_BITS 9
03f49957 117#define P_L2_SIZE (1 << P_L2_BITS)
118
119#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
120
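/* For example, with ADDR_SPACE_BITS == 64, P_L2_BITS == 9 and 4 KiB target
 * pages (TARGET_PAGE_BITS == 12, a common but target-dependent value),
 * P_L2_LEVELS evaluates to ((64 - 12 - 1) / 9) + 1 == 6, i.e. six levels
 * of tables with P_L2_SIZE == 512 entries each.
 */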
121typedef PhysPageEntry Node[P_L2_SIZE];
0475d94f 122
53cb28cb 123typedef struct PhysPageMap {
79e2b9ae 124 struct rcu_head rcu;
125
53cb28cb 126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132} PhysPageMap;
133
1db8abb1 134struct AddressSpaceDispatch {
79e2b9ae 135 struct rcu_head rcu;
136
1db8abb1 137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
53cb28cb 141 PhysPageMap map;
acc9d80b 142 AddressSpace *as;
1db8abb1 143};
144
90260c6c 145#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146typedef struct subpage_t {
147 MemoryRegion iomem;
acc9d80b 148 AddressSpace *as;
90260c6c 149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151} subpage_t;
152
b41aac4f 153#define PHYS_SECTION_UNASSIGNED 0
154#define PHYS_SECTION_NOTDIRTY 1
155#define PHYS_SECTION_ROM 2
156#define PHYS_SECTION_WATCH 3
5312bd8b 157
e2eef170 158static void io_mem_init(void);
62152b8a 159static void memory_map_init(void);
09daed84 160static void tcg_commit(MemoryListener *listener);
e2eef170 161
1ec9b909 162static MemoryRegion io_mem_watch;
6658ffb8 163#endif
fd6ce8f6 164
6d9a1304 165#if !defined(CONFIG_USER_ONLY)
d6f2ea22 166
53cb28cb 167static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
d6f2ea22 168{
53cb28cb
MA
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
d6f2ea22 173 }
f7bf5461
AK
174}
175
53cb28cb 176static uint32_t phys_map_node_alloc(PhysPageMap *map)
f7bf5461
AK
177{
178 unsigned i;
8b795765 179 uint32_t ret;
f7bf5461 180
53cb28cb 181 ret = map->nodes_nb++;
f7bf5461 182 assert(ret != PHYS_MAP_NODE_NIL);
53cb28cb 183 assert(ret != map->nodes_nb_alloc);
03f49957 184 for (i = 0; i < P_L2_SIZE; ++i) {
53cb28cb
MA
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
d6f2ea22 187 }
f7bf5461 188 return ret;
d6f2ea22
AK
189}
190
53cb28cb
MA
191static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
2999097b 193 int level)
f7bf5461
AK
194{
195 PhysPageEntry *p;
196 int i;
03f49957 197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
108c49b8 198
9736e55b 199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
53cb28cb
MA
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
f7bf5461 202 if (level == 0) {
03f49957 203 for (i = 0; i < P_L2_SIZE; i++) {
9736e55b 204 p[i].skip = 0;
b41aac4f 205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
4346ae3e 206 }
67c4d23c 207 }
f7bf5461 208 } else {
53cb28cb 209 p = map->nodes[lp->ptr];
92e873b9 210 }
03f49957 211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
f7bf5461 212
03f49957 213 while (*nb && lp < &p[P_L2_SIZE]) {
07f07b31 214 if ((*index & (step - 1)) == 0 && *nb >= step) {
9736e55b 215 lp->skip = 0;
c19e8800 216 lp->ptr = leaf;
07f07b31
AK
217 *index += step;
218 *nb -= step;
2999097b 219 } else {
53cb28cb 220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
2999097b
AK
221 }
222 ++lp;
f7bf5461
AK
223 }
224}
225
ac1970fb 226static void phys_page_set(AddressSpaceDispatch *d,
a8170e5e 227 hwaddr index, hwaddr nb,
2999097b 228 uint16_t leaf)
f7bf5461 229{
2999097b 230 /* Wildly overreserve - it doesn't matter much. */
53cb28cb 231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
5cd2c5b6 232
53cb28cb 233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
92e873b9
FB
234}
235
b35ba30f 236/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240{
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287}
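/* Example of the effect: if an entry with skip == 1 points at a table whose
 * only populated slot itself has skip == 1 and points at another table, the
 * entry is rewritten to point at that second table with skip == 2, so
 * phys_page_find() below crosses both levels in a single step.
 */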
288
289static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290{
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
53cb28cb 294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
b35ba30f
MT
295 }
296}
297
97115a8d 298static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
9affd6fc 299 Node *nodes, MemoryRegionSection *sections)
92e873b9 300{
31ab2b4a 301 PhysPageEntry *p;
97115a8d 302 hwaddr index = addr >> TARGET_PAGE_BITS;
31ab2b4a 303 int i;
f1f6e3b8 304
9736e55b 305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
c19e8800 306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
9affd6fc 307 return &sections[PHYS_SECTION_UNASSIGNED];
31ab2b4a 308 }
9affd6fc 309 p = nodes[lp.ptr];
03f49957 310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
5312bd8b 311 }
b35ba30f
MT
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
f3705d53
AK
320}
321
e5548617
BS
322bool memory_region_is_unassigned(MemoryRegion *mr)
323{
2a8e7499 324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
5b6dd868 325 && mr != &io_mem_watch;
fd6ce8f6 326}
149f54b5 327
79e2b9ae 328/* Called from RCU critical section */
c7086b4a 329static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
90260c6c
JK
330 hwaddr addr,
331 bool resolve_subpage)
9f029603 332{
90260c6c
JK
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
53cb28cb 336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
90260c6c
JK
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
53cb28cb 339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
90260c6c
JK
340 }
341 return section;
9f029603
JK
342}
343
79e2b9ae 344/* Called from RCU critical section */
90260c6c 345static MemoryRegionSection *
c7086b4a 346address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
90260c6c 347 hwaddr *plen, bool resolve_subpage)
149f54b5
PB
348{
349 MemoryRegionSection *section;
a87f3954 350 Int128 diff;
149f54b5 351
c7086b4a 352 section = address_space_lookup_region(d, addr, resolve_subpage);
149f54b5
PB
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
3752a036 360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
149f54b5
PB
361 return section;
362}
90260c6c 363
a87f3954
PB
364static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365{
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374}
375
5c8a00ce
PB
376MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
90260c6c 379{
30951157
AK
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
383 hwaddr len = *plen;
384
79e2b9ae 385 rcu_read_lock();
30951157 386 for (;;) {
79e2b9ae
PB
387 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
388 section = address_space_translate_internal(d, addr, &addr, plen, true);
30951157
AK
389 mr = section->mr;
390
391 if (!mr->iommu_ops) {
392 break;
393 }
394
8d7b8cb9 395 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
30951157
AK
396 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
397 | (addr & iotlb.addr_mask));
398 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
399 if (!(iotlb.perm & (1 << is_write))) {
400 mr = &io_mem_unassigned;
401 break;
402 }
403
404 as = iotlb.target_as;
405 }
406
fe680d0d 407 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
a87f3954
PB
408 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
409 len = MIN(page, len);
410 }
411
30951157
AK
412 *plen = len;
413 *xlat = addr;
79e2b9ae 414 rcu_read_unlock();
30951157 415 return mr;
90260c6c
JK
416}
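/* A concrete reading of the IOMMU loop above, assuming a 4 KiB IOMMU page
 * (iotlb.addr_mask == 0xfff): the low 12 bits of the input address are kept
 * as the page offset, the upper bits are taken from iotlb.translated_addr,
 * and len is clipped so the access never crosses the translated page.
 */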
417
79e2b9ae 418/* Called from RCU critical section */
90260c6c 419MemoryRegionSection *
9d82b5a7
PB
420address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen)
90260c6c 422{
30951157 423 MemoryRegionSection *section;
9d82b5a7
PB
424 section = address_space_translate_internal(cpu->memory_dispatch,
425 addr, xlat, plen, false);
30951157
AK
426
427 assert(!section->mr->iommu_ops);
428 return section;
90260c6c 429}
5b6dd868 430#endif
fd6ce8f6 431
5b6dd868 432void cpu_exec_init_all(void)
fdbb84d1 433{
5b6dd868 434#if !defined(CONFIG_USER_ONLY)
b2a8658e 435 qemu_mutex_init(&ram_list.mutex);
5b6dd868
BS
436 memory_map_init();
437 io_mem_init();
fdbb84d1 438#endif
5b6dd868 439}
fdbb84d1 440
b170fce3 441#if !defined(CONFIG_USER_ONLY)
5b6dd868
BS
442
443static int cpu_common_post_load(void *opaque, int version_id)
fd6ce8f6 444{
259186a7 445 CPUState *cpu = opaque;
a513fe19 446
5b6dd868
BS
447 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
448 version_id is increased. */
259186a7 449 cpu->interrupt_request &= ~0x01;
c01a71c1 450 tlb_flush(cpu, 1);
5b6dd868
BS
451
452 return 0;
a513fe19 453}
7501267e 454
6c3bff0e
PD
455static int cpu_common_pre_load(void *opaque)
456{
457 CPUState *cpu = opaque;
458
adee6424 459 cpu->exception_index = -1;
6c3bff0e
PD
460
461 return 0;
462}
463
464static bool cpu_common_exception_index_needed(void *opaque)
465{
466 CPUState *cpu = opaque;
467
adee6424 468 return tcg_enabled() && cpu->exception_index != -1;
6c3bff0e
PD
469}
470
471static const VMStateDescription vmstate_cpu_common_exception_index = {
472 .name = "cpu_common/exception_index",
473 .version_id = 1,
474 .minimum_version_id = 1,
475 .fields = (VMStateField[]) {
476 VMSTATE_INT32(exception_index, CPUState),
477 VMSTATE_END_OF_LIST()
478 }
479};
480
1a1562f5 481const VMStateDescription vmstate_cpu_common = {
5b6dd868
BS
482 .name = "cpu_common",
483 .version_id = 1,
484 .minimum_version_id = 1,
6c3bff0e 485 .pre_load = cpu_common_pre_load,
5b6dd868 486 .post_load = cpu_common_post_load,
35d08458 487 .fields = (VMStateField[]) {
259186a7
AF
488 VMSTATE_UINT32(halted, CPUState),
489 VMSTATE_UINT32(interrupt_request, CPUState),
5b6dd868 490 VMSTATE_END_OF_LIST()
6c3bff0e
PD
491 },
492 .subsections = (VMStateSubsection[]) {
493 {
494 .vmsd = &vmstate_cpu_common_exception_index,
495 .needed = cpu_common_exception_index_needed,
496 } , {
497 /* empty */
498 }
5b6dd868
BS
499 }
500};
1a1562f5 501
5b6dd868 502#endif
ea041c0e 503
38d8f5c8 504CPUState *qemu_get_cpu(int index)
ea041c0e 505{
bdc44640 506 CPUState *cpu;
ea041c0e 507
bdc44640 508 CPU_FOREACH(cpu) {
55e5c285 509 if (cpu->cpu_index == index) {
bdc44640 510 return cpu;
55e5c285 511 }
ea041c0e 512 }
5b6dd868 513
bdc44640 514 return NULL;
ea041c0e
FB
515}
516
09daed84
EI
517#if !defined(CONFIG_USER_ONLY)
518void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
519{
520 /* We only support one address space per cpu at the moment. */
521 assert(cpu->as == as);
522
523 if (cpu->tcg_as_listener) {
524 memory_listener_unregister(cpu->tcg_as_listener);
525 } else {
526 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
527 }
528 cpu->tcg_as_listener->commit = tcg_commit;
529 memory_listener_register(cpu->tcg_as_listener, as);
530}
531#endif
532
5b6dd868 533void cpu_exec_init(CPUArchState *env)
ea041c0e 534{
5b6dd868 535 CPUState *cpu = ENV_GET_CPU(env);
b170fce3 536 CPUClass *cc = CPU_GET_CLASS(cpu);
bdc44640 537 CPUState *some_cpu;
5b6dd868
BS
538 int cpu_index;
539
540#if defined(CONFIG_USER_ONLY)
541 cpu_list_lock();
542#endif
5b6dd868 543 cpu_index = 0;
bdc44640 544 CPU_FOREACH(some_cpu) {
5b6dd868
BS
545 cpu_index++;
546 }
55e5c285 547 cpu->cpu_index = cpu_index;
1b1ed8dc 548 cpu->numa_node = 0;
f0c3c505 549 QTAILQ_INIT(&cpu->breakpoints);
ff4700b0 550 QTAILQ_INIT(&cpu->watchpoints);
5b6dd868 551#ifndef CONFIG_USER_ONLY
09daed84 552 cpu->as = &address_space_memory;
5b6dd868
BS
553 cpu->thread_id = qemu_get_thread_id();
554#endif
bdc44640 555 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
5b6dd868
BS
556#if defined(CONFIG_USER_ONLY)
557 cpu_list_unlock();
558#endif
e0d47944
AF
559 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
560 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
561 }
5b6dd868 562#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
5b6dd868
BS
563 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
564 cpu_save, cpu_load, env);
b170fce3 565 assert(cc->vmsd == NULL);
e0d47944 566 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
5b6dd868 567#endif
b170fce3
AF
568 if (cc->vmsd != NULL) {
569 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
570 }
ea041c0e
FB
571}
572
94df27fd 573#if defined(CONFIG_USER_ONLY)
00b941e5 574static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
94df27fd
PB
575{
576 tb_invalidate_phys_page_range(pc, pc + 1, 0);
577}
578#else
00b941e5 579static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
1e7855a5 580{
e8262a1b
MF
581 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
582 if (phys != -1) {
09daed84 583 tb_invalidate_phys_addr(cpu->as,
29d8ec7b 584 phys | (pc & ~TARGET_PAGE_MASK));
e8262a1b 585 }
1e7855a5 586}
c27004ec 587#endif
d720b93d 588
c527ee8f 589#if defined(CONFIG_USER_ONLY)
75a34036 590void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
c527ee8f
PB
591
592{
593}
594
3ee887e8
PM
595int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
596 int flags)
597{
598 return -ENOSYS;
599}
600
601void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
602{
603}
604
75a34036 605int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
c527ee8f
PB
606 int flags, CPUWatchpoint **watchpoint)
607{
608 return -ENOSYS;
609}
610#else
6658ffb8 611/* Add a watchpoint. */
75a34036 612int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 613 int flags, CPUWatchpoint **watchpoint)
6658ffb8 614{
c0ce998e 615 CPUWatchpoint *wp;
6658ffb8 616
05068c0d 617 /* forbid ranges which are empty or run off the end of the address space */
07e2863d 618 if (len == 0 || (addr + len - 1) < addr) {
75a34036
AF
619 error_report("tried to set invalid watchpoint at %"
620 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
b4051334
AL
621 return -EINVAL;
622 }
7267c094 623 wp = g_malloc(sizeof(*wp));
a1d1bb31
AL
624
625 wp->vaddr = addr;
05068c0d 626 wp->len = len;
a1d1bb31
AL
627 wp->flags = flags;
628
2dc9f411 629 /* keep all GDB-injected watchpoints in front */
ff4700b0
AF
630 if (flags & BP_GDB) {
631 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
632 } else {
633 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
634 }
6658ffb8 635
31b030d4 636 tlb_flush_page(cpu, addr);
a1d1bb31
AL
637
638 if (watchpoint)
639 *watchpoint = wp;
640 return 0;
6658ffb8
PB
641}
642
a1d1bb31 643/* Remove a specific watchpoint. */
75a34036 644int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 645 int flags)
6658ffb8 646{
a1d1bb31 647 CPUWatchpoint *wp;
6658ffb8 648
ff4700b0 649 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 650 if (addr == wp->vaddr && len == wp->len
6e140f28 651 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
75a34036 652 cpu_watchpoint_remove_by_ref(cpu, wp);
6658ffb8
PB
653 return 0;
654 }
655 }
a1d1bb31 656 return -ENOENT;
6658ffb8
PB
657}
658
a1d1bb31 659/* Remove a specific watchpoint by reference. */
75a34036 660void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
a1d1bb31 661{
ff4700b0 662 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
7d03f82f 663
31b030d4 664 tlb_flush_page(cpu, watchpoint->vaddr);
a1d1bb31 665
7267c094 666 g_free(watchpoint);
a1d1bb31
AL
667}
668
669/* Remove all matching watchpoints. */
75a34036 670void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 671{
c0ce998e 672 CPUWatchpoint *wp, *next;
a1d1bb31 673
ff4700b0 674 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
75a34036
AF
675 if (wp->flags & mask) {
676 cpu_watchpoint_remove_by_ref(cpu, wp);
677 }
c0ce998e 678 }
7d03f82f 679}
05068c0d
PM
680
681/* Return true if this watchpoint address matches the specified
682 * access (ie the address range covered by the watchpoint overlaps
683 * partially or completely with the address range covered by the
684 * access).
685 */
686static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
687 vaddr addr,
688 vaddr len)
689{
690 /* We know the lengths are non-zero, but a little caution is
691 * required to avoid errors in the case where the range ends
692 * exactly at the top of the address space and so addr + len
693 * wraps round to zero.
694 */
695 vaddr wpend = wp->vaddr + wp->len - 1;
696 vaddr addrend = addr + len - 1;
697
698 return !(addr > wpend || wp->vaddr > addrend);
699}
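/* Worked example: a watchpoint at vaddr == 0x1000 with len == 4 gives
 * wpend == 0x1003; an access at addr == 0x1002 with len == 2 gives
 * addrend == 0x1003, and since neither 0x1002 > 0x1003 nor 0x1000 > 0x1003
 * holds, the ranges overlap and the function returns true.
 */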
700
c527ee8f 701#endif
7d03f82f 702
a1d1bb31 703/* Add a breakpoint. */
b3310ab3 704int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
a1d1bb31 705 CPUBreakpoint **breakpoint)
4c3a88a2 706{
c0ce998e 707 CPUBreakpoint *bp;
3b46e624 708
7267c094 709 bp = g_malloc(sizeof(*bp));
4c3a88a2 710
a1d1bb31
AL
711 bp->pc = pc;
712 bp->flags = flags;
713
2dc9f411 714 /* keep all GDB-injected breakpoints in front */
00b941e5 715 if (flags & BP_GDB) {
f0c3c505 716 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
00b941e5 717 } else {
f0c3c505 718 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
00b941e5 719 }
3b46e624 720
f0c3c505 721 breakpoint_invalidate(cpu, pc);
a1d1bb31 722
00b941e5 723 if (breakpoint) {
a1d1bb31 724 *breakpoint = bp;
00b941e5 725 }
4c3a88a2 726 return 0;
4c3a88a2
FB
727}
728
a1d1bb31 729/* Remove a specific breakpoint. */
b3310ab3 730int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
a1d1bb31 731{
a1d1bb31
AL
732 CPUBreakpoint *bp;
733
f0c3c505 734 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
a1d1bb31 735 if (bp->pc == pc && bp->flags == flags) {
b3310ab3 736 cpu_breakpoint_remove_by_ref(cpu, bp);
a1d1bb31
AL
737 return 0;
738 }
7d03f82f 739 }
a1d1bb31 740 return -ENOENT;
7d03f82f
EI
741}
742
a1d1bb31 743/* Remove a specific breakpoint by reference. */
b3310ab3 744void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
4c3a88a2 745{
f0c3c505
AF
746 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
747
748 breakpoint_invalidate(cpu, breakpoint->pc);
a1d1bb31 749
7267c094 750 g_free(breakpoint);
a1d1bb31
AL
751}
752
753/* Remove all matching breakpoints. */
b3310ab3 754void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 755{
c0ce998e 756 CPUBreakpoint *bp, *next;
a1d1bb31 757
f0c3c505 758 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
b3310ab3
AF
759 if (bp->flags & mask) {
760 cpu_breakpoint_remove_by_ref(cpu, bp);
761 }
c0ce998e 762 }
4c3a88a2
FB
763}
764
c33a346e
FB
765/* enable or disable single step mode. EXCP_DEBUG is returned by the
766 CPU loop after each instruction */
3825b28f 767void cpu_single_step(CPUState *cpu, int enabled)
c33a346e 768{
ed2803da
AF
769 if (cpu->singlestep_enabled != enabled) {
770 cpu->singlestep_enabled = enabled;
771 if (kvm_enabled()) {
38e478ec 772 kvm_update_guest_debug(cpu, 0);
ed2803da 773 } else {
ccbb4d44 774 /* must flush all the translated code to avoid inconsistencies */
e22a25c9 775 /* XXX: only flush what is necessary */
38e478ec 776 CPUArchState *env = cpu->env_ptr;
e22a25c9
AL
777 tb_flush(env);
778 }
c33a346e 779 }
c33a346e
FB
780}
781
a47dddd7 782void cpu_abort(CPUState *cpu, const char *fmt, ...)
7501267e
FB
783{
784 va_list ap;
493ae1f0 785 va_list ap2;
7501267e
FB
786
787 va_start(ap, fmt);
493ae1f0 788 va_copy(ap2, ap);
7501267e
FB
789 fprintf(stderr, "qemu: fatal: ");
790 vfprintf(stderr, fmt, ap);
791 fprintf(stderr, "\n");
878096ee 792 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
93fcfe39
AL
793 if (qemu_log_enabled()) {
794 qemu_log("qemu: fatal: ");
795 qemu_log_vprintf(fmt, ap2);
796 qemu_log("\n");
a0762859 797 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
31b1a7b4 798 qemu_log_flush();
93fcfe39 799 qemu_log_close();
924edcae 800 }
493ae1f0 801 va_end(ap2);
f9373291 802 va_end(ap);
fd052bf6
RV
803#if defined(CONFIG_USER_ONLY)
804 {
805 struct sigaction act;
806 sigfillset(&act.sa_mask);
807 act.sa_handler = SIG_DFL;
808 sigaction(SIGABRT, &act, NULL);
809 }
810#endif
7501267e
FB
811 abort();
812}
813
0124311e 814#if !defined(CONFIG_USER_ONLY)
0dc3f44a 815/* Called from RCU critical section */
041603fe
PB
816static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
817{
818 RAMBlock *block;
819
43771539 820 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 821 if (block && addr - block->offset < block->max_length) {
041603fe
PB
822 goto found;
823 }
0dc3f44a 824 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 825 if (addr - block->offset < block->max_length) {
041603fe
PB
826 goto found;
827 }
828 }
829
830 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
831 abort();
832
833found:
43771539
PB
834 /* It is safe to write mru_block outside the iothread lock. This
835 * is what happens:
836 *
837 * mru_block = xxx
838 * rcu_read_unlock()
839 * xxx removed from list
840 * rcu_read_lock()
841 * read mru_block
842 * mru_block = NULL;
843 * call_rcu(reclaim_ramblock, xxx);
844 * rcu_read_unlock()
845 *
846 * atomic_rcu_set is not needed here. The block was already published
847 * when it was placed into the list. Here we're just making an extra
848 * copy of the pointer.
849 */
041603fe
PB
850 ram_list.mru_block = block;
851 return block;
852}
853
a2f4d5be 854static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
d24981d3 855{
041603fe 856 ram_addr_t start1;
a2f4d5be
JQ
857 RAMBlock *block;
858 ram_addr_t end;
859
860 end = TARGET_PAGE_ALIGN(start + length);
861 start &= TARGET_PAGE_MASK;
d24981d3 862
0dc3f44a 863 rcu_read_lock();
041603fe
PB
864 block = qemu_get_ram_block(start);
865 assert(block == qemu_get_ram_block(end - 1));
1240be24 866 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
041603fe 867 cpu_tlb_reset_dirty_all(start1, length);
0dc3f44a 868 rcu_read_unlock();
d24981d3
JQ
869}
870
5579c7f3 871/* Note: start and end must be within the same ram block. */
a2f4d5be 872void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
52159192 873 unsigned client)
1ccde1cb 874{
1ccde1cb
FB
875 if (length == 0)
876 return;
c8d6f66a 877 cpu_physical_memory_clear_dirty_range_type(start, length, client);
f23db169 878
d24981d3 879 if (tcg_enabled()) {
a2f4d5be 880 tlb_reset_dirty_range_all(start, length);
5579c7f3 881 }
1ccde1cb
FB
882}
883
981fdf23 884static void cpu_physical_memory_set_dirty_tracking(bool enable)
74576198
AL
885{
886 in_migration = enable;
74576198
AL
887}
888
79e2b9ae 889/* Called from RCU critical section */
bb0e627a 890hwaddr memory_region_section_get_iotlb(CPUState *cpu,
149f54b5
PB
891 MemoryRegionSection *section,
892 target_ulong vaddr,
893 hwaddr paddr, hwaddr xlat,
894 int prot,
895 target_ulong *address)
e5548617 896{
a8170e5e 897 hwaddr iotlb;
e5548617
BS
898 CPUWatchpoint *wp;
899
cc5bea60 900 if (memory_region_is_ram(section->mr)) {
e5548617
BS
901 /* Normal RAM. */
902 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
149f54b5 903 + xlat;
e5548617 904 if (!section->readonly) {
b41aac4f 905 iotlb |= PHYS_SECTION_NOTDIRTY;
e5548617 906 } else {
b41aac4f 907 iotlb |= PHYS_SECTION_ROM;
e5548617
BS
908 }
909 } else {
1b3fb98f 910 iotlb = section - section->address_space->dispatch->map.sections;
149f54b5 911 iotlb += xlat;
e5548617
BS
912 }
913
914 /* Make accesses to pages with watchpoints go via the
915 watchpoint trap routines. */
ff4700b0 916 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 917 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
e5548617
BS
918 /* Avoid trapping reads of pages with a write breakpoint. */
919 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
b41aac4f 920 iotlb = PHYS_SECTION_WATCH + paddr;
e5548617
BS
921 *address |= TLB_MMIO;
922 break;
923 }
924 }
925 }
926
927 return iotlb;
928}
9fa3e853
FB
929#endif /* defined(CONFIG_USER_ONLY) */
930
e2eef170 931#if !defined(CONFIG_USER_ONLY)
8da3ff18 932
c227f099 933static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 934 uint16_t section);
acc9d80b 935static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
54688b1e 936
a2b257d6
IM
937static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
938 qemu_anon_ram_alloc;
91138037
MA
939
940/*
 941 * Set a custom physical guest memory allocator.
942 * Accelerators with unusual needs may need this. Hopefully, we can
943 * get rid of it eventually.
944 */
a2b257d6 945void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
91138037
MA
946{
947 phys_mem_alloc = alloc;
948}
949
53cb28cb
MA
950static uint16_t phys_section_add(PhysPageMap *map,
951 MemoryRegionSection *section)
5312bd8b 952{
68f3f65b
PB
953 /* The physical section number is ORed with a page-aligned
954 * pointer to produce the iotlb entries. Thus it should
955 * never overflow into the page-aligned value.
956 */
53cb28cb 957 assert(map->sections_nb < TARGET_PAGE_SIZE);
68f3f65b 958
53cb28cb
MA
959 if (map->sections_nb == map->sections_nb_alloc) {
960 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
961 map->sections = g_renew(MemoryRegionSection, map->sections,
962 map->sections_nb_alloc);
5312bd8b 963 }
53cb28cb 964 map->sections[map->sections_nb] = *section;
dfde4e6e 965 memory_region_ref(section->mr);
53cb28cb 966 return map->sections_nb++;
5312bd8b
AK
967}
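/* Put differently (assuming 4 KiB target pages): the section number is ORed
 * into the low bits of a page-aligned iotlb value, so it must stay below
 * TARGET_PAGE_SIZE to avoid spilling into the page-aligned part, which is
 * exactly what the assert above enforces.
 */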
968
058bc4b5
PB
969static void phys_section_destroy(MemoryRegion *mr)
970{
dfde4e6e
PB
971 memory_region_unref(mr);
972
058bc4b5
PB
973 if (mr->subpage) {
974 subpage_t *subpage = container_of(mr, subpage_t, iomem);
b4fefef9 975 object_unref(OBJECT(&subpage->iomem));
058bc4b5
PB
976 g_free(subpage);
977 }
978}
979
6092666e 980static void phys_sections_free(PhysPageMap *map)
5312bd8b 981{
9affd6fc
PB
982 while (map->sections_nb > 0) {
983 MemoryRegionSection *section = &map->sections[--map->sections_nb];
058bc4b5
PB
984 phys_section_destroy(section->mr);
985 }
9affd6fc
PB
986 g_free(map->sections);
987 g_free(map->nodes);
5312bd8b
AK
988}
989
ac1970fb 990static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
0f0cb164
AK
991{
992 subpage_t *subpage;
a8170e5e 993 hwaddr base = section->offset_within_address_space
0f0cb164 994 & TARGET_PAGE_MASK;
97115a8d 995 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
53cb28cb 996 d->map.nodes, d->map.sections);
0f0cb164
AK
997 MemoryRegionSection subsection = {
998 .offset_within_address_space = base,
052e87b0 999 .size = int128_make64(TARGET_PAGE_SIZE),
0f0cb164 1000 };
a8170e5e 1001 hwaddr start, end;
0f0cb164 1002
f3705d53 1003 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
0f0cb164 1004
f3705d53 1005 if (!(existing->mr->subpage)) {
acc9d80b 1006 subpage = subpage_init(d->as, base);
3be91e86 1007 subsection.address_space = d->as;
0f0cb164 1008 subsection.mr = &subpage->iomem;
ac1970fb 1009 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
53cb28cb 1010 phys_section_add(&d->map, &subsection));
0f0cb164 1011 } else {
f3705d53 1012 subpage = container_of(existing->mr, subpage_t, iomem);
0f0cb164
AK
1013 }
1014 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
052e87b0 1015 end = start + int128_get64(section->size) - 1;
53cb28cb
MA
1016 subpage_register(subpage, start, end,
1017 phys_section_add(&d->map, section));
0f0cb164
AK
1018}
1019
1020
052e87b0
PB
1021static void register_multipage(AddressSpaceDispatch *d,
1022 MemoryRegionSection *section)
33417e70 1023{
a8170e5e 1024 hwaddr start_addr = section->offset_within_address_space;
53cb28cb 1025 uint16_t section_index = phys_section_add(&d->map, section);
052e87b0
PB
1026 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1027 TARGET_PAGE_BITS));
dd81124b 1028
733d5ef5
PB
1029 assert(num_pages);
1030 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
33417e70
FB
1031}
1032
ac1970fb 1033static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
0f0cb164 1034{
89ae337a 1035 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
00752703 1036 AddressSpaceDispatch *d = as->next_dispatch;
99b9cc06 1037 MemoryRegionSection now = *section, remain = *section;
052e87b0 1038 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
0f0cb164 1039
733d5ef5
PB
1040 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1041 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1042 - now.offset_within_address_space;
1043
052e87b0 1044 now.size = int128_min(int128_make64(left), now.size);
ac1970fb 1045 register_subpage(d, &now);
733d5ef5 1046 } else {
052e87b0 1047 now.size = int128_zero();
733d5ef5 1048 }
052e87b0
PB
1049 while (int128_ne(remain.size, now.size)) {
1050 remain.size = int128_sub(remain.size, now.size);
1051 remain.offset_within_address_space += int128_get64(now.size);
1052 remain.offset_within_region += int128_get64(now.size);
69b67646 1053 now = remain;
052e87b0 1054 if (int128_lt(remain.size, page_size)) {
733d5ef5 1055 register_subpage(d, &now);
88266249 1056 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
052e87b0 1057 now.size = page_size;
ac1970fb 1058 register_subpage(d, &now);
69b67646 1059 } else {
052e87b0 1060 now.size = int128_and(now.size, int128_neg(page_size));
ac1970fb 1061 register_multipage(d, &now);
69b67646 1062 }
0f0cb164
AK
1063 }
1064}
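/* Worked example for mem_add() above, assuming 4 KiB target pages and
 * illustrative addresses: a section starting at 0x1234 with size 0x3000 is
 * split into a leading subpage covering [0x1234, 0x1fff], one multipage
 * registration for the two full pages [0x2000, 0x3fff], and a trailing
 * subpage covering [0x4000, 0x4233].
 */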
1065
62a2744c
SY
1066void qemu_flush_coalesced_mmio_buffer(void)
1067{
1068 if (kvm_enabled())
1069 kvm_flush_coalesced_mmio_buffer();
1070}
1071
b2a8658e
UD
1072void qemu_mutex_lock_ramlist(void)
1073{
1074 qemu_mutex_lock(&ram_list.mutex);
1075}
1076
1077void qemu_mutex_unlock_ramlist(void)
1078{
1079 qemu_mutex_unlock(&ram_list.mutex);
1080}
1081
e1e84ba0 1082#ifdef __linux__
c902760f
MT
1083
1084#include <sys/vfs.h>
1085
1086#define HUGETLBFS_MAGIC 0x958458f6
1087
fc7a5800 1088static long gethugepagesize(const char *path, Error **errp)
c902760f
MT
1089{
1090 struct statfs fs;
1091 int ret;
1092
1093 do {
9742bf26 1094 ret = statfs(path, &fs);
c902760f
MT
1095 } while (ret != 0 && errno == EINTR);
1096
1097 if (ret != 0) {
fc7a5800
HT
1098 error_setg_errno(errp, errno, "failed to get page size of file %s",
1099 path);
9742bf26 1100 return 0;
c902760f
MT
1101 }
1102
1103 if (fs.f_type != HUGETLBFS_MAGIC)
9742bf26 1104 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
c902760f
MT
1105
1106 return fs.f_bsize;
1107}
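/* On a typical x86_64 host with 2 MiB huge pages, a hugetlbfs mount reports
 * f_bsize == 0x200000 here; file_ram_alloc() below then uses that value both
 * as the block alignment and as the minimum acceptable memory size.
 */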
1108
04b16653
AW
1109static void *file_ram_alloc(RAMBlock *block,
1110 ram_addr_t memory,
7f56e740
PB
1111 const char *path,
1112 Error **errp)
c902760f
MT
1113{
1114 char *filename;
8ca761f6
PF
1115 char *sanitized_name;
1116 char *c;
557529dd 1117 void *area = NULL;
c902760f 1118 int fd;
557529dd 1119 uint64_t hpagesize;
fc7a5800 1120 Error *local_err = NULL;
c902760f 1121
fc7a5800
HT
1122 hpagesize = gethugepagesize(path, &local_err);
1123 if (local_err) {
1124 error_propagate(errp, local_err);
f9a49dfa 1125 goto error;
c902760f 1126 }
a2b257d6 1127 block->mr->align = hpagesize;
c902760f
MT
1128
1129 if (memory < hpagesize) {
557529dd
HT
1130 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1131 "or larger than huge page size 0x%" PRIx64,
1132 memory, hpagesize);
1133 goto error;
c902760f
MT
1134 }
1135
1136 if (kvm_enabled() && !kvm_has_sync_mmu()) {
7f56e740
PB
1137 error_setg(errp,
1138 "host lacks kvm mmu notifiers, -mem-path unsupported");
f9a49dfa 1139 goto error;
c902760f
MT
1140 }
1141
8ca761f6 1142 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
83234bf2 1143 sanitized_name = g_strdup(memory_region_name(block->mr));
8ca761f6
PF
1144 for (c = sanitized_name; *c != '\0'; c++) {
1145 if (*c == '/')
1146 *c = '_';
1147 }
1148
1149 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1150 sanitized_name);
1151 g_free(sanitized_name);
c902760f
MT
1152
1153 fd = mkstemp(filename);
1154 if (fd < 0) {
7f56e740
PB
1155 error_setg_errno(errp, errno,
1156 "unable to create backing store for hugepages");
e4ada482 1157 g_free(filename);
f9a49dfa 1158 goto error;
c902760f
MT
1159 }
1160 unlink(filename);
e4ada482 1161 g_free(filename);
c902760f
MT
1162
1163 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1164
1165 /*
1166 * ftruncate is not supported by hugetlbfs in older
1167 * hosts, so don't bother bailing out on errors.
1168 * If anything goes wrong with it under other filesystems,
1169 * mmap will fail.
1170 */
7f56e740 1171 if (ftruncate(fd, memory)) {
9742bf26 1172 perror("ftruncate");
7f56e740 1173 }
c902760f 1174
dbcb8981
PB
1175 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1176 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1177 fd, 0);
c902760f 1178 if (area == MAP_FAILED) {
7f56e740
PB
1179 error_setg_errno(errp, errno,
1180 "unable to map backing store for hugepages");
9742bf26 1181 close(fd);
f9a49dfa 1182 goto error;
c902760f 1183 }
ef36fa14
MT
1184
1185 if (mem_prealloc) {
38183310 1186 os_mem_prealloc(fd, area, memory);
ef36fa14
MT
1187 }
1188
04b16653 1189 block->fd = fd;
c902760f 1190 return area;
f9a49dfa
MT
1191
1192error:
1193 if (mem_prealloc) {
81b07353 1194 error_report("%s", error_get_pretty(*errp));
f9a49dfa
MT
1195 exit(1);
1196 }
1197 return NULL;
c902760f
MT
1198}
1199#endif
1200
0dc3f44a 1201/* Called with the ramlist lock held. */
d17b5288 1202static ram_addr_t find_ram_offset(ram_addr_t size)
04b16653
AW
1203{
1204 RAMBlock *block, *next_block;
3e837b2c 1205 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
04b16653 1206
49cd9ac6
SH
1207 assert(size != 0); /* it would hand out same offset multiple times */
1208
0dc3f44a 1209 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
04b16653 1210 return 0;
0d53d9fe 1211 }
04b16653 1212
0dc3f44a 1213 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
f15fbc4b 1214 ram_addr_t end, next = RAM_ADDR_MAX;
04b16653 1215
62be4e3a 1216 end = block->offset + block->max_length;
04b16653 1217
0dc3f44a 1218 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
04b16653
AW
1219 if (next_block->offset >= end) {
1220 next = MIN(next, next_block->offset);
1221 }
1222 }
1223 if (next - end >= size && next - end < mingap) {
3e837b2c 1224 offset = end;
04b16653
AW
1225 mingap = next - end;
1226 }
1227 }
3e837b2c
AW
1228
1229 if (offset == RAM_ADDR_MAX) {
1230 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1231 (uint64_t)size);
1232 abort();
1233 }
1234
04b16653
AW
1235 return offset;
1236}
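/* Example: with blocks already occupying [0, 0x8000000) and
 * [0x10000000, 0x18000000), a request for 0x4000000 bytes is placed at
 * 0x8000000, because the 128 MiB hole there is the smallest gap that still
 * fits the request.
 */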
1237
652d7ec2 1238ram_addr_t last_ram_offset(void)
d17b5288
AW
1239{
1240 RAMBlock *block;
1241 ram_addr_t last = 0;
1242
0dc3f44a
MD
1243 rcu_read_lock();
1244 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
62be4e3a 1245 last = MAX(last, block->offset + block->max_length);
0d53d9fe 1246 }
0dc3f44a 1247 rcu_read_unlock();
d17b5288
AW
1248 return last;
1249}
1250
ddb97f1d
JB
1251static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1252{
1253 int ret;
ddb97f1d
JB
1254
 1255 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
47c8ca53 1256 if (!machine_dump_guest_core(current_machine)) {
ddb97f1d
JB
1257 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1258 if (ret) {
1259 perror("qemu_madvise");
1260 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1261 "but dump_guest_core=off specified\n");
1262 }
1263 }
1264}
1265
0dc3f44a
MD
1266/* Called within an RCU critical section, or while the ramlist lock
1267 * is held.
1268 */
20cfe881 1269static RAMBlock *find_ram_block(ram_addr_t addr)
84b89d78 1270{
20cfe881 1271 RAMBlock *block;
84b89d78 1272
0dc3f44a 1273 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1274 if (block->offset == addr) {
20cfe881 1275 return block;
c5705a77
AK
1276 }
1277 }
20cfe881
HT
1278
1279 return NULL;
1280}
1281
ae3a7047 1282/* Called with iothread lock held. */
20cfe881
HT
1283void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1284{
ae3a7047 1285 RAMBlock *new_block, *block;
20cfe881 1286
0dc3f44a 1287 rcu_read_lock();
ae3a7047 1288 new_block = find_ram_block(addr);
c5705a77
AK
1289 assert(new_block);
1290 assert(!new_block->idstr[0]);
84b89d78 1291
09e5ab63
AL
1292 if (dev) {
1293 char *id = qdev_get_dev_path(dev);
84b89d78
CM
1294 if (id) {
1295 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
7267c094 1296 g_free(id);
84b89d78
CM
1297 }
1298 }
1299 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1300
0dc3f44a 1301 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1302 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
84b89d78
CM
1303 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1304 new_block->idstr);
1305 abort();
1306 }
1307 }
0dc3f44a 1308 rcu_read_unlock();
c5705a77
AK
1309}
1310
ae3a7047 1311/* Called with iothread lock held. */
20cfe881
HT
1312void qemu_ram_unset_idstr(ram_addr_t addr)
1313{
ae3a7047 1314 RAMBlock *block;
20cfe881 1315
ae3a7047
MD
1316 /* FIXME: arch_init.c assumes that this is not called throughout
1317 * migration. Ignore the problem since hot-unplug during migration
1318 * does not work anyway.
1319 */
1320
0dc3f44a 1321 rcu_read_lock();
ae3a7047 1322 block = find_ram_block(addr);
20cfe881
HT
1323 if (block) {
1324 memset(block->idstr, 0, sizeof(block->idstr));
1325 }
0dc3f44a 1326 rcu_read_unlock();
20cfe881
HT
1327}
1328
8490fc78
LC
1329static int memory_try_enable_merging(void *addr, size_t len)
1330{
75cc7f01 1331 if (!machine_mem_merge(current_machine)) {
8490fc78
LC
1332 /* disabled by the user */
1333 return 0;
1334 }
1335
1336 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1337}
1338
62be4e3a
MT
1339/* Only legal before guest might have detected the memory size: e.g. on
1340 * incoming migration, or right after reset.
1341 *
 1342 * As the memory core doesn't know how memory is accessed, it is up to the
 1343 * resize callback to update device state and/or add assertions to detect
1344 * misuse, if necessary.
1345 */
1346int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1347{
1348 RAMBlock *block = find_ram_block(base);
1349
1350 assert(block);
1351
129ddaf3
MT
1352 newsize = TARGET_PAGE_ALIGN(newsize);
1353
62be4e3a
MT
1354 if (block->used_length == newsize) {
1355 return 0;
1356 }
1357
1358 if (!(block->flags & RAM_RESIZEABLE)) {
1359 error_setg_errno(errp, EINVAL,
1360 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1361 " in != 0x" RAM_ADDR_FMT, block->idstr,
1362 newsize, block->used_length);
1363 return -EINVAL;
1364 }
1365
1366 if (block->max_length < newsize) {
1367 error_setg_errno(errp, EINVAL,
1368 "Length too large: %s: 0x" RAM_ADDR_FMT
1369 " > 0x" RAM_ADDR_FMT, block->idstr,
1370 newsize, block->max_length);
1371 return -EINVAL;
1372 }
1373
1374 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1375 block->used_length = newsize;
1376 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1377 memory_region_set_size(block->mr, newsize);
1378 if (block->resized) {
1379 block->resized(block->idstr, newsize, block->host);
1380 }
1381 return 0;
1382}
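/* A sketch of the intended use (sizes are illustrative): a block created
 * with qemu_ram_alloc_resizeable() with used_length 16 MiB and max_length
 * 64 MiB can later be grown or shrunk with qemu_ram_resize() to any size up
 * to max_length, e.g. while loading an incoming migration stream.
 */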
1383
ef701d7b 1384static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
c5705a77 1385{
e1c57ab8 1386 RAMBlock *block;
0d53d9fe 1387 RAMBlock *last_block = NULL;
2152f5ca
JQ
1388 ram_addr_t old_ram_size, new_ram_size;
1389
1390 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
c5705a77 1391
b2a8658e 1392 qemu_mutex_lock_ramlist();
9b8424d5 1393 new_block->offset = find_ram_offset(new_block->max_length);
e1c57ab8
PB
1394
1395 if (!new_block->host) {
1396 if (xen_enabled()) {
9b8424d5
MT
1397 xen_ram_alloc(new_block->offset, new_block->max_length,
1398 new_block->mr);
e1c57ab8 1399 } else {
9b8424d5 1400 new_block->host = phys_mem_alloc(new_block->max_length,
a2b257d6 1401 &new_block->mr->align);
39228250 1402 if (!new_block->host) {
ef701d7b
HT
1403 error_setg_errno(errp, errno,
1404 "cannot set up guest memory '%s'",
1405 memory_region_name(new_block->mr));
1406 qemu_mutex_unlock_ramlist();
1407 return -1;
39228250 1408 }
9b8424d5 1409 memory_try_enable_merging(new_block->host, new_block->max_length);
6977dfe6 1410 }
c902760f 1411 }
94a6b54f 1412
0d53d9fe
MD
1413 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1414 * QLIST (which has an RCU-friendly variant) does not have insertion at
1415 * tail, so save the last element in last_block.
1416 */
0dc3f44a 1417 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
0d53d9fe 1418 last_block = block;
9b8424d5 1419 if (block->max_length < new_block->max_length) {
abb26d63
PB
1420 break;
1421 }
1422 }
1423 if (block) {
0dc3f44a 1424 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
0d53d9fe 1425 } else if (last_block) {
0dc3f44a 1426 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
0d53d9fe 1427 } else { /* list is empty */
0dc3f44a 1428 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
abb26d63 1429 }
0d6d3c87 1430 ram_list.mru_block = NULL;
94a6b54f 1431
0dc3f44a
MD
1432 /* Write list before version */
1433 smp_wmb();
f798b07f 1434 ram_list.version++;
b2a8658e 1435 qemu_mutex_unlock_ramlist();
f798b07f 1436
2152f5ca
JQ
1437 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1438
1439 if (new_ram_size > old_ram_size) {
1ab4c8ce 1440 int i;
ae3a7047
MD
1441
1442 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1ab4c8ce
JQ
1443 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1444 ram_list.dirty_memory[i] =
1445 bitmap_zero_extend(ram_list.dirty_memory[i],
1446 old_ram_size, new_ram_size);
1447 }
2152f5ca 1448 }
9b8424d5
MT
1449 cpu_physical_memory_set_dirty_range(new_block->offset,
1450 new_block->used_length);
94a6b54f 1451
a904c911
PB
1452 if (new_block->host) {
1453 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1454 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1455 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1456 if (kvm_enabled()) {
1457 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1458 }
e1c57ab8 1459 }
6f0437e8 1460
94a6b54f
PB
1461 return new_block->offset;
1462}
e9a1ab19 1463
0b183fc8 1464#ifdef __linux__
e1c57ab8 1465ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
dbcb8981 1466 bool share, const char *mem_path,
7f56e740 1467 Error **errp)
e1c57ab8
PB
1468{
1469 RAMBlock *new_block;
ef701d7b
HT
1470 ram_addr_t addr;
1471 Error *local_err = NULL;
e1c57ab8
PB
1472
1473 if (xen_enabled()) {
7f56e740
PB
1474 error_setg(errp, "-mem-path not supported with Xen");
1475 return -1;
e1c57ab8
PB
1476 }
1477
1478 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1479 /*
1480 * file_ram_alloc() needs to allocate just like
1481 * phys_mem_alloc, but we haven't bothered to provide
1482 * a hook there.
1483 */
7f56e740
PB
1484 error_setg(errp,
1485 "-mem-path not supported with this accelerator");
1486 return -1;
e1c57ab8
PB
1487 }
1488
1489 size = TARGET_PAGE_ALIGN(size);
1490 new_block = g_malloc0(sizeof(*new_block));
1491 new_block->mr = mr;
9b8424d5
MT
1492 new_block->used_length = size;
1493 new_block->max_length = size;
dbcb8981 1494 new_block->flags = share ? RAM_SHARED : 0;
7f56e740
PB
1495 new_block->host = file_ram_alloc(new_block, size,
1496 mem_path, errp);
1497 if (!new_block->host) {
1498 g_free(new_block);
1499 return -1;
1500 }
1501
ef701d7b
HT
1502 addr = ram_block_add(new_block, &local_err);
1503 if (local_err) {
1504 g_free(new_block);
1505 error_propagate(errp, local_err);
1506 return -1;
1507 }
1508 return addr;
e1c57ab8 1509}
0b183fc8 1510#endif
e1c57ab8 1511
62be4e3a
MT
1512static
1513ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1514 void (*resized)(const char*,
1515 uint64_t length,
1516 void *host),
1517 void *host, bool resizeable,
ef701d7b 1518 MemoryRegion *mr, Error **errp)
e1c57ab8
PB
1519{
1520 RAMBlock *new_block;
ef701d7b
HT
1521 ram_addr_t addr;
1522 Error *local_err = NULL;
e1c57ab8
PB
1523
1524 size = TARGET_PAGE_ALIGN(size);
62be4e3a 1525 max_size = TARGET_PAGE_ALIGN(max_size);
e1c57ab8
PB
1526 new_block = g_malloc0(sizeof(*new_block));
1527 new_block->mr = mr;
62be4e3a 1528 new_block->resized = resized;
9b8424d5
MT
1529 new_block->used_length = size;
1530 new_block->max_length = max_size;
62be4e3a 1531 assert(max_size >= size);
e1c57ab8
PB
1532 new_block->fd = -1;
1533 new_block->host = host;
1534 if (host) {
7bd4f430 1535 new_block->flags |= RAM_PREALLOC;
e1c57ab8 1536 }
62be4e3a
MT
1537 if (resizeable) {
1538 new_block->flags |= RAM_RESIZEABLE;
1539 }
ef701d7b
HT
1540 addr = ram_block_add(new_block, &local_err);
1541 if (local_err) {
1542 g_free(new_block);
1543 error_propagate(errp, local_err);
1544 return -1;
1545 }
1546 return addr;
e1c57ab8
PB
1547}
1548
62be4e3a
MT
1549ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1550 MemoryRegion *mr, Error **errp)
1551{
1552 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1553}
1554
ef701d7b 1555ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
6977dfe6 1556{
62be4e3a
MT
1557 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1558}
1559
1560ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1561 void (*resized)(const char*,
1562 uint64_t length,
1563 void *host),
1564 MemoryRegion *mr, Error **errp)
1565{
1566 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
6977dfe6
YT
1567}
1568
1f2e98b6
AW
1569void qemu_ram_free_from_ptr(ram_addr_t addr)
1570{
1571 RAMBlock *block;
1572
b2a8658e 1573 qemu_mutex_lock_ramlist();
0dc3f44a 1574 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1f2e98b6 1575 if (addr == block->offset) {
0dc3f44a 1576 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1577 ram_list.mru_block = NULL;
0dc3f44a
MD
1578 /* Write list before version */
1579 smp_wmb();
f798b07f 1580 ram_list.version++;
43771539 1581 g_free_rcu(block, rcu);
b2a8658e 1582 break;
1f2e98b6
AW
1583 }
1584 }
b2a8658e 1585 qemu_mutex_unlock_ramlist();
1f2e98b6
AW
1586}
1587
43771539
PB
1588static void reclaim_ramblock(RAMBlock *block)
1589{
1590 if (block->flags & RAM_PREALLOC) {
1591 ;
1592 } else if (xen_enabled()) {
1593 xen_invalidate_map_cache_entry(block->host);
1594#ifndef _WIN32
1595 } else if (block->fd >= 0) {
1596 munmap(block->host, block->max_length);
1597 close(block->fd);
1598#endif
1599 } else {
1600 qemu_anon_ram_free(block->host, block->max_length);
1601 }
1602 g_free(block);
1603}
1604
c227f099 1605void qemu_ram_free(ram_addr_t addr)
e9a1ab19 1606{
04b16653
AW
1607 RAMBlock *block;
1608
b2a8658e 1609 qemu_mutex_lock_ramlist();
0dc3f44a 1610 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
04b16653 1611 if (addr == block->offset) {
0dc3f44a 1612 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1613 ram_list.mru_block = NULL;
0dc3f44a
MD
1614 /* Write list before version */
1615 smp_wmb();
f798b07f 1616 ram_list.version++;
43771539 1617 call_rcu(block, reclaim_ramblock, rcu);
b2a8658e 1618 break;
04b16653
AW
1619 }
1620 }
b2a8658e 1621 qemu_mutex_unlock_ramlist();
e9a1ab19
FB
1622}
1623
cd19cfa2
HY
1624#ifndef _WIN32
1625void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1626{
1627 RAMBlock *block;
1628 ram_addr_t offset;
1629 int flags;
1630 void *area, *vaddr;
1631
0dc3f44a 1632 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
cd19cfa2 1633 offset = addr - block->offset;
9b8424d5 1634 if (offset < block->max_length) {
1240be24 1635 vaddr = ramblock_ptr(block, offset);
7bd4f430 1636 if (block->flags & RAM_PREALLOC) {
cd19cfa2 1637 ;
dfeaf2ab
MA
1638 } else if (xen_enabled()) {
1639 abort();
cd19cfa2
HY
1640 } else {
1641 flags = MAP_FIXED;
1642 munmap(vaddr, length);
3435f395 1643 if (block->fd >= 0) {
dbcb8981
PB
1644 flags |= (block->flags & RAM_SHARED ?
1645 MAP_SHARED : MAP_PRIVATE);
3435f395
MA
1646 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647 flags, block->fd, offset);
cd19cfa2 1648 } else {
2eb9fbaa
MA
1649 /*
1650 * Remap needs to match alloc. Accelerators that
1651 * set phys_mem_alloc never remap. If they did,
1652 * we'd need a remap hook here.
1653 */
1654 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1655
cd19cfa2
HY
1656 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1657 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1658 flags, -1, 0);
cd19cfa2
HY
1659 }
1660 if (area != vaddr) {
f15fbc4b
AP
1661 fprintf(stderr, "Could not remap addr: "
1662 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
cd19cfa2
HY
1663 length, addr);
1664 exit(1);
1665 }
8490fc78 1666 memory_try_enable_merging(vaddr, length);
ddb97f1d 1667 qemu_ram_setup_dump(vaddr, length);
cd19cfa2 1668 }
cd19cfa2
HY
1669 }
1670 }
1671}
1672#endif /* !_WIN32 */
1673
a35ba7be
PB
1674int qemu_get_ram_fd(ram_addr_t addr)
1675{
ae3a7047
MD
1676 RAMBlock *block;
1677 int fd;
a35ba7be 1678
0dc3f44a 1679 rcu_read_lock();
ae3a7047
MD
1680 block = qemu_get_ram_block(addr);
1681 fd = block->fd;
0dc3f44a 1682 rcu_read_unlock();
ae3a7047 1683 return fd;
a35ba7be
PB
1684}
1685
3fd74b84
DM
1686void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1687{
ae3a7047
MD
1688 RAMBlock *block;
1689 void *ptr;
3fd74b84 1690
0dc3f44a 1691 rcu_read_lock();
ae3a7047
MD
1692 block = qemu_get_ram_block(addr);
1693 ptr = ramblock_ptr(block, 0);
0dc3f44a 1694 rcu_read_unlock();
ae3a7047 1695 return ptr;
3fd74b84
DM
1696}
1697
1b5ec234 1698/* Return a host pointer to ram allocated with qemu_ram_alloc.
ae3a7047
MD
1699 * This should not be used for general purpose DMA. Use address_space_map
1700 * or address_space_rw instead. For local memory (e.g. video ram) that the
1701 * device owns, use memory_region_get_ram_ptr.
0dc3f44a
MD
1702 *
1703 * By the time this function returns, the returned pointer is not protected
1704 * by RCU anymore. If the caller is not within an RCU critical section and
1705 * does not hold the iothread lock, it must have other means of protecting the
1706 * pointer, such as a reference to the region that includes the incoming
1707 * ram_addr_t.
1b5ec234
PB
1708 */
1709void *qemu_get_ram_ptr(ram_addr_t addr)
1710{
ae3a7047
MD
1711 RAMBlock *block;
1712 void *ptr;
1b5ec234 1713
0dc3f44a 1714 rcu_read_lock();
ae3a7047
MD
1715 block = qemu_get_ram_block(addr);
1716
1717 if (xen_enabled() && block->host == NULL) {
0d6d3c87
PB
1718 /* We need to check if the requested address is in the RAM
1719 * because we don't want to map the entire memory in QEMU.
1720 * In that case just map until the end of the page.
1721 */
1722 if (block->offset == 0) {
ae3a7047 1723 ptr = xen_map_cache(addr, 0, 0);
0dc3f44a 1724 goto unlock;
0d6d3c87 1725 }
ae3a7047
MD
1726
1727 block->host = xen_map_cache(block->offset, block->max_length, 1);
0d6d3c87 1728 }
ae3a7047
MD
1729 ptr = ramblock_ptr(block, addr - block->offset);
1730
0dc3f44a
MD
1731unlock:
1732 rcu_read_unlock();
ae3a7047 1733 return ptr;
dc828ca1
PB
1734}
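/* Sketch of the usage pattern described above (the calling code shown is
 * hypothetical):
 *
 *     rcu_read_lock();
 *     ptr = qemu_get_ram_ptr(addr);
 *     memcpy(buf, ptr, len);        // only use ptr inside the section
 *     rcu_read_unlock();
 *
 * Outside an RCU critical section, the caller must instead hold the iothread
 * lock or keep a reference on the MemoryRegion that backs the ram_addr_t for
 * as long as the pointer is dereferenced.
 */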
1735
38bee5dc 1736/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
ae3a7047 1737 * but takes a size argument.
0dc3f44a
MD
1738 *
1739 * By the time this function returns, the returned pointer is not protected
1740 * by RCU anymore. If the caller is not within an RCU critical section and
1741 * does not hold the iothread lock, it must have other means of protecting the
1742 * pointer, such as a reference to the region that includes the incoming
1743 * ram_addr_t.
ae3a7047 1744 */
cb85f7ab 1745static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
38bee5dc 1746{
ae3a7047 1747 void *ptr;
8ab934f9
SS
1748 if (*size == 0) {
1749 return NULL;
1750 }
868bb33f 1751 if (xen_enabled()) {
e41d7c69 1752 return xen_map_cache(addr, *size, 1);
868bb33f 1753 } else {
38bee5dc 1754 RAMBlock *block;
0dc3f44a
MD
1755 rcu_read_lock();
1756 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5
MT
1757 if (addr - block->offset < block->max_length) {
1758 if (addr - block->offset + *size > block->max_length)
1759 *size = block->max_length - addr + block->offset;
ae3a7047 1760 ptr = ramblock_ptr(block, addr - block->offset);
0dc3f44a 1761 rcu_read_unlock();
ae3a7047 1762 return ptr;
38bee5dc
SS
1763 }
1764 }
1765
1766 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1767 abort();
38bee5dc
SS
1768 }
1769}
1770
7443b437 1771/* Some of the softmmu routines need to translate from a host pointer
ae3a7047
MD
1772 * (typically a TLB entry) back to a ram offset.
1773 *
1774 * By the time this function returns, the returned pointer is not protected
1775 * by RCU anymore. If the caller is not within an RCU critical section and
1776 * does not hold the iothread lock, it must have other means of protecting the
1777 * pointer, such as a reference to the region that includes the incoming
1778 * ram_addr_t.
1779 */
1b5ec234 1780MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
5579c7f3 1781{
94a6b54f
PB
1782 RAMBlock *block;
1783 uint8_t *host = ptr;
ae3a7047 1784 MemoryRegion *mr;
94a6b54f 1785
868bb33f 1786 if (xen_enabled()) {
0dc3f44a 1787 rcu_read_lock();
e41d7c69 1788 *ram_addr = xen_ram_addr_from_mapcache(ptr);
ae3a7047 1789 mr = qemu_get_ram_block(*ram_addr)->mr;
0dc3f44a 1790 rcu_read_unlock();
ae3a7047 1791 return mr;
712c2b41
SS
1792 }
1793
0dc3f44a
MD
1794 rcu_read_lock();
1795 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 1796 if (block && block->host && host - block->host < block->max_length) {
23887b79
PB
1797 goto found;
1798 }
1799
0dc3f44a 1800 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
432d268c
JN
1801 /* This case appears when the block is not mapped. */
1802 if (block->host == NULL) {
1803 continue;
1804 }
9b8424d5 1805 if (host - block->host < block->max_length) {
23887b79 1806 goto found;
f471a17e 1807 }
94a6b54f 1808 }
432d268c 1809
0dc3f44a 1810 rcu_read_unlock();
1b5ec234 1811 return NULL;
23887b79
PB
1812
1813found:
1814 *ram_addr = block->offset + (host - block->host);
ae3a7047 1815 mr = block->mr;
0dc3f44a 1816 rcu_read_unlock();
ae3a7047 1817 return mr;
e890261f 1818}
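/*
 * Editor's usage sketch (not part of exec.c): translating a host pointer,
 * e.g. one obtained from a TLB entry or from address_space_map(), back to
 * its ram_addr_t.  A NULL MemoryRegion means the pointer does not fall
 * inside any guest RAM block.
 */
static bool example_host_ptr_to_ram_addr(void *host, ram_addr_t *out)
{
    ram_addr_t ram_addr;
    MemoryRegion *mr = qemu_ram_addr_from_host(host, &ram_addr);

    if (mr == NULL) {
        return false;           /* not guest RAM */
    }
    *out = ram_addr;            /* block->offset + offset within the block */
    return true;
}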
f471a17e 1819
a8170e5e 1820static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
0e0df1e2 1821 uint64_t val, unsigned size)
9fa3e853 1822{
52159192 1823 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
0e0df1e2 1824 tb_invalidate_phys_page_fast(ram_addr, size);
3a7d929e 1825 }
0e0df1e2
AK
1826 switch (size) {
1827 case 1:
1828 stb_p(qemu_get_ram_ptr(ram_addr), val);
1829 break;
1830 case 2:
1831 stw_p(qemu_get_ram_ptr(ram_addr), val);
1832 break;
1833 case 4:
1834 stl_p(qemu_get_ram_ptr(ram_addr), val);
1835 break;
1836 default:
1837 abort();
3a7d929e 1838 }
6886867e 1839 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
f23db169
FB
1840 /* we remove the notdirty callback only if the code has been
1841 flushed */
a2cd8c85 1842 if (!cpu_physical_memory_is_clean(ram_addr)) {
4917cf44 1843 CPUArchState *env = current_cpu->env_ptr;
93afeade 1844 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
4917cf44 1845 }
9fa3e853
FB
1846}
1847
b018ddf6
PB
1848static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1849 unsigned size, bool is_write)
1850{
1851 return is_write;
1852}
1853
0e0df1e2 1854static const MemoryRegionOps notdirty_mem_ops = {
0e0df1e2 1855 .write = notdirty_mem_write,
b018ddf6 1856 .valid.accepts = notdirty_mem_accepts,
0e0df1e2 1857 .endianness = DEVICE_NATIVE_ENDIAN,
1ccde1cb
FB
1858};
1859
0f459d16 1860/* Generate a debug exception if a watchpoint has been hit. */
05068c0d 1861static void check_watchpoint(int offset, int len, int flags)
0f459d16 1862{
93afeade
AF
1863 CPUState *cpu = current_cpu;
1864 CPUArchState *env = cpu->env_ptr;
06d55cc1 1865 target_ulong pc, cs_base;
0f459d16 1866 target_ulong vaddr;
a1d1bb31 1867 CPUWatchpoint *wp;
06d55cc1 1868 int cpu_flags;
0f459d16 1869
ff4700b0 1870 if (cpu->watchpoint_hit) {
06d55cc1
AL
1871 /* We re-entered the check after replacing the TB. Now raise
1872 * the debug interrupt so that it will trigger after the
1873 * current instruction. */
93afeade 1874 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
06d55cc1
AL
1875 return;
1876 }
93afeade 1877 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
ff4700b0 1878 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d
PM
1879 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1880 && (wp->flags & flags)) {
08225676
PM
1881 if (flags == BP_MEM_READ) {
1882 wp->flags |= BP_WATCHPOINT_HIT_READ;
1883 } else {
1884 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1885 }
1886 wp->hitaddr = vaddr;
ff4700b0
AF
1887 if (!cpu->watchpoint_hit) {
1888 cpu->watchpoint_hit = wp;
239c51a5 1889 tb_check_watchpoint(cpu);
6e140f28 1890 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
27103424 1891 cpu->exception_index = EXCP_DEBUG;
5638d180 1892 cpu_loop_exit(cpu);
6e140f28
AL
1893 } else {
1894 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
648f034c 1895 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
0ea8cb88 1896 cpu_resume_from_signal(cpu, NULL);
6e140f28 1897 }
06d55cc1 1898 }
6e140f28
AL
1899 } else {
1900 wp->flags &= ~BP_WATCHPOINT_HIT;
0f459d16
PB
1901 }
1902 }
1903}
1904
6658ffb8
PB
1905/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1906 so these check for a hit then pass through to the normal out-of-line
1907 phys routines. */
a8170e5e 1908static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1ec9b909 1909 unsigned size)
6658ffb8 1910{
05068c0d 1911 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1ec9b909 1912 switch (size) {
2c17449b 1913 case 1: return ldub_phys(&address_space_memory, addr);
41701aa4 1914 case 2: return lduw_phys(&address_space_memory, addr);
fdfba1a2 1915 case 4: return ldl_phys(&address_space_memory, addr);
1ec9b909
AK
1916 default: abort();
1917 }
6658ffb8
PB
1918}
1919
a8170e5e 1920static void watch_mem_write(void *opaque, hwaddr addr,
1ec9b909 1921 uint64_t val, unsigned size)
6658ffb8 1922{
05068c0d 1923 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1ec9b909 1924 switch (size) {
67364150 1925 case 1:
db3be60d 1926 stb_phys(&address_space_memory, addr, val);
67364150
MF
1927 break;
1928 case 2:
5ce5944d 1929 stw_phys(&address_space_memory, addr, val);
67364150
MF
1930 break;
1931 case 4:
ab1da857 1932 stl_phys(&address_space_memory, addr, val);
67364150 1933 break;
1ec9b909
AK
1934 default: abort();
1935 }
6658ffb8
PB
1936}
1937
1ec9b909
AK
1938static const MemoryRegionOps watch_mem_ops = {
1939 .read = watch_mem_read,
1940 .write = watch_mem_write,
1941 .endianness = DEVICE_NATIVE_ENDIAN,
6658ffb8 1942};
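/*
 * Editor's sketch (not part of exec.c): the watchpoints that
 * check_watchpoint() scans above are registered through the per-CPU API,
 * e.g. cpu_watchpoint_insert().  The address and length here are
 * hypothetical inputs.
 */
static int example_watch_guest_writes(CPUState *cpu, vaddr addr, vaddr len)
{
    /* Trap guest writes to [addr, addr + len) and stop before the access. */
    return cpu_watchpoint_insert(cpu, addr, len,
                                 BP_MEM_WRITE | BP_STOP_BEFORE_ACCESS, NULL);
}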
6658ffb8 1943
a8170e5e 1944static uint64_t subpage_read(void *opaque, hwaddr addr,
70c68e44 1945 unsigned len)
db7b5426 1946{
acc9d80b 1947 subpage_t *subpage = opaque;
ff6cff75 1948 uint8_t buf[8];
791af8c8 1949
db7b5426 1950#if defined(DEBUG_SUBPAGE)
016e9d62 1951 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
acc9d80b 1952 subpage, len, addr);
db7b5426 1953#endif
acc9d80b
JK
1954 address_space_read(subpage->as, addr + subpage->base, buf, len);
1955 switch (len) {
1956 case 1:
1957 return ldub_p(buf);
1958 case 2:
1959 return lduw_p(buf);
1960 case 4:
1961 return ldl_p(buf);
ff6cff75
PB
1962 case 8:
1963 return ldq_p(buf);
acc9d80b
JK
1964 default:
1965 abort();
1966 }
db7b5426
BS
1967}
1968
a8170e5e 1969static void subpage_write(void *opaque, hwaddr addr,
70c68e44 1970 uint64_t value, unsigned len)
db7b5426 1971{
acc9d80b 1972 subpage_t *subpage = opaque;
ff6cff75 1973 uint8_t buf[8];
acc9d80b 1974
db7b5426 1975#if defined(DEBUG_SUBPAGE)
016e9d62 1976 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
acc9d80b
JK
1977 " value %"PRIx64"\n",
1978 __func__, subpage, len, addr, value);
db7b5426 1979#endif
acc9d80b
JK
1980 switch (len) {
1981 case 1:
1982 stb_p(buf, value);
1983 break;
1984 case 2:
1985 stw_p(buf, value);
1986 break;
1987 case 4:
1988 stl_p(buf, value);
1989 break;
ff6cff75
PB
1990 case 8:
1991 stq_p(buf, value);
1992 break;
acc9d80b
JK
1993 default:
1994 abort();
1995 }
1996 address_space_write(subpage->as, addr + subpage->base, buf, len);
db7b5426
BS
1997}
1998
c353e4cc 1999static bool subpage_accepts(void *opaque, hwaddr addr,
016e9d62 2000 unsigned len, bool is_write)
c353e4cc 2001{
acc9d80b 2002 subpage_t *subpage = opaque;
c353e4cc 2003#if defined(DEBUG_SUBPAGE)
016e9d62 2004 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
acc9d80b 2005 __func__, subpage, is_write ? 'w' : 'r', len, addr);
c353e4cc
PB
2006#endif
2007
acc9d80b 2008 return address_space_access_valid(subpage->as, addr + subpage->base,
016e9d62 2009 len, is_write);
c353e4cc
PB
2010}
2011
70c68e44
AK
2012static const MemoryRegionOps subpage_ops = {
2013 .read = subpage_read,
2014 .write = subpage_write,
ff6cff75
PB
2015 .impl.min_access_size = 1,
2016 .impl.max_access_size = 8,
2017 .valid.min_access_size = 1,
2018 .valid.max_access_size = 8,
c353e4cc 2019 .valid.accepts = subpage_accepts,
70c68e44 2020 .endianness = DEVICE_NATIVE_ENDIAN,
db7b5426
BS
2021};
2022
c227f099 2023static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 2024 uint16_t section)
db7b5426
BS
2025{
2026 int idx, eidx;
2027
2028 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2029 return -1;
2030 idx = SUBPAGE_IDX(start);
2031 eidx = SUBPAGE_IDX(end);
2032#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2033 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2034 __func__, mmio, start, end, idx, eidx, section);
db7b5426 2035#endif
db7b5426 2036 for (; idx <= eidx; idx++) {
5312bd8b 2037 mmio->sub_section[idx] = section;
db7b5426
BS
2038 }
2039
2040 return 0;
2041}
2042
acc9d80b 2043static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
db7b5426 2044{
c227f099 2045 subpage_t *mmio;
db7b5426 2046
7267c094 2047 mmio = g_malloc0(sizeof(subpage_t));
1eec614b 2048
acc9d80b 2049 mmio->as = as;
1eec614b 2050 mmio->base = base;
2c9b15ca 2051 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
b4fefef9 2052 NULL, TARGET_PAGE_SIZE);
b3b00c78 2053 mmio->iomem.subpage = true;
db7b5426 2054#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2055 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2056 mmio, base, TARGET_PAGE_SIZE);
db7b5426 2057#endif
b41aac4f 2058 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
db7b5426
BS
2059
2060 return mmio;
2061}
2062
a656e22f
PC
2063static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2064 MemoryRegion *mr)
5312bd8b 2065{
a656e22f 2066 assert(as);
5312bd8b 2067 MemoryRegionSection section = {
a656e22f 2068 .address_space = as,
5312bd8b
AK
2069 .mr = mr,
2070 .offset_within_address_space = 0,
2071 .offset_within_region = 0,
052e87b0 2072 .size = int128_2_64(),
5312bd8b
AK
2073 };
2074
53cb28cb 2075 return phys_section_add(map, &section);
5312bd8b
AK
2076}
2077
9d82b5a7 2078MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
aa102231 2079{
79e2b9ae
PB
2080 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2081 MemoryRegionSection *sections = d->map.sections;
9d82b5a7
PB
2082
2083 return sections[index & ~TARGET_PAGE_MASK].mr;
aa102231
AK
2084}
2085
e9179ce1
AK
2086static void io_mem_init(void)
2087{
1f6245e5 2088 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2c9b15ca 2089 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1f6245e5 2090 NULL, UINT64_MAX);
2c9b15ca 2091 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1f6245e5 2092 NULL, UINT64_MAX);
2c9b15ca 2093 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1f6245e5 2094 NULL, UINT64_MAX);
e9179ce1
AK
2095}
2096
ac1970fb 2097static void mem_begin(MemoryListener *listener)
00752703
PB
2098{
2099 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
53cb28cb
MA
2100 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2101 uint16_t n;
2102
a656e22f 2103 n = dummy_section(&d->map, as, &io_mem_unassigned);
53cb28cb 2104 assert(n == PHYS_SECTION_UNASSIGNED);
a656e22f 2105 n = dummy_section(&d->map, as, &io_mem_notdirty);
53cb28cb 2106 assert(n == PHYS_SECTION_NOTDIRTY);
a656e22f 2107 n = dummy_section(&d->map, as, &io_mem_rom);
53cb28cb 2108 assert(n == PHYS_SECTION_ROM);
a656e22f 2109 n = dummy_section(&d->map, as, &io_mem_watch);
53cb28cb 2110 assert(n == PHYS_SECTION_WATCH);
00752703 2111
9736e55b 2112 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
00752703
PB
2113 d->as = as;
2114 as->next_dispatch = d;
2115}
2116
79e2b9ae
PB
2117static void address_space_dispatch_free(AddressSpaceDispatch *d)
2118{
2119 phys_sections_free(&d->map);
2120 g_free(d);
2121}
2122
00752703 2123static void mem_commit(MemoryListener *listener)
ac1970fb 2124{
89ae337a 2125 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
0475d94f
PB
2126 AddressSpaceDispatch *cur = as->dispatch;
2127 AddressSpaceDispatch *next = as->next_dispatch;
2128
53cb28cb 2129 phys_page_compact_all(next, next->map.nodes_nb);
b35ba30f 2130
79e2b9ae 2131 atomic_rcu_set(&as->dispatch, next);
53cb28cb 2132 if (cur) {
79e2b9ae 2133 call_rcu(cur, address_space_dispatch_free, rcu);
53cb28cb 2134 }
9affd6fc
PB
2135}
2136
1d71148e 2137static void tcg_commit(MemoryListener *listener)
50c1e149 2138{
182735ef 2139 CPUState *cpu;
117712c3
AK
2140
2141 /* since each CPU stores ram addresses in its TLB cache, we must
2142 reset the modified entries */
2143 /* XXX: slow ! */
bdc44640 2144 CPU_FOREACH(cpu) {
33bde2e1
EI
2145 /* FIXME: Disentangle the circular cpu.h file dependencies so we can
2146 directly get the right CPU from the listener. */
2147 if (cpu->tcg_as_listener != listener) {
2148 continue;
2149 }
76e5c76f 2150 cpu_reload_memory_map(cpu);
117712c3 2151 }
50c1e149
AK
2152}
2153
93632747
AK
2154static void core_log_global_start(MemoryListener *listener)
2155{
981fdf23 2156 cpu_physical_memory_set_dirty_tracking(true);
93632747
AK
2157}
2158
2159static void core_log_global_stop(MemoryListener *listener)
2160{
981fdf23 2161 cpu_physical_memory_set_dirty_tracking(false);
93632747
AK
2162}
2163
93632747 2164static MemoryListener core_memory_listener = {
93632747
AK
2165 .log_global_start = core_log_global_start,
2166 .log_global_stop = core_log_global_stop,
ac1970fb 2167 .priority = 1,
93632747
AK
2168};
2169
ac1970fb
AK
2170void address_space_init_dispatch(AddressSpace *as)
2171{
00752703 2172 as->dispatch = NULL;
89ae337a 2173 as->dispatch_listener = (MemoryListener) {
ac1970fb 2174 .begin = mem_begin,
00752703 2175 .commit = mem_commit,
ac1970fb
AK
2176 .region_add = mem_add,
2177 .region_nop = mem_add,
2178 .priority = 0,
2179 };
89ae337a 2180 memory_listener_register(&as->dispatch_listener, as);
ac1970fb
AK
2181}
2182
6e48e8f9
PB
2183void address_space_unregister(AddressSpace *as)
2184{
2185 memory_listener_unregister(&as->dispatch_listener);
2186}
2187
83f3c251
AK
2188void address_space_destroy_dispatch(AddressSpace *as)
2189{
2190 AddressSpaceDispatch *d = as->dispatch;
2191
79e2b9ae
PB
2192 atomic_rcu_set(&as->dispatch, NULL);
2193 if (d) {
2194 call_rcu(d, address_space_dispatch_free, rcu);
2195 }
83f3c251
AK
2196}
2197
62152b8a
AK
2198static void memory_map_init(void)
2199{
7267c094 2200 system_memory = g_malloc(sizeof(*system_memory));
03f49957 2201
57271d63 2202 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
7dca8043 2203 address_space_init(&address_space_memory, system_memory, "memory");
309cb471 2204
7267c094 2205 system_io = g_malloc(sizeof(*system_io));
3bb28b72
JK
2206 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2207 65536);
7dca8043 2208 address_space_init(&address_space_io, system_io, "I/O");
93632747 2209
f6790af6 2210 memory_listener_register(&core_memory_listener, &address_space_memory);
62152b8a
AK
2211}
2212
2213MemoryRegion *get_system_memory(void)
2214{
2215 return system_memory;
2216}
2217
309cb471
AK
2218MemoryRegion *get_system_io(void)
2219{
2220 return system_io;
2221}
2222
e2eef170
PB
2223#endif /* !defined(CONFIG_USER_ONLY) */
2224
13eb76e0
FB
2225/* physical memory access (slow version, mainly for debug) */
2226#if defined(CONFIG_USER_ONLY)
f17ec444 2227int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
a68fe89c 2228 uint8_t *buf, int len, int is_write)
13eb76e0
FB
2229{
2230 int l, flags;
2231 target_ulong page;
53a5960a 2232 void * p;
13eb76e0
FB
2233
2234 while (len > 0) {
2235 page = addr & TARGET_PAGE_MASK;
2236 l = (page + TARGET_PAGE_SIZE) - addr;
2237 if (l > len)
2238 l = len;
2239 flags = page_get_flags(page);
2240 if (!(flags & PAGE_VALID))
a68fe89c 2241 return -1;
13eb76e0
FB
2242 if (is_write) {
2243 if (!(flags & PAGE_WRITE))
a68fe89c 2244 return -1;
579a97f7 2245 /* XXX: this code should not depend on lock_user */
72fb7daa 2246 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
a68fe89c 2247 return -1;
72fb7daa
AJ
2248 memcpy(p, buf, l);
2249 unlock_user(p, addr, l);
13eb76e0
FB
2250 } else {
2251 if (!(flags & PAGE_READ))
a68fe89c 2252 return -1;
579a97f7 2253 /* XXX: this code should not depend on lock_user */
72fb7daa 2254 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
a68fe89c 2255 return -1;
72fb7daa 2256 memcpy(buf, p, l);
5b257578 2257 unlock_user(p, addr, 0);
13eb76e0
FB
2258 }
2259 len -= l;
2260 buf += l;
2261 addr += l;
2262 }
a68fe89c 2263 return 0;
13eb76e0 2264}
8df1cd07 2265
13eb76e0 2266#else
51d7a9eb 2267
a8170e5e
AK
2268static void invalidate_and_set_dirty(hwaddr addr,
2269 hwaddr length)
51d7a9eb 2270{
f874bf90
PM
2271 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2272 tb_invalidate_phys_range(addr, addr + length, 0);
6886867e 2273 cpu_physical_memory_set_dirty_range_nocode(addr, length);
51d7a9eb 2274 }
e226939d 2275 xen_modified_memory(addr, length);
51d7a9eb
AP
2276}
2277
23326164 2278static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
82f2563f 2279{
e1622f4b 2280 unsigned access_size_max = mr->ops->valid.max_access_size;
23326164
RH
2281
2282 /* Regions are assumed to support 1-4 byte accesses unless
2283 otherwise specified. */
23326164
RH
2284 if (access_size_max == 0) {
2285 access_size_max = 4;
2286 }
2287
2288 /* Bound the maximum access by the alignment of the address. */
2289 if (!mr->ops->impl.unaligned) {
2290 unsigned align_size_max = addr & -addr;
2291 if (align_size_max != 0 && align_size_max < access_size_max) {
2292 access_size_max = align_size_max;
2293 }
82f2563f 2294 }
23326164
RH
2295
2296 /* Don't attempt accesses larger than the maximum. */
2297 if (l > access_size_max) {
2298 l = access_size_max;
82f2563f 2299 }
098178f2
PB
2300 if (l & (l - 1)) {
2301 l = 1 << (qemu_fls(l) - 1);
2302 }
23326164
RH
2303
2304 return l;
82f2563f
PB
2305}
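/*
 * Worked example (editor's note, not part of exec.c): for an 8-byte request
 * at address 0x100a on a region with valid.max_access_size == 4 and no
 * unaligned support, the region cap first limits the access size to 4 and
 * the address alignment (0x100a & -0x100a == 2) then limits it to 2, so the
 * slow path below issues 2-byte accesses.
 */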
2306
fd8aaa76 2307bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
ac1970fb 2308 int len, bool is_write)
13eb76e0 2309{
149f54b5 2310 hwaddr l;
13eb76e0 2311 uint8_t *ptr;
791af8c8 2312 uint64_t val;
149f54b5 2313 hwaddr addr1;
5c8a00ce 2314 MemoryRegion *mr;
fd8aaa76 2315 bool error = false;
3b46e624 2316
13eb76e0 2317 while (len > 0) {
149f54b5 2318 l = len;
5c8a00ce 2319 mr = address_space_translate(as, addr, &addr1, &l, is_write);
3b46e624 2320
13eb76e0 2321 if (is_write) {
5c8a00ce
PB
2322 if (!memory_access_is_direct(mr, is_write)) {
2323 l = memory_access_size(mr, l, addr1);
4917cf44 2324 /* XXX: could force current_cpu to NULL to avoid
6a00d601 2325 potential bugs */
23326164
RH
2326 switch (l) {
2327 case 8:
2328 /* 64 bit write access */
2329 val = ldq_p(buf);
2330 error |= io_mem_write(mr, addr1, val, 8);
2331 break;
2332 case 4:
1c213d19 2333 /* 32 bit write access */
c27004ec 2334 val = ldl_p(buf);
5c8a00ce 2335 error |= io_mem_write(mr, addr1, val, 4);
23326164
RH
2336 break;
2337 case 2:
1c213d19 2338 /* 16 bit write access */
c27004ec 2339 val = lduw_p(buf);
5c8a00ce 2340 error |= io_mem_write(mr, addr1, val, 2);
23326164
RH
2341 break;
2342 case 1:
1c213d19 2343 /* 8 bit write access */
c27004ec 2344 val = ldub_p(buf);
5c8a00ce 2345 error |= io_mem_write(mr, addr1, val, 1);
23326164
RH
2346 break;
2347 default:
2348 abort();
13eb76e0 2349 }
2bbfa05d 2350 } else {
5c8a00ce 2351 addr1 += memory_region_get_ram_addr(mr);
13eb76e0 2352 /* RAM case */
5579c7f3 2353 ptr = qemu_get_ram_ptr(addr1);
13eb76e0 2354 memcpy(ptr, buf, l);
51d7a9eb 2355 invalidate_and_set_dirty(addr1, l);
13eb76e0
FB
2356 }
2357 } else {
5c8a00ce 2358 if (!memory_access_is_direct(mr, is_write)) {
13eb76e0 2359 /* I/O case */
5c8a00ce 2360 l = memory_access_size(mr, l, addr1);
23326164
RH
2361 switch (l) {
2362 case 8:
2363 /* 64 bit read access */
2364 error |= io_mem_read(mr, addr1, &val, 8);
2365 stq_p(buf, val);
2366 break;
2367 case 4:
13eb76e0 2368 /* 32 bit read access */
5c8a00ce 2369 error |= io_mem_read(mr, addr1, &val, 4);
c27004ec 2370 stl_p(buf, val);
23326164
RH
2371 break;
2372 case 2:
13eb76e0 2373 /* 16 bit read access */
5c8a00ce 2374 error |= io_mem_read(mr, addr1, &val, 2);
c27004ec 2375 stw_p(buf, val);
23326164
RH
2376 break;
2377 case 1:
1c213d19 2378 /* 8 bit read access */
5c8a00ce 2379 error |= io_mem_read(mr, addr1, &val, 1);
c27004ec 2380 stb_p(buf, val);
23326164
RH
2381 break;
2382 default:
2383 abort();
13eb76e0
FB
2384 }
2385 } else {
2386 /* RAM case */
5c8a00ce 2387 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
f3705d53 2388 memcpy(buf, ptr, l);
13eb76e0
FB
2389 }
2390 }
2391 len -= l;
2392 buf += l;
2393 addr += l;
2394 }
fd8aaa76
PB
2395
2396 return error;
13eb76e0 2397}
8df1cd07 2398
fd8aaa76 2399bool address_space_write(AddressSpace *as, hwaddr addr,
ac1970fb
AK
2400 const uint8_t *buf, int len)
2401{
fd8aaa76 2402 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
ac1970fb
AK
2403}
2404
fd8aaa76 2405bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
ac1970fb 2406{
fd8aaa76 2407 return address_space_rw(as, addr, buf, len, false);
ac1970fb
AK
2408}
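/*
 * Editor's usage sketch (not part of exec.c): reading a 32-bit
 * little-endian value through the generic path.  "desc_pa" is a
 * hypothetical guest physical address; note that in this version the
 * return value of address_space_read() is an error flag (true on failure).
 */
static bool example_read_le32(AddressSpace *as, hwaddr desc_pa, uint32_t *val)
{
    uint8_t buf[4];

    if (address_space_read(as, desc_pa, buf, sizeof(buf))) {
        return false;               /* an I/O access reported an error */
    }
    *val = ldl_le_p(buf);
    return true;
}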
2409
2410
a8170e5e 2411void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
ac1970fb
AK
2412 int len, int is_write)
2413{
fd8aaa76 2414 address_space_rw(&address_space_memory, addr, buf, len, is_write);
ac1970fb
AK
2415}
2416
582b55a9
AG
2417enum write_rom_type {
2418 WRITE_DATA,
2419 FLUSH_CACHE,
2420};
2421
2a221651 2422static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
582b55a9 2423 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
d0ecd2aa 2424{
149f54b5 2425 hwaddr l;
d0ecd2aa 2426 uint8_t *ptr;
149f54b5 2427 hwaddr addr1;
5c8a00ce 2428 MemoryRegion *mr;
3b46e624 2429
d0ecd2aa 2430 while (len > 0) {
149f54b5 2431 l = len;
2a221651 2432 mr = address_space_translate(as, addr, &addr1, &l, true);
3b46e624 2433
5c8a00ce
PB
2434 if (!(memory_region_is_ram(mr) ||
2435 memory_region_is_romd(mr))) {
d0ecd2aa
FB
2436 /* do nothing */
2437 } else {
5c8a00ce 2438 addr1 += memory_region_get_ram_addr(mr);
d0ecd2aa 2439 /* ROM/RAM case */
5579c7f3 2440 ptr = qemu_get_ram_ptr(addr1);
582b55a9
AG
2441 switch (type) {
2442 case WRITE_DATA:
2443 memcpy(ptr, buf, l);
2444 invalidate_and_set_dirty(addr1, l);
2445 break;
2446 case FLUSH_CACHE:
2447 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2448 break;
2449 }
d0ecd2aa
FB
2450 }
2451 len -= l;
2452 buf += l;
2453 addr += l;
2454 }
2455}
2456
582b55a9 2457/* used for ROM loading : can write in RAM and ROM */
2a221651 2458void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
582b55a9
AG
2459 const uint8_t *buf, int len)
2460{
2a221651 2461 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
582b55a9
AG
2462}
2463
2464void cpu_flush_icache_range(hwaddr start, int len)
2465{
2466 /*
2467 * This function should do the same thing as an icache flush that was
2468 * triggered from within the guest. For TCG we are always cache coherent,
2469 * so there is no need to flush anything. For KVM / Xen we need to flush
2470 * the host's instruction cache at least.
2471 */
2472 if (tcg_enabled()) {
2473 return;
2474 }
2475
2a221651
EI
2476 cpu_physical_memory_write_rom_internal(&address_space_memory,
2477 start, NULL, len, FLUSH_CACHE);
582b55a9
AG
2478}
2479
6d16c2f8 2480typedef struct {
d3e71559 2481 MemoryRegion *mr;
6d16c2f8 2482 void *buffer;
a8170e5e
AK
2483 hwaddr addr;
2484 hwaddr len;
6d16c2f8
AL
2485} BounceBuffer;
2486
2487static BounceBuffer bounce;
2488
ba223c29
AL
2489typedef struct MapClient {
2490 void *opaque;
2491 void (*callback)(void *opaque);
72cf2d4f 2492 QLIST_ENTRY(MapClient) link;
ba223c29
AL
2493} MapClient;
2494
72cf2d4f
BS
2495static QLIST_HEAD(map_client_list, MapClient) map_client_list
2496 = QLIST_HEAD_INITIALIZER(map_client_list);
ba223c29
AL
2497
2498void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2499{
7267c094 2500 MapClient *client = g_malloc(sizeof(*client));
ba223c29
AL
2501
2502 client->opaque = opaque;
2503 client->callback = callback;
72cf2d4f 2504 QLIST_INSERT_HEAD(&map_client_list, client, link);
ba223c29
AL
2505 return client;
2506}
2507
8b9c99d9 2508static void cpu_unregister_map_client(void *_client)
ba223c29
AL
2509{
2510 MapClient *client = (MapClient *)_client;
2511
72cf2d4f 2512 QLIST_REMOVE(client, link);
7267c094 2513 g_free(client);
ba223c29
AL
2514}
2515
2516static void cpu_notify_map_clients(void)
2517{
2518 MapClient *client;
2519
72cf2d4f
BS
2520 while (!QLIST_EMPTY(&map_client_list)) {
2521 client = QLIST_FIRST(&map_client_list);
ba223c29 2522 client->callback(client->opaque);
34d5e948 2523 cpu_unregister_map_client(client);
ba223c29
AL
2524 }
2525}
2526
51644ab7
PB
2527bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2528{
5c8a00ce 2529 MemoryRegion *mr;
51644ab7
PB
2530 hwaddr l, xlat;
2531
2532 while (len > 0) {
2533 l = len;
5c8a00ce
PB
2534 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2535 if (!memory_access_is_direct(mr, is_write)) {
2536 l = memory_access_size(mr, l, addr);
2537 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
51644ab7
PB
2538 return false;
2539 }
2540 }
2541
2542 len -= l;
2543 addr += l;
2544 }
2545 return true;
2546}
2547
6d16c2f8
AL
2548/* Map a physical memory region into a host virtual address.
2549 * May map a subset of the requested range, given by and returned in *plen.
2550 * May return NULL if resources needed to perform the mapping are exhausted.
2551 * Use only for reads OR writes - not for read-modify-write operations.
ba223c29
AL
2552 * Use cpu_register_map_client() to know when retrying the map operation is
2553 * likely to succeed.
6d16c2f8 2554 */
ac1970fb 2555void *address_space_map(AddressSpace *as,
a8170e5e
AK
2556 hwaddr addr,
2557 hwaddr *plen,
ac1970fb 2558 bool is_write)
6d16c2f8 2559{
a8170e5e 2560 hwaddr len = *plen;
e3127ae0
PB
2561 hwaddr done = 0;
2562 hwaddr l, xlat, base;
2563 MemoryRegion *mr, *this_mr;
2564 ram_addr_t raddr;
6d16c2f8 2565
e3127ae0
PB
2566 if (len == 0) {
2567 return NULL;
2568 }
38bee5dc 2569
e3127ae0
PB
2570 l = len;
2571 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2572 if (!memory_access_is_direct(mr, is_write)) {
2573 if (bounce.buffer) {
2574 return NULL;
6d16c2f8 2575 }
e85d9db5
KW
2576 /* Avoid unbounded allocations */
2577 l = MIN(l, TARGET_PAGE_SIZE);
2578 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
e3127ae0
PB
2579 bounce.addr = addr;
2580 bounce.len = l;
d3e71559
PB
2581
2582 memory_region_ref(mr);
2583 bounce.mr = mr;
e3127ae0
PB
2584 if (!is_write) {
2585 address_space_read(as, addr, bounce.buffer, l);
8ab934f9 2586 }
6d16c2f8 2587
e3127ae0
PB
2588 *plen = l;
2589 return bounce.buffer;
2590 }
2591
2592 base = xlat;
2593 raddr = memory_region_get_ram_addr(mr);
2594
2595 for (;;) {
6d16c2f8
AL
2596 len -= l;
2597 addr += l;
e3127ae0
PB
2598 done += l;
2599 if (len == 0) {
2600 break;
2601 }
2602
2603 l = len;
2604 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2605 if (this_mr != mr || xlat != base + done) {
2606 break;
2607 }
6d16c2f8 2608 }
e3127ae0 2609
d3e71559 2610 memory_region_ref(mr);
e3127ae0
PB
2611 *plen = done;
2612 return qemu_ram_ptr_length(raddr + base, plen);
6d16c2f8
AL
2613}
2614
ac1970fb 2615/* Unmaps a memory region previously mapped by address_space_map().
6d16c2f8
AL
2616 * Will also mark the memory as dirty if is_write == 1. access_len gives
2617 * the amount of memory that was actually read or written by the caller.
2618 */
a8170e5e
AK
2619void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2620 int is_write, hwaddr access_len)
6d16c2f8
AL
2621{
2622 if (buffer != bounce.buffer) {
d3e71559
PB
2623 MemoryRegion *mr;
2624 ram_addr_t addr1;
2625
2626 mr = qemu_ram_addr_from_host(buffer, &addr1);
2627 assert(mr != NULL);
6d16c2f8 2628 if (is_write) {
6886867e 2629 invalidate_and_set_dirty(addr1, access_len);
6d16c2f8 2630 }
868bb33f 2631 if (xen_enabled()) {
e41d7c69 2632 xen_invalidate_map_cache_entry(buffer);
050a0ddf 2633 }
d3e71559 2634 memory_region_unref(mr);
6d16c2f8
AL
2635 return;
2636 }
2637 if (is_write) {
ac1970fb 2638 address_space_write(as, bounce.addr, bounce.buffer, access_len);
6d16c2f8 2639 }
f8a83245 2640 qemu_vfree(bounce.buffer);
6d16c2f8 2641 bounce.buffer = NULL;
d3e71559 2642 memory_region_unref(bounce.mr);
ba223c29 2643 cpu_notify_map_clients();
6d16c2f8 2644}
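/*
 * Editor's usage sketch (not part of exec.c): the canonical
 * map -> access -> unmap pattern for a one-shot DMA read.  "dma_addr",
 * "dma_len" and "dst" are hypothetical inputs; a real device would fall
 * back to a bounce buffer or register a map client when address_space_map()
 * returns NULL or a shorter length than requested.
 */
static void example_dma_read(AddressSpace *as, hwaddr dma_addr,
                             hwaddr dma_len, uint8_t *dst)
{
    hwaddr plen = dma_len;
    void *host = address_space_map(as, dma_addr, &plen, false /* read */);

    if (!host || plen < dma_len) {
        /* Resources exhausted or range not contiguous: retry later,
         * e.g. after cpu_register_map_client() fires the callback. */
        if (host) {
            address_space_unmap(as, host, plen, 0, 0);
        }
        return;
    }
    memcpy(dst, host, dma_len);
    address_space_unmap(as, host, plen, 0, dma_len);
}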
d0ecd2aa 2645
a8170e5e
AK
2646void *cpu_physical_memory_map(hwaddr addr,
2647 hwaddr *plen,
ac1970fb
AK
2648 int is_write)
2649{
2650 return address_space_map(&address_space_memory, addr, plen, is_write);
2651}
2652
a8170e5e
AK
2653void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2654 int is_write, hwaddr access_len)
ac1970fb
AK
2655{
2656 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2657}
2658
8df1cd07 2659/* warning: addr must be aligned */
fdfba1a2 2660static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2661 enum device_endian endian)
8df1cd07 2662{
8df1cd07 2663 uint8_t *ptr;
791af8c8 2664 uint64_t val;
5c8a00ce 2665 MemoryRegion *mr;
149f54b5
PB
2666 hwaddr l = 4;
2667 hwaddr addr1;
8df1cd07 2668
fdfba1a2 2669 mr = address_space_translate(as, addr, &addr1, &l, false);
5c8a00ce 2670 if (l < 4 || !memory_access_is_direct(mr, false)) {
8df1cd07 2671 /* I/O case */
5c8a00ce 2672 io_mem_read(mr, addr1, &val, 4);
1e78bcc1
AG
2673#if defined(TARGET_WORDS_BIGENDIAN)
2674 if (endian == DEVICE_LITTLE_ENDIAN) {
2675 val = bswap32(val);
2676 }
2677#else
2678 if (endian == DEVICE_BIG_ENDIAN) {
2679 val = bswap32(val);
2680 }
2681#endif
8df1cd07
FB
2682 } else {
2683 /* RAM case */
5c8a00ce 2684 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2685 & TARGET_PAGE_MASK)
149f54b5 2686 + addr1);
1e78bcc1
AG
2687 switch (endian) {
2688 case DEVICE_LITTLE_ENDIAN:
2689 val = ldl_le_p(ptr);
2690 break;
2691 case DEVICE_BIG_ENDIAN:
2692 val = ldl_be_p(ptr);
2693 break;
2694 default:
2695 val = ldl_p(ptr);
2696 break;
2697 }
8df1cd07
FB
2698 }
2699 return val;
2700}
2701
fdfba1a2 2702uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2703{
fdfba1a2 2704 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2705}
2706
fdfba1a2 2707uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2708{
fdfba1a2 2709 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2710}
2711
fdfba1a2 2712uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2713{
fdfba1a2 2714 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2715}
2716
84b7b8e7 2717/* warning: addr must be aligned */
2c17449b 2718static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2719 enum device_endian endian)
84b7b8e7 2720{
84b7b8e7
FB
2721 uint8_t *ptr;
2722 uint64_t val;
5c8a00ce 2723 MemoryRegion *mr;
149f54b5
PB
2724 hwaddr l = 8;
2725 hwaddr addr1;
84b7b8e7 2726
2c17449b 2727 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2728 false);
2729 if (l < 8 || !memory_access_is_direct(mr, false)) {
84b7b8e7 2730 /* I/O case */
5c8a00ce 2731 io_mem_read(mr, addr1, &val, 8);
968a5627
PB
2732#if defined(TARGET_WORDS_BIGENDIAN)
2733 if (endian == DEVICE_LITTLE_ENDIAN) {
2734 val = bswap64(val);
2735 }
2736#else
2737 if (endian == DEVICE_BIG_ENDIAN) {
2738 val = bswap64(val);
2739 }
84b7b8e7
FB
2740#endif
2741 } else {
2742 /* RAM case */
5c8a00ce 2743 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2744 & TARGET_PAGE_MASK)
149f54b5 2745 + addr1);
1e78bcc1
AG
2746 switch (endian) {
2747 case DEVICE_LITTLE_ENDIAN:
2748 val = ldq_le_p(ptr);
2749 break;
2750 case DEVICE_BIG_ENDIAN:
2751 val = ldq_be_p(ptr);
2752 break;
2753 default:
2754 val = ldq_p(ptr);
2755 break;
2756 }
84b7b8e7
FB
2757 }
2758 return val;
2759}
2760
2c17449b 2761uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2762{
2c17449b 2763 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2764}
2765
2c17449b 2766uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2767{
2c17449b 2768 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2769}
2770
2c17449b 2771uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2772{
2c17449b 2773 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2774}
2775
aab33094 2776/* XXX: optimize */
2c17449b 2777uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
aab33094
FB
2778{
2779 uint8_t val;
2c17449b 2780 address_space_rw(as, addr, &val, 1, 0);
aab33094
FB
2781 return val;
2782}
2783
733f0b02 2784/* warning: addr must be aligned */
41701aa4 2785static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2786 enum device_endian endian)
aab33094 2787{
733f0b02
MT
2788 uint8_t *ptr;
2789 uint64_t val;
5c8a00ce 2790 MemoryRegion *mr;
149f54b5
PB
2791 hwaddr l = 2;
2792 hwaddr addr1;
733f0b02 2793
41701aa4 2794 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2795 false);
2796 if (l < 2 || !memory_access_is_direct(mr, false)) {
733f0b02 2797 /* I/O case */
5c8a00ce 2798 io_mem_read(mr, addr1, &val, 2);
1e78bcc1
AG
2799#if defined(TARGET_WORDS_BIGENDIAN)
2800 if (endian == DEVICE_LITTLE_ENDIAN) {
2801 val = bswap16(val);
2802 }
2803#else
2804 if (endian == DEVICE_BIG_ENDIAN) {
2805 val = bswap16(val);
2806 }
2807#endif
733f0b02
MT
2808 } else {
2809 /* RAM case */
5c8a00ce 2810 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2811 & TARGET_PAGE_MASK)
149f54b5 2812 + addr1);
1e78bcc1
AG
2813 switch (endian) {
2814 case DEVICE_LITTLE_ENDIAN:
2815 val = lduw_le_p(ptr);
2816 break;
2817 case DEVICE_BIG_ENDIAN:
2818 val = lduw_be_p(ptr);
2819 break;
2820 default:
2821 val = lduw_p(ptr);
2822 break;
2823 }
733f0b02
MT
2824 }
2825 return val;
aab33094
FB
2826}
2827
41701aa4 2828uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2829{
41701aa4 2830 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2831}
2832
41701aa4 2833uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2834{
41701aa4 2835 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2836}
2837
41701aa4 2838uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2839{
41701aa4 2840 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2841}
2842
8df1cd07
FB
2843/* warning: addr must be aligned. The ram page is not marked as dirty
2844 and the code inside is not invalidated. It is useful if the dirty
2845 bits are used to track modified PTEs */
2198a121 2846void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
8df1cd07 2847{
8df1cd07 2848 uint8_t *ptr;
5c8a00ce 2849 MemoryRegion *mr;
149f54b5
PB
2850 hwaddr l = 4;
2851 hwaddr addr1;
8df1cd07 2852
2198a121 2853 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2854 true);
2855 if (l < 4 || !memory_access_is_direct(mr, true)) {
2856 io_mem_write(mr, addr1, val, 4);
8df1cd07 2857 } else {
5c8a00ce 2858 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2859 ptr = qemu_get_ram_ptr(addr1);
8df1cd07 2860 stl_p(ptr, val);
74576198
AL
2861
2862 if (unlikely(in_migration)) {
a2cd8c85 2863 if (cpu_physical_memory_is_clean(addr1)) {
74576198
AL
2864 /* invalidate code */
2865 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2866 /* set dirty bit */
6886867e 2867 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
74576198
AL
2868 }
2869 }
8df1cd07
FB
2870 }
2871}
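/*
 * Editor's usage sketch (not part of exec.c): a target MMU helper setting
 * the "accessed" bit of a hypothetical 32-bit guest PTE.  Using
 * stl_phys_notdirty() keeps the RAM page from being marked dirty, so the
 * write does not trigger code invalidation for the page holding the PTE.
 * EXAMPLE_PTE_ACCESSED is an assumed target-specific flag.
 */
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_set_pte_accessed(CPUState *cpu, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(cpu->as, pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(cpu->as, pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}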
2872
2873/* warning: addr must be aligned */
ab1da857
EI
2874static inline void stl_phys_internal(AddressSpace *as,
2875 hwaddr addr, uint32_t val,
1e78bcc1 2876 enum device_endian endian)
8df1cd07 2877{
8df1cd07 2878 uint8_t *ptr;
5c8a00ce 2879 MemoryRegion *mr;
149f54b5
PB
2880 hwaddr l = 4;
2881 hwaddr addr1;
8df1cd07 2882
ab1da857 2883 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2884 true);
2885 if (l < 4 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2886#if defined(TARGET_WORDS_BIGENDIAN)
2887 if (endian == DEVICE_LITTLE_ENDIAN) {
2888 val = bswap32(val);
2889 }
2890#else
2891 if (endian == DEVICE_BIG_ENDIAN) {
2892 val = bswap32(val);
2893 }
2894#endif
5c8a00ce 2895 io_mem_write(mr, addr1, val, 4);
8df1cd07 2896 } else {
8df1cd07 2897 /* RAM case */
5c8a00ce 2898 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2899 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2900 switch (endian) {
2901 case DEVICE_LITTLE_ENDIAN:
2902 stl_le_p(ptr, val);
2903 break;
2904 case DEVICE_BIG_ENDIAN:
2905 stl_be_p(ptr, val);
2906 break;
2907 default:
2908 stl_p(ptr, val);
2909 break;
2910 }
51d7a9eb 2911 invalidate_and_set_dirty(addr1, 4);
8df1cd07
FB
2912 }
2913}
2914
ab1da857 2915void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2916{
ab1da857 2917 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2918}
2919
ab1da857 2920void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2921{
ab1da857 2922 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2923}
2924
ab1da857 2925void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2926{
ab1da857 2927 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2928}
2929
aab33094 2930/* XXX: optimize */
db3be60d 2931void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
aab33094
FB
2932{
2933 uint8_t v = val;
db3be60d 2934 address_space_rw(as, addr, &v, 1, 1);
aab33094
FB
2935}
2936
733f0b02 2937/* warning: addr must be aligned */
5ce5944d
EI
2938static inline void stw_phys_internal(AddressSpace *as,
2939 hwaddr addr, uint32_t val,
1e78bcc1 2940 enum device_endian endian)
aab33094 2941{
733f0b02 2942 uint8_t *ptr;
5c8a00ce 2943 MemoryRegion *mr;
149f54b5
PB
2944 hwaddr l = 2;
2945 hwaddr addr1;
733f0b02 2946
5ce5944d 2947 mr = address_space_translate(as, addr, &addr1, &l, true);
5c8a00ce 2948 if (l < 2 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2949#if defined(TARGET_WORDS_BIGENDIAN)
2950 if (endian == DEVICE_LITTLE_ENDIAN) {
2951 val = bswap16(val);
2952 }
2953#else
2954 if (endian == DEVICE_BIG_ENDIAN) {
2955 val = bswap16(val);
2956 }
2957#endif
5c8a00ce 2958 io_mem_write(mr, addr1, val, 2);
733f0b02 2959 } else {
733f0b02 2960 /* RAM case */
5c8a00ce 2961 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
733f0b02 2962 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2963 switch (endian) {
2964 case DEVICE_LITTLE_ENDIAN:
2965 stw_le_p(ptr, val);
2966 break;
2967 case DEVICE_BIG_ENDIAN:
2968 stw_be_p(ptr, val);
2969 break;
2970 default:
2971 stw_p(ptr, val);
2972 break;
2973 }
51d7a9eb 2974 invalidate_and_set_dirty(addr1, 2);
733f0b02 2975 }
aab33094
FB
2976}
2977
5ce5944d 2978void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2979{
5ce5944d 2980 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2981}
2982
5ce5944d 2983void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2984{
5ce5944d 2985 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2986}
2987
5ce5944d 2988void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2989{
5ce5944d 2990 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2991}
2992
aab33094 2993/* XXX: optimize */
f606604f 2994void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
aab33094
FB
2995{
2996 val = tswap64(val);
f606604f 2997 address_space_rw(as, addr, (void *) &val, 8, 1);
aab33094
FB
2998}
2999
f606604f 3000void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3001{
3002 val = cpu_to_le64(val);
f606604f 3003 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3004}
3005
f606604f 3006void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3007{
3008 val = cpu_to_be64(val);
f606604f 3009 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3010}
3011
5e2972fd 3012/* virtual memory access for debug (includes writing to ROM) */
f17ec444 3013int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
b448f2f3 3014 uint8_t *buf, int len, int is_write)
13eb76e0
FB
3015{
3016 int l;
a8170e5e 3017 hwaddr phys_addr;
9b3c35e0 3018 target_ulong page;
13eb76e0
FB
3019
3020 while (len > 0) {
3021 page = addr & TARGET_PAGE_MASK;
f17ec444 3022 phys_addr = cpu_get_phys_page_debug(cpu, page);
13eb76e0
FB
3023 /* if no physical page mapped, return an error */
3024 if (phys_addr == -1)
3025 return -1;
3026 l = (page + TARGET_PAGE_SIZE) - addr;
3027 if (l > len)
3028 l = len;
5e2972fd 3029 phys_addr += (addr & ~TARGET_PAGE_MASK);
2e38847b
EI
3030 if (is_write) {
3031 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3032 } else {
3033 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3034 }
13eb76e0
FB
3035 len -= l;
3036 buf += l;
3037 addr += l;
3038 }
3039 return 0;
3040}
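/*
 * Editor's usage sketch (not part of exec.c): a gdbstub-style consumer
 * reading guest virtual memory.  "pc" is a hypothetical guest virtual
 * address; a negative return value means no physical page was mapped.
 */
static bool example_read_insn(CPUState *cpu, target_ulong pc, uint32_t *insn)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, pc, buf, sizeof(buf), 0) < 0) {
        return false;
    }
    *insn = ldl_p(buf);
    return true;
}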
a68fe89c 3041#endif
13eb76e0 3042
8e4a424b
BS
3043/*
3044 * A helper function for the _utterly broken_ virtio device model to find out if
3045 * it's running on a big endian machine. Don't do this at home kids!
3046 */
98ed8ecf
GK
3047bool target_words_bigendian(void);
3048bool target_words_bigendian(void)
8e4a424b
BS
3049{
3050#if defined(TARGET_WORDS_BIGENDIAN)
3051 return true;
3052#else
3053 return false;
3054#endif
3055}
3056
76f35538 3057#ifndef CONFIG_USER_ONLY
a8170e5e 3058bool cpu_physical_memory_is_io(hwaddr phys_addr)
76f35538 3059{
5c8a00ce 3060 MemoryRegion*mr;
149f54b5 3061 hwaddr l = 1;
76f35538 3062
5c8a00ce
PB
3063 mr = address_space_translate(&address_space_memory,
3064 phys_addr, &phys_addr, &l, false);
76f35538 3065
5c8a00ce
PB
3066 return !(memory_region_is_ram(mr) ||
3067 memory_region_is_romd(mr));
76f35538 3068}
bd2fa51f
MH
3069
3070void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3071{
3072 RAMBlock *block;
3073
0dc3f44a
MD
3074 rcu_read_lock();
3075 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 3076 func(block->host, block->offset, block->used_length, opaque);
bd2fa51f 3077 }
0dc3f44a 3078 rcu_read_unlock();
bd2fa51f 3079}
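/*
 * Editor's usage sketch (not part of exec.c): totalling the used guest RAM
 * with the block iterator.  The callback takes its arguments in the order
 * passed above (host pointer, offset, used length, opaque).
 */
static void example_add_block_length(void *host, ram_addr_t offset,
                                     ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}

static ram_addr_t example_total_used_ram(void)
{
    ram_addr_t total = 0;

    qemu_ram_foreach_block(example_add_block_length, &total);
    return total;
}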
ec3f8c99 3080#endif