]> git.ipfire.org Git - thirdparty/qemu.git/commitdiff
migration: merge fragmented clear_dirty ioctls
authorChuang Xu <xuchuangxclwt@bytedance.com>
Thu, 18 Dec 2025 11:42:20 +0000 (19:42 +0800)
committerPeter Xu <peterx@redhat.com>
Tue, 23 Dec 2025 14:24:34 +0000 (09:24 -0500)
In our long-term experience in Bytedance, we've found that under
the same load, live migration of larger VMs with more devices is
often more difficult to converge (requiring a larger downtime limit).

Through some testing and calculations, we conclude that bitmap sync time
affects the calculation of live migration bandwidth.

When the addresses processed are not aligned, a large number of
clear_dirty ioctl occur (e.g. a 4MB misaligned memory can generate
2048 clear_dirty ioctls from two different memory_listener),
which increases the time required for bitmap_sync and makes it
more difficult for dirty pages to converge.

For a 64C256G vm with 8 vhost-user-net(32 queue per nic) and
16 vhost-user-blk(4 queue per blk), the sync time is as high as *73ms*
(tested with 10GBps dirty rate, the sync time increases as the dirty
page rate increases), Here are each part of the sync time:

- sync from kvm to ram_list: 2.5ms
- vhost_log_sync:3ms
- sync aligned memory from ram_list to RAMBlock: 5ms
- sync misaligned memory from ram_list to RAMBlock: 61ms

Attempt to merge those fragmented clear_dirty ioctls, then syncing
misaligned memory from ram_list to RAMBlock takes only about 1ms,
and the total sync time is only *12ms*.

Signed-off-by: Chuang Xu <xuchuangxclwt@bytedance.com>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Link: https://lore.kernel.org/r/20251218114220.83354-1-xuchuangxclwt@bytedance.com
[peterx: drop var "offset" in physical_memory_sync_dirty_bitmap]
Signed-off-by: Peter Xu <peterx@redhat.com>
accel/tcg/cputlb.c
include/system/physmem.h
migration/ram.c
system/memory.c
system/physmem.c

index fd1606c85656eb602f22f46d0f94412de33496d6..c8827c8b0d2cf0176299bb7fa62c2df4758b623d 100644 (file)
@@ -857,8 +857,9 @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 void tlb_protect_code(ram_addr_t ram_addr)
 {
     physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
-                                             TARGET_PAGE_SIZE,
-                                             DIRTY_MEMORY_CODE);
+                                         TARGET_PAGE_SIZE,
+                                         DIRTY_MEMORY_CODE,
+                                         NULL);
 }
 
 /* update the TLB so that writes in physical page 'phys_addr' are no longer
index 879f6eae38b07dc5c679dc35a9c74b323f730335..a59724ef10dacf912f4ebba480a0a6daddfc1801 100644 (file)
@@ -39,9 +39,10 @@ uint64_t physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
 
 void physical_memory_dirty_bits_cleared(ram_addr_t start, ram_addr_t length);
 
-bool physical_memory_test_and_clear_dirty(ram_addr_t start,
-                                          ram_addr_t length,
-                                          unsigned client);
+uint64_t physical_memory_test_and_clear_dirty(ram_addr_t start,
+                                              ram_addr_t length,
+                                              unsigned client,
+                                              unsigned long *bmap);
 
 DirtyBitmapSnapshot *
 physical_memory_snapshot_and_clear_dirty(MemoryRegion *mr, hwaddr offset,
index df7e1548777af31bcf2c7d2156e2027550b0e971..c403fd73a60cdb2a986356191e3896ff7319f575 100644 (file)
@@ -941,7 +941,6 @@ static uint64_t physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                                   ram_addr_t start,
                                                   ram_addr_t length)
 {
-    ram_addr_t addr;
     unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
     uint64_t num_dirty = 0;
     unsigned long *dest = rb->bmap;
@@ -993,19 +992,11 @@ static uint64_t physical_memory_sync_dirty_bitmap(RAMBlock *rb,
             memory_region_clear_dirty_bitmap(rb->mr, start, length);
         }
     } else {
-        ram_addr_t offset = rb->offset;
-
-        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
-            if (physical_memory_test_and_clear_dirty(
-                        start + addr + offset,
-                        TARGET_PAGE_SIZE,
-                        DIRTY_MEMORY_MIGRATION)) {
-                long k = (start + addr) >> TARGET_PAGE_BITS;
-                if (!test_and_set_bit(k, dest)) {
-                    num_dirty++;
-                }
-            }
-        }
+        num_dirty = physical_memory_test_and_clear_dirty(
+                        start + rb->offset,
+                        length,
+                        DIRTY_MEMORY_MIGRATION,
+                        dest);
     }
 
     return num_dirty;
index 8b84661ae36c5c60a9129470530930a26e35162f..666364392da5c61cfa6bc27829db1624d8d5684e 100644 (file)
@@ -2424,7 +2424,7 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
 {
     assert(mr->ram_block);
     physical_memory_test_and_clear_dirty(
-        memory_region_get_ram_addr(mr) + addr, size, client);
+        memory_region_get_ram_addr(mr) + addr, size, client, NULL);
 }
 
 int memory_region_get_fd(MemoryRegion *mr)
index c9869e4049f852c46cd9df92a27b24b03a4e9f76..26bf30af17985ff3853cea992db65d46a0b4b7ef 100644 (file)
@@ -1089,19 +1089,30 @@ void physical_memory_set_dirty_range(ram_addr_t start, ram_addr_t length,
     }
 }
 
-/* Note: start and end must be within the same ram block.  */
-bool physical_memory_test_and_clear_dirty(ram_addr_t start,
+/*
+ * Note: start and end must be within the same ram block.
+ *
+ * @bmap usage:
+ * - When @bmap is provided, set bits for dirty pages, but
+ *   only count those pages if the bit wasn't already set in @bmap.
+ * - When @bmap is NULL, count all dirty pages in the range.
+ *
+ * @return:
+ * - Number of dirty guest pages found within [start, start + length).
+ */
+uint64_t physical_memory_test_and_clear_dirty(ram_addr_t start,
                                               ram_addr_t length,
-                                              unsigned client)
+                                              unsigned client,
+                                              unsigned long *bmap)
 {
     DirtyMemoryBlocks *blocks;
     unsigned long end, page, start_page;
-    bool dirty = false;
+    uint64_t num_dirty = 0;
     RAMBlock *ramblock;
     uint64_t mr_offset, mr_size;
 
     if (length == 0) {
-        return false;
+        return 0;
     }
 
     end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
@@ -1118,12 +1129,19 @@ bool physical_memory_test_and_clear_dirty(ram_addr_t start,
         while (page < end) {
             unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
             unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
-            unsigned long num = MIN(end - page,
-                                    DIRTY_MEMORY_BLOCK_SIZE - offset);
 
-            dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
-                                                  offset, num);
-            page += num;
+            if (bitmap_test_and_clear_atomic(blocks->blocks[idx], offset, 1)) {
+                if (bmap) {
+                    unsigned long k = page - (ramblock->offset >> TARGET_PAGE_BITS);
+                    if (!test_and_set_bit(k, bmap)) {
+                        num_dirty++;
+                    }
+                } else {
+                    num_dirty++;
+                }
+            }
+
+            page++;
         }
 
         mr_offset = (ram_addr_t)(start_page << TARGET_PAGE_BITS) - ramblock->offset;
@@ -1131,18 +1149,18 @@ bool physical_memory_test_and_clear_dirty(ram_addr_t start,
         memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size);
     }
 
-    if (dirty) {
+    if (num_dirty) {
         physical_memory_dirty_bits_cleared(start, length);
     }
 
-    return dirty;
+    return num_dirty;
 }
 
 static void physical_memory_clear_dirty_range(ram_addr_t addr, ram_addr_t length)
 {
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_MIGRATION);
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_VGA);
-    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_CODE);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_MIGRATION, NULL);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_VGA, NULL);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_CODE, NULL);
 }
 
 DirtyBitmapSnapshot *physical_memory_snapshot_and_clear_dirty