migration/postcopy: Push blocktime start/end into page req mutex

author Peter Xu <peterx@redhat.com>

Fri, 13 Jun 2025 14:12:05 +0000 (10:12 -0400)

committer Fabiano Rosas <farosas@suse.de>

Fri, 11 Jul 2025 13:37:37 +0000 (10:37 -0300)
author Peter Xu <peterx@redhat.com>
Fri, 13 Jun 2025 14:12:05 +0000 (10:12 -0400)
committer Fabiano Rosas <farosas@suse.de>
Fri, 11 Jul 2025 13:37:37 +0000 (10:37 -0300)
diff --git a/migration/migration.c b/migration/migration.c

index 923400f801771d3d96410ffc0112377ba21afee2..10c216d25dec01f206eacad2edd24d21f00e614c 100644 (file)
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -576,22 +576,27 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
  }
  
  int migrate_send_rp_req_pages(MigrationIncomingState *mis,
-                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
+                              RAMBlock *rb, ram_addr_t start, uint64_t haddr,
+                              uint32_t tid)
  {
      void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
      bool received = false;
  
      WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
          received = ramblock_recv_bitmap_test_byte_offset(rb, start);
-        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
-            /*
-             * The page has not been received, and it's not yet in the page
-             * request list.  Queue it.  Set the value of element to 1, so that
-             * things like g_tree_lookup() will return TRUE (1) when found.
-             */
-            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
-            qatomic_inc(&mis->page_requested_count);
-            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
+        if (!received) {
+            if (!g_tree_lookup(mis->page_requested, aligned)) {
+                /*
+                 * The page has not been received, and it's not yet in the
+                 * page request list.  Queue it.  Set the value of element
+                 * to 1, so that things like g_tree_lookup() will return
+                 * TRUE (1) when found.
+                 */
+                g_tree_insert(mis->page_requested, aligned, (gpointer)1);
+                qatomic_inc(&mis->page_requested_count);
+                trace_postcopy_page_req_add(aligned, mis->page_requested_count);
+            }
+            mark_postcopy_blocktime_begin(haddr, tid, rb);
          }
      }
  
diff --git a/migration/migration.h b/migration/migration.h

index 739289de9342b0479b3d655dbdbfb09ff0405e89..01329bf824893dff9006f836c688f3dfee08e62e 100644 (file)
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -546,7 +546,7 @@ void migrate_send_rp_shut(MigrationIncomingState *mis,
  void migrate_send_rp_pong(MigrationIncomingState *mis,
                            uint32_t value);
  int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb,
-                              ram_addr_t start, uint64_t haddr);
+                              ram_addr_t start, uint64_t haddr, uint32_t tid);
  int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
                                        RAMBlock *rb, ram_addr_t start);
  void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c

index 75fd310fb2b00f6073309d692ec619ff4c8b2f20..32fa06dabd54900ade9643b803a12c3cab51f4b5 100644 (file)
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -752,8 +752,12 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
                         pagesize);
  }
  
+/*
+ * NOTE: @tid is only used when the postcopy-blocktime feature is enabled.
+ * It is optional: when zero is provided, the fault accounting is skipped.
+ */
  static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
-                                 ram_addr_t start, uint64_t haddr)
+                                 ram_addr_t start, uint64_t haddr, uint32_t tid)
  {
      void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
  
@@ -772,7 +776,7 @@ static int postcopy_request_page(MigrationIncomingState *mis, RAMBlock *rb,
          return received ? 0 : postcopy_place_page_zero(mis, aligned, rb);
      }
  
-    return migrate_send_rp_req_pages(mis, rb, start, haddr);
+    return migrate_send_rp_req_pages(mis, rb, start, haddr, tid);
  }
  
  /*
@@ -793,7 +797,8 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb,
                                          qemu_ram_get_idstr(rb), rb_offset);
          return postcopy_wake_shared(pcfd, client_addr, rb);
      }
-    postcopy_request_page(mis, rb, aligned_rbo, client_addr);
+    /* TODO: support blocktime tracking */
+    postcopy_request_page(mis, rb, aligned_rbo, client_addr, 0);
      return 0;
  }
  
@@ -819,17 +824,17 @@ static uint32_t get_low_time_offset(PostcopyBlocktimeContext *dc)
  }
  
  /*
- * This function is being called when pagefault occurs. It
- * tracks down vCPU blocking time.
+ * This function is called when a page fault occurs.  It tracks vCPU
+ * blocking time.  It's protected by @page_request_mutex.
   *
   * @addr: faulted host virtual address
   * @ptid: faulted process thread id
   * @rb: ramblock appropriate to addr
   */
-static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
-                                          RAMBlock *rb)
+void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+                                   RAMBlock *rb)
  {
-    int cpu, already_received;
+    int cpu;
      MigrationIncomingState *mis = migration_incoming_get_current();
      PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
      uint32_t low_time_offset;
@@ -852,24 +857,19 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
      qatomic_xchg(&dc->vcpu_addr[cpu], addr);
  
      /*
-     * check it here, not at the beginning of the function,
-     * due to, check could occur early than bitmap_set in
-     * qemu_ufd_copy_ioctl
+     * The caller should only inject a blocktime entry when the page is
+     * still missing.
       */
-    already_received = ramblock_recv_bitmap_test(rb, (void *)addr);
-    if (already_received) {
-        qatomic_xchg(&dc->vcpu_addr[cpu], 0);
-        qatomic_xchg(&dc->page_fault_vcpu_time[cpu], 0);
-        qatomic_dec(&dc->smp_cpus_down);
-    }
+    assert(!ramblock_recv_bitmap_test(rb, (void *)addr));
+
      trace_mark_postcopy_blocktime_begin(addr, dc, dc->page_fault_vcpu_time[cpu],
-                                        cpu, already_received);
+                                        cpu);
  }
  
  /*
- *  This function just provide calculated blocktime per cpu and trace it.
- *  Total blocktime is calculated in mark_postcopy_blocktime_end.
- *
+ * This function just provides the calculated blocktime per CPU and traces
+ * it.  Total blocktime is calculated in mark_postcopy_blocktime_end.  It's
+ * protected by @page_request_mutex.
   *
   * Assume we have 3 CPU
   *
@@ -1068,17 +1068,14 @@ static void *postcopy_ram_fault_thread(void *opaque)
                                                  qemu_ram_get_idstr(rb),
                                                  rb_offset,
                                                  msg.arg.pagefault.feat.ptid);
-            mark_postcopy_blocktime_begin(
-                    (uintptr_t)(msg.arg.pagefault.address),
-                                msg.arg.pagefault.feat.ptid, rb);
-
  retry:
              /*
               * Send the request to the source - we want to request one
               * of our host page sizes (which is >= TPS)
               */
              ret = postcopy_request_page(mis, rb, rb_offset,
-                                        msg.arg.pagefault.address);
+                                        msg.arg.pagefault.address,
+                                        msg.arg.pagefault.feat.ptid);
              if (ret) {
                  /* May be network failure, try to wait for recovery */
                  postcopy_pause_fault_thread(mis);
@@ -1299,8 +1296,8 @@ static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr,
                  qemu_cond_signal(&mis->page_request_cond);
              }
          }
-        qemu_mutex_unlock(&mis->page_request_mutex);
          mark_postcopy_blocktime_end((uintptr_t)host_addr);
+        qemu_mutex_unlock(&mis->page_request_mutex);
      }
      return ret;
  }
@@ -1430,6 +1427,11 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd,
  {
      g_assert_not_reached();
  }
+
+void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+                                   RAMBlock *rb)
+{
+}
  #endif
  
  /* ------------------------------------------------------------------------- */
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h

index a6df1b2811b0c6a00e64ca6043def17d4c6cbfb4..3852141d7e37ab18bada4b46c137fef0969d0070 100644 (file)
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -196,5 +196,7 @@ void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
  void postcopy_preempt_setup(MigrationState *s);
  int postcopy_preempt_establish_channel(MigrationState *s);
  bool postcopy_is_paused(MigrationStatus status);
+void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
+                                   RAMBlock *rb);
  
  #endif
diff --git a/migration/trace-events b/migration/trace-events

index dcd8fe9a0cccdda5bbba75d6ccaa3f4880b7ffab..917f521e8839026fd0ca0dbf7044aa0b17ff21db 100644 (file)
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -285,7 +285,7 @@ postcopy_nhp_range(const char *ramblock, void *host_addr, size_t offset, size_t
  postcopy_place_page(void *host_addr) "host=%p"
  postcopy_place_page_zero(void *host_addr) "host=%p"
  postcopy_ram_enable_notify(void) ""
-mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d"
+mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d"
  mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
  postcopy_pause_fault_thread(void) ""
  postcopy_pause_fault_thread_continued(void) ""
author	Peter Xu <peterx@redhat.com>
	Fri, 13 Jun 2025 14:12:05 +0000 (10:12 -0400)
committer	Fabiano Rosas <farosas@suse.de>
	Fri, 11 Jul 2025 13:37:37 +0000 (10:37 -0300)
migration/migration.c		patch \| blob \| blame \| history
migration/migration.h		patch \| blob \| blame \| history
migration/postcopy-ram.c		patch \| blob \| blame \| history
migration/postcopy-ram.h		patch \| blob \| blame \| history
migration/trace-events		patch \| blob \| blame \| history