ms->clear_bitmap_shift);
}
+static const gchar *format_time_str(uint64_t us)
+{
+ const char *units[] = {"us", "ms", "sec"};
+ int index = 0;
+
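+ /* Scale to the largest unit (us, ms, sec) that keeps the value readable */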
+ while (us > 1000) {
+ us /= 1000;
+ if (++index >= (ARRAY_SIZE(units) - 1)) {
+ break;
+ }
+ }
+
+ return g_strdup_printf("%"PRIu64" %s", us, units[index]);
+}
+
static void migration_dump_blocktime(Monitor *mon, MigrationInfo *info)
{
if (info->has_postcopy_blocktime) {
}
monitor_printf(mon, "]\n");
}
+
+ if (info->has_postcopy_latency_dist) {
+ uint64List *item = info->postcopy_latency_dist;
+ int count = 0;
+
+ monitor_printf(mon, "Postcopy Latency Distribution:\n");
+
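+ /* The N-th bucket counts faults resolved within [2^N us, 2^(N+1) us) */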
+ while (item) {
+ g_autofree const gchar *from = format_time_str(1UL << count);
+ g_autofree const gchar *to = format_time_str(1UL << (count + 1));
+
+ monitor_printf(mon, " [ %8s - %8s ]: %10"PRIu64"\n",
+ from, to, item->value);
+ item = item->next;
+ count++;
+ }
+ }
}
void hmp_info_migrate(Monitor *mon, const QDict *qdict)
#include <sys/eventfd.h>
#include <linux/userfaultfd.h>
+/*
+ * Here we use 24 buckets, which means the last bucket will cover [2^24 us,
+ * 2^25 us) ~= [16, 32) seconds. That should be far enough to cover even
+ * extreme cases, e.g. a (perf-wise broken) 1G page moving over, which can
+ * sometimes take a few seconds for various reasons. Anything beyond that
+ * is unlikely to be worth accounting for.
+ */
+#define BLOCKTIME_LATENCY_BUCKET_N (24)
+
/* All the time records are in unit of nanoseconds */
typedef struct PostcopyBlocktimeContext {
/* blocktime per vCPU */
* that a fault was requested.
*/
GHashTable *vcpu_addr_hash;
+ /*
+ * Each bucket stores the count of faults that were resolved within the
+ * bucket window [2^N us, 2^(N+1) us).
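+ * For example, bucket 8 counts faults resolved within [256us, 512us).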
+ */
+ uint64_t latency_buckets[BLOCKTIME_LATENCY_BUCKET_N];
/* total blocktime when all vCPUs are stopped */
uint64_t total_blocktime;
/* point in time when last page fault was initiated */
unsigned int smp_cpus = ms->smp.cpus;
PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
+ /* Initialize all counters to be zeros */
+ memset(ctx->latency_buckets, 0, sizeof(ctx->latency_buckets));
+
ctx->vcpu_blocktime_total = g_new0(uint64_t, smp_cpus);
ctx->vcpu_faults_count = g_new0(uint64_t, smp_cpus);
ctx->vcpu_faults_current = g_new0(uint8_t, smp_cpus);
uint64_t latency_total = 0, faults = 0;
uint32List *list_blocktime = NULL;
uint64List *list_latency = NULL;
+ uint64List *latency_buckets = NULL;
int i;
if (!bc) {
QAPI_LIST_PREPEND(list_latency, latency);
}
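+ /* Prepend in reverse order so the list ends up ordered bucket 0..N-1 */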
+ for (i = BLOCKTIME_LATENCY_BUCKET_N - 1; i >= 0; i--) {
+ QAPI_LIST_PREPEND(latency_buckets, bc->latency_buckets[i]);
+ }
+
latency_total += bc->non_vcpu_blocktime_total;
faults += bc->non_vcpu_faults;
info->postcopy_latency = faults ? (latency_total / faults) : 0;
info->has_postcopy_vcpu_latency = true;
info->postcopy_vcpu_latency = list_latency;
+ info->has_postcopy_latency_dist = true;
+ info->postcopy_latency_dist = latency_buckets;
}
static uint64_t get_postcopy_total_blocktime(void)
blocktime_fault_inject(dc, addr, cpu, current);
}
+static void blocktime_latency_account(PostcopyBlocktimeContext *ctx,
+ uint64_t time_us)
+{
+ /*
+ * Convert the time (in us) to the bucket index it belongs to. Take extra
+ * care of time_us==0: it is normally rare, but when it happens put it
+ * into bucket 0.
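+ * Note that (63 - clz64(time_us)) computes floor(log2(time_us)).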
+ */
+ int index = time_us ? (63 - clz64(time_us)) : 0;
+
+ assert(index >= 0);
+
+ /* If it's too large, put into top bucket */
+ if (index >= BLOCKTIME_LATENCY_BUCKET_N) {
+ index = BLOCKTIME_LATENCY_BUCKET_N - 1;
+ }
+
+ ctx->latency_buckets[index]++;
+}
+
typedef struct {
PostcopyBlocktimeContext *ctx;
uint64_t current;
assert(iter->current >= entry->fault_time);
time_passed = iter->current - entry->fault_time;
+ /* Latency buckets are in microseconds */
+ blocktime_latency_account(ctx, time_passed / SCALE_US);
+
if (cpu >= 0) {
/*
* If we resolved all pending faults on one vCPU due to this page
# average page fault latency. This is only present when the
# postcopy-blocktime migration capability is enabled. (Since 10.1)
#
+# @postcopy-latency-dist: remote page fault latency distribution. Each
+# element of the array is the number of faults that fall into the
+# bucket period. For the N-th bucket (N>=0), the latency window is
+# [2^N us, 2^(N+1) us). For example, the 8th element stores how many
+# remote faults got resolved within the [256us, 512us) window. This is only
+# present when the postcopy-blocktime migration capability is enabled.
+# (Since 10.1)
+#
# @postcopy-vcpu-latency: average remote page fault latency per vCPU (in
# ns). It has the same definition of @postcopy-latency, but instead
# this is the per-vCPU statistics. This is only present when the
# Features:
#
# @unstable: Members @postcopy-latency, @postcopy-vcpu-latency,
-# @postcopy-non-vcpu-latency are experimental.
+# @postcopy-latency-dist, @postcopy-non-vcpu-latency are experimental.
#
# Since: 0.14
##
'*postcopy-vcpu-blocktime': ['uint32'],
'*postcopy-latency': {
'type': 'uint64', 'features': [ 'unstable' ] },
+ '*postcopy-latency-dist': {
+ 'type': ['uint64'], 'features': [ 'unstable' ] },
'*postcopy-vcpu-latency': {
'type': ['uint64'], 'features': [ 'unstable' ] },
'*postcopy-non-vcpu-latency': {