static virClass *virCHMonitorClass;
static void virCHMonitorDispose(void *obj);
+static void virCHMonitorThreadInfoFree(virCHMonitor *mon);
static int virCHMonitorOnceInit(void)
{
virCHMonitor *mon = opaque;
VIR_DEBUG("mon=%p", mon);
+ virCHMonitorThreadInfoFree(mon);
virObjectUnref(mon->vm);
}
return ret;
}
+/* Free the cached per-thread info array on @mon and reset the count.
+ * VIR_FREE also clears the pointer, so a subsequent call is a no-op. */
+static void
+virCHMonitorThreadInfoFree(virCHMonitor *mon)
+{
+ mon->nthreads = 0;
+ VIR_FREE(mon->threads);
+}
+
+/**
+ * virCHMonitorRefreshThreadInfo:
+ * @mon: monitor whose cached thread info should be rebuilt
+ *
+ * Re-scan the threads of the cloud-hypervisor process via /proc and
+ * classify each one (vCPU, IO or emulator) from its comm name.  Any
+ * previously cached data on @mon is released first.
+ *
+ * Returns the number of threads successfully classified (0 if the
+ * TID list could not be read).
+ */
+static size_t
+virCHMonitorRefreshThreadInfo(virCHMonitor *mon)
+{
+    virCHMonitorThreadInfo *info = NULL;
+    g_autofree pid_t *tids = NULL;
+    virDomainObj *vm = mon->vm;
+    size_t ntids = 0;
+    size_t i;
+
+    virCHMonitorThreadInfoFree(mon);
+    if (virProcessGetPids(vm->pid, &ntids, &tids) < 0)
+        return 0;
+
+    info = g_new0(virCHMonitorThreadInfo, ntids);
+    for (i = 0; i < ntids; i++) {
+        /* Fill the next unused slot so threads[0..nthreads-1] stay
+         * contiguous even when TIDs are skipped below.  Indexing by
+         * 'i' (as before) left zero-filled holes inside the range
+         * consumers iterate over. */
+        virCHMonitorThreadInfo *cur = &info[mon->nthreads];
+        g_autofree char *proc = NULL;
+        g_autofree char *data = NULL;
+
+        proc = g_strdup_printf("/proc/%d/task/%d/comm",
+                               (int)vm->pid, (int)tids[i]);
+
+        /* Thread may have exited between the scan and now; skip it. */
+        if (virFileReadAll(proc, (1 << 16), &data) < 0) {
+            continue;
+        }
+
+        VIR_DEBUG("VM PID: %d, TID %d, COMM: %s",
+                  (int)vm->pid, (int)tids[i], data);
+        if (STRPREFIX(data, "vcpu")) {
+            int cpuid;
+            char *tmp;
+
+            if (virStrToLong_i(data + 4, &tmp, 0, &cpuid) < 0) {
+                VIR_WARN("Index is not specified correctly");
+                continue;
+            }
+            cur->type = virCHThreadTypeVcpu;
+            cur->vcpuInfo.tid = tids[i];
+            cur->vcpuInfo.online = true;
+            cur->vcpuInfo.cpuid = cpuid;
+            VIR_DEBUG("vcpu%d -> tid: %d", cpuid, tids[i]);
+        } else if (STRPREFIX(data, "_disk") || STRPREFIX(data, "_net") ||
+                   STRPREFIX(data, "_rng")) {
+            /* Prefixes used by cloud-hypervisor for IO Threads are captured at
+             * https://github.com/cloud-hypervisor/cloud-hypervisor/blob/main/vmm/src/device_manager.rs */
+            cur->type = virCHThreadTypeIO;
+            cur->ioInfo.tid = tids[i];
+            virStrcpy(cur->ioInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1);
+        } else {
+            cur->type = virCHThreadTypeEmulator;
+            cur->emuInfo.tid = tids[i];
+            virStrcpy(cur->emuInfo.thrName, data, VIRCH_THREAD_NAME_LEN - 1);
+        }
+        mon->nthreads++;
+    }
+    mon->threads = info;
+
+    return mon->nthreads;
+}
+
+/**
+ * virCHMonitorGetThreadInfo:
+ * @mon: Pointer to the monitor
+ * @refresh: Refresh thread info or not
+ * @threads: Filled with a pointer to the monitor-owned thread info
+ *           array; the caller must not free it
+ *
+ * Retrieve thread info and store to @threads
+ *
+ * Returns count of threads on success.
+ */
+size_t
+virCHMonitorGetThreadInfo(virCHMonitor *mon,
+                          bool refresh,
+                          virCHMonitorThreadInfo **threads)
+{
+    if (refresh)
+        virCHMonitorRefreshThreadInfo(mon);
+
+    *threads = mon->threads;
+
+    /* Always report the cached count so it matches the array handed
+     * back via @threads; the previous code returned 0 whenever
+     * @refresh was false, contradicting the contract above. */
+    return mon->nthreads;
+}
+
int
virCHMonitorShutdownVMM(virCHMonitor *mon)
{
#include "ch_domain.h"
#include "ch_monitor.h"
#include "ch_process.h"
+#include "domain_cgroup.h"
+#include "virnuma.h"
#include "viralloc.h"
#include "virerror.h"
#include "virjson.h"
return 0;
}
+/* Fetch the bitmap of all online host CPUs so a process can be pinned
+ * to "all pCPUs".  Leaves *cpumapRet NULL and returns 0 on hosts where
+ * no CPU bitmap is available (caller then simply skips pinning).
+ *
+ * Returns 0 on success, -1 if the online-CPU bitmap cannot be read. */
+static int
+virCHProcessGetAllCpuAffinity(virBitmap **cpumapRet)
+{
+ *cpumapRet = NULL;
+
+ if (!virHostCPUHasBitmap())
+ return 0;
+
+ if (!(*cpumapRet = virHostCPUGetOnlineBitmap()))
+ return -1;
+
+ return 0;
+}
+
+#if defined(WITH_SCHED_GETAFFINITY) || defined(WITH_BSD_CPU_AFFINITY)
+/* Set the initial CPU affinity of the already-spawned cloud-hypervisor
+ * process, chosen in priority order: strict-NUMA nodeset -> explicit
+ * emulatorpin -> all online host CPUs.
+ *
+ * Returns 0 on success, -1 on error. */
+static int
+virCHProcessInitCpuAffinity(virDomainObj *vm)
+{
+ g_autoptr(virBitmap) cpumapToSet = NULL;
+ virDomainNumatuneMemMode mem_mode;
+ virCHDomainObjPrivate *priv = vm->privateData;
+
+ if (!vm->pid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot setup CPU affinity until process is started"));
+ return -1;
+ }
+
+ /* With strict NUMA placement, derive the CPU set from the memory
+ * nodeset so the process runs on the CPUs of those nodes. */
+ if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 &&
+ virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+ mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
+ virBitmap *nodeset = NULL;
+
+ if (virDomainNumatuneMaybeGetNodeset(vm->def->numa,
+ priv->autoNodeset,
+ &nodeset, -1) < 0)
+ return -1;
+
+ if (virNumaNodesetToCPUset(nodeset, &cpumapToSet) < 0)
+ return -1;
+ } else if (vm->def->cputune.emulatorpin) {
+ if (!(cpumapToSet = virBitmapNewCopy(vm->def->cputune.emulatorpin)))
+ return -1;
+ } else {
+ /* cpumapToSet may legitimately stay NULL here (no host bitmap);
+ * the pinning below is then skipped. */
+ if (virCHProcessGetAllCpuAffinity(&cpumapToSet) < 0)
+ return -1;
+ }
+
+ if (cpumapToSet && virProcessSetAffinity(vm->pid, cpumapToSet, false) < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+#else /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
+/* No affinity syscalls on this platform: succeed without doing anything. */
+static int
+virCHProcessInitCpuAffinity(virDomainObj *vm G_GNUC_UNUSED)
+{
+ return 0;
+}
+#endif /* !defined(WITH_SCHED_GETAFFINITY) && !defined(WITH_BSD_CPU_AFFINITY) */
+
+/**
+ * virCHProcessSetupPid:
+ * @vm: domain object
+ * @pid: PID/TID to configure
+ * @nameval: cgroup thread type (emulator/vcpu/iothread)
+ * @id: index of the thread within its type (e.g. vcpu id)
+ * @cpumask: explicit per-thread pinning, or NULL to fall back to
+ *           auto-placement / domain cpumask / all host CPUs
+ * @period: CFS period for bandwidth control (0 = unset)
+ * @quota: CFS quota for bandwidth control (0 = unset)
+ * @sched: scheduler policy/priority, or NULL
+ *
+ * This function sets resource properties (affinity, cgroups,
+ * scheduler) for any PID associated with a domain. It should be used
+ * to set up emulator PIDs as well as vCPU and I/O thread pids to
+ * ensure they are all handled the same way.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int
+virCHProcessSetupPid(virDomainObj *vm,
+ pid_t pid,
+ virCgroupThreadName nameval,
+ int id,
+ virBitmap *cpumask,
+ unsigned long long period,
+ long long quota,
+ virDomainThreadSchedParam *sched)
+{
+ virCHDomainObjPrivate *priv = vm->privateData;
+ virDomainNumatuneMemMode mem_mode;
+ g_autoptr(virCgroup) cgroup = NULL;
+ virBitmap *use_cpumask = NULL;
+ virBitmap *affinity_cpumask = NULL;
+ g_autoptr(virBitmap) hostcpumap = NULL;
+ g_autofree char *mem_mask = NULL;
+ int ret = -1;
+
+ /* Bandwidth tuning requires the CPU cgroup controller. */
+ if ((period || quota) &&
+ !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("cgroup cpu is required for scheduler tuning"));
+ goto cleanup;
+ }
+
+ /* Infer which cpumask shall be used. */
+ if (cpumask) {
+ use_cpumask = cpumask;
+ } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
+ use_cpumask = priv->autoCpuset;
+ } else if (vm->def->cpumask) {
+ use_cpumask = vm->def->cpumask;
+ } else {
+ /* we can't assume cloud-hypervisor itself is running on all pCPUs,
+ * so we need to explicitly set the spawned instance to all pCPUs. */
+ if (virCHProcessGetAllCpuAffinity(&hostcpumap) < 0)
+ goto cleanup;
+ affinity_cpumask = hostcpumap;
+ }
+
+ /*
+ * If CPU cgroup controller is not initialized here, then we need
+ * neither period nor quota settings. And if CPUSET controller is
+ * not initialized either, then there's nothing to do anyway.
+ */
+ if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
+ virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
+
+ if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+ mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
+ priv->autoNodeset,
+ &mem_mask, -1) < 0)
+ goto cleanup;
+
+ /* Create (or look up) the per-thread sub-cgroup. */
+ if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
+ goto cleanup;
+
+ if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
+ if (use_cpumask &&
+ virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
+ goto cleanup;
+
+ if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+ goto cleanup;
+
+ }
+
+ if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
+ goto cleanup;
+
+ /* Move the thread to the sub dir */
+ VIR_INFO("Adding pid %d to cgroup", pid);
+ if (virCgroupAddThread(cgroup, pid) < 0)
+ goto cleanup;
+
+ }
+
+ if (!affinity_cpumask)
+ affinity_cpumask = use_cpumask;
+
+ /* Setup legacy affinity. */
+ if (affinity_cpumask
+ && virProcessSetAffinity(pid, affinity_cpumask, false) < 0)
+ goto cleanup;
+
+ /* Set scheduler type and priority, but not for the main thread. */
+ if (sched &&
+ nameval != VIR_CGROUP_THREAD_EMULATOR &&
+ virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
+ goto cleanup;
+
+ ret = 0;
+ cleanup:
+ /* On failure, tear down the sub-cgroup we may have created above. */
+ if (ret < 0 && cgroup)
+ virCgroupRemove(cgroup);
+
+ return ret;
+}
+
+/**
+ * virCHProcessSetupVcpu:
+ * @vm: domain object
+ * @vcpuid: id of VCPU to set defaults
+ *
+ * This function sets resource properties (cgroups, affinity, scheduler) for a
+ * vCPU. This function expects that the vCPU is online and the vCPU pids were
+ * correctly detected at the point when it's called.
+ *
+ * NOTE(review): @vcpuid is assumed to be a valid index — vcpu is
+ * dereferenced without a NULL check, so callers must only pass ids
+ * within the domain's vCPU range (as virCHProcessSetupVcpus does).
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+virCHProcessSetupVcpu(virDomainObj *vm,
+ unsigned int vcpuid)
+{
+ pid_t vcpupid = virCHDomainGetVcpuPid(vm, vcpuid);
+ virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
+
+ return virCHProcessSetupPid(vm, vcpupid, VIR_CGROUP_THREAD_VCPU,
+ vcpuid, vcpu->cpumask,
+ vm->def->cputune.period,
+ vm->def->cputune.quota, &vcpu->sched);
+}
+
+/* Apply cgroup/affinity/scheduler settings to every online vCPU of @vm.
+ *
+ * When per-vCPU TIDs are not available, per-vCPU pinning cannot be
+ * applied, so any vCPU whose cpumask differs from the domain-wide one
+ * is rejected; otherwise success is reported without further setup.
+ *
+ * Returns 0 on success, -1 on error. */
+static int
+virCHProcessSetupVcpus(virDomainObj *vm)
+{
+ virDomainVcpuDef *vcpu;
+ unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
+ size_t i;
+
+ /* Bandwidth tuning needs the CPU cgroup controller. */
+ if ((vm->def->cputune.period || vm->def->cputune.quota) &&
+ !virCgroupHasController(((virCHDomainObjPrivate *) vm->privateData)->
+ cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("cgroup cpu is required for scheduler tuning"));
+ return -1;
+ }
+
+ if (!virCHDomainHasVcpuPids(vm)) {
+ /* If any CPU has custom affinity that differs from the
+ * VM default affinity, we must reject it */
+ for (i = 0; i < maxvcpus; i++) {
+ vcpu = virDomainDefGetVcpu(vm->def, i);
+
+ if (!vcpu->online)
+ continue;
+
+ if (vcpu->cpumask &&
+ !virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) {
+ virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+ _("cpu affinity is not supported"));
+ return -1;
+ }
+ }
+
+ return 0;
+ }
+
+ for (i = 0; i < maxvcpus; i++) {
+ vcpu = virDomainDefGetVcpu(vm->def, i);
+
+ if (!vcpu->online)
+ continue;
+
+ if (virCHProcessSetupVcpu(vm, i) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
/**
* virCHProcessStart:
* @driver: pointer to driver structure
*
* Returns 0 on success or -1 in case of error
*/
-int virCHProcessStart(virCHDriver *driver,
- virDomainObj *vm,
- virDomainRunningReason reason)
+int
+virCHProcessStart(virCHDriver *driver,
+ virDomainObj *vm,
+ virDomainRunningReason reason)
{
int ret = -1;
virCHDomainObjPrivate *priv = vm->privateData;
+ g_autoptr(virCHDriverConfig) cfg = virCHDriverGetConfig(priv->driver);
g_autofree int *nicindexes = NULL;
size_t nnicindexes = 0;
/* And we can get the first monitor connection now too */
if (!(priv->monitor = virCHProcessConnectMonitor(driver, vm))) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("failed to create connection to CH socket"));
+ _("failed to create connection to CH socket"));
goto cleanup;
}
if (virCHMonitorCreateVM(priv->monitor,
&nnicindexes, &nicindexes) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("failed to create guest VM"));
+ _("failed to create guest VM"));
goto cleanup;
}
}
+ vm->pid = priv->monitor->pid;
+ vm->def->id = vm->pid;
+ priv->machineName = virCHDomainGetMachineName(vm);
+
+ if (virDomainCgroupSetupCgroup("ch", vm,
+ nnicindexes, nicindexes,
+ &priv->cgroup,
+ cfg->cgroupControllers,
+ 0, /*maxThreadsPerProc*/
+ priv->driver->privileged,
+ priv->machineName) < 0)
+ goto cleanup;
+
+ if (virCHProcessInitCpuAffinity(vm) < 0)
+ goto cleanup;
+
if (virCHMonitorBootVM(priv->monitor) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("failed to boot guest VM"));
goto cleanup;
}
- priv->machineName = virCHDomainGetMachineName(vm);
- vm->pid = priv->monitor->pid;
- vm->def->id = vm->pid;
+ virCHDomainRefreshThreadInfo(vm);
- virCHProcessUpdateInfo(vm);
+ VIR_DEBUG("Setting global CPU cgroup (if required)");
+ if (virDomainCgroupSetupGlobalCpuCgroup(vm,
+ priv->cgroup,
+ priv->autoNodeset) < 0)
+ goto cleanup;
+ VIR_DEBUG("Setting vCPU tuning/settings");
+ if (virCHProcessSetupVcpus(vm) < 0)
+ goto cleanup;
+
+ virCHProcessUpdateInfo(vm);
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
return 0;
return ret;
}
-int virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED,
- virDomainObj *vm,
- virDomainShutoffReason reason)
+int
+virCHProcessStop(virCHDriver *driver G_GNUC_UNUSED,
+ virDomainObj *vm,
+ virDomainShutoffReason reason)
{
+ int ret;
+ int retries = 0;
virCHDomainObjPrivate *priv = vm->privateData;
VIR_DEBUG("Stopping VM name=%s pid=%d reason=%d",
priv->monitor = NULL;
}
+ retry:
+ if ((ret = virDomainCgroupRemoveCgroup(vm,
+ priv->cgroup,
+ priv->machineName)) < 0) {
+ if (ret == -EBUSY && (retries++ < 5)) {
+ g_usleep(200*1000);
+ goto retry;
+ }
+ VIR_WARN("Failed to remove cgroup for %s",
+ vm->def->name);
+ }
+
vm->pid = -1;
vm->def->id = -1;
g_clear_pointer(&priv->machineName, g_free);