]> git.ipfire.org Git - thirdparty/libvirt.git/commitdiff
qemu, hypervisor: refactor some cgroup mgmt methods
author: Praveen K Paladugu <prapal@linux.microsoft.com>
Tue, 25 Jan 2022 16:19:53 +0000 (16:19 +0000)
committer: Michal Privoznik <mprivozn@redhat.com>
Fri, 28 Jan 2022 16:04:50 +0000 (17:04 +0100)
Refactor some cgroup management methods from qemu into hypervisor.
These methods will be shared with ch driver for cgroup management.

Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
src/hypervisor/domain_cgroup.c
src/hypervisor/domain_cgroup.h
src/libvirt_private.syms
src/qemu/qemu_cgroup.c
src/qemu/qemu_cgroup.h
src/qemu/qemu_driver.c
src/qemu/qemu_hotplug.c
src/qemu/qemu_process.c

index 61b54f071cc3de756422cce140465614798191fd..f5bcced1a5f3744cf700dcee0be0ae31c1b5371b 100644 (file)
 
 #include "domain_cgroup.h"
 #include "domain_driver.h"
-
+#include "util/virnuma.h"
+#include "virlog.h"
 #include "virutil.h"
 
 #define VIR_FROM_THIS VIR_FROM_DOMAIN
-
+VIR_LOG_INIT("domain.cgroup");
 
 int
 virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio)
@@ -269,3 +270,447 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
 
     return 0;
 }
+
+
+/**
+ * virDomainCgroupSetupBlkioCgroup:
+ * @vm: domain object whose blkio tuning is applied
+ * @cgroup: domain cgroup pointer
+ *
+ * Applies the domain's block I/O tuning to @cgroup. When the blkio
+ * controller is missing this succeeds only if the domain requests neither
+ * a weight nor any per-device tuning.
+ *
+ * Returns 0 when there is nothing to do, -1 when tuning is requested but
+ * the controller is missing, otherwise the result of
+ * virDomainCgroupSetupBlkio().
+ */
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
+                                virCgroup *cgroup)
+{
+    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO))
+        return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio);
+
+    /* No blkio controller: acceptable unless the config depends on it. */
+    if (!vm->def->blkio.weight && !vm->def->blkio.ndevices)
+        return 0;
+
+    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                   _("Block I/O tuning is not available on this host"));
+    return -1;
+}
+
+
+/**
+ * virDomainCgroupSetupMemoryCgroup:
+ * @vm: domain object whose memory tuning is applied
+ * @cgroup: domain cgroup pointer
+ *
+ * Applies the domain's memory limits to @cgroup. When the memory
+ * controller is missing this succeeds only if none of the hard, soft or
+ * swap hard limits is set.
+ *
+ * Returns 0 when there is nothing to do, -1 when a limit is set but the
+ * controller is missing, otherwise the result of
+ * virDomainCgroupSetupMemtune().
+ */
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
+                                 virCgroup *cgroup)
+{
+    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY))
+        return virDomainCgroupSetupMemtune(cgroup, vm->def->mem);
+
+    /* No memory controller: acceptable unless some limit was configured. */
+    if (!virMemoryLimitIsSet(vm->def->mem.hard_limit) &&
+        !virMemoryLimitIsSet(vm->def->mem.soft_limit) &&
+        !virMemoryLimitIsSet(vm->def->mem.swap_hard_limit))
+        return 0;
+
+    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                   _("Memory cgroup is not available on this host"));
+    return -1;
+}
+
+
+/**
+ * virDomainCgroupSetupCpusetCgroup:
+ * @cgroup: domain cgroup pointer
+ *
+ * Turns on cpuset memory migration for @cgroup. A cgroup without the
+ * cpuset controller is left alone.
+ *
+ * Returns 0 on success or when the controller is absent, -1 on error.
+ */
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup)
+{
+    if (virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) &&
+        virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupSetupCpuCgroup:
+ * @vm: domain object whose CPU shares are applied
+ * @cgroup: domain cgroup pointer
+ *
+ * Writes the domain's CPU shares into @cgroup when shares were specified.
+ * A missing cpu controller is an error only if shares were specified.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
+                              virCgroup *cgroup)
+{
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+        if (!vm->def->cputune.sharesSpecified)
+            return 0;
+
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("CPU tuning is not available on this host"));
+        return -1;
+    }
+
+    if (vm->def->cputune.sharesSpecified &&
+        virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupInitCgroup:
+ * @prefix: driver name handed to virCgroupNewMachine()
+ * @vm: domain object; its resource partition is defaulted if unset
+ * @nnicindexes: number of entries in @nicindexes
+ * @nicindexes: NIC interface indexes handed to virCgroupNewMachine()
+ * @cgroup: in/out cgroup pointer; any previous value is freed first
+ * @cgroupControllers: controller mask handed to virCgroupNewMachine()
+ * @maxThreadsPerProc: thread limit handed to virCgroupNewMachine()
+ * @privileged: whether the driver runs privileged
+ * @machineName: machine name handed to virCgroupNewMachine()
+ *
+ * Creates the machine cgroup for @vm. Nothing is done (and @cgroup is not
+ * touched) when the driver is unprivileged or cgroups are unavailable.
+ * A missing resource definition or partition is filled in with the
+ * default "/machine" partition.
+ *
+ * Returns 0 on success or when the failure may be ignored according to
+ * virCgroupNewIgnoreError(), -1 otherwise.
+ */
+int
+virDomainCgroupInitCgroup(const char *prefix,
+                          virDomainObj *vm,
+                          size_t nnicindexes,
+                          int *nicindexes,
+                          virCgroup **cgroup,
+                          int cgroupControllers,
+                          unsigned int maxThreadsPerProc,
+                          bool privileged,
+                          char *machineName)
+{
+    virDomainResourceDef *resource;
+
+    if (!privileged || !virCgroupAvailable())
+        return 0;
+
+    g_clear_pointer(cgroup, virCgroupFree);
+
+    if (!vm->def->resource)
+        vm->def->resource = g_new0(virDomainResourceDef, 1);
+    resource = vm->def->resource;
+
+    if (!resource->partition)
+        resource->partition = g_strdup("/machine");
+
+    if (!g_path_is_absolute(resource->partition)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Resource partition '%s' must start with '/'"),
+                       resource->partition);
+        return -1;
+    }
+
+    if (virCgroupNewMachine(machineName,
+                            prefix,
+                            vm->def->uuid,
+                            NULL,
+                            vm->pid,
+                            false,
+                            nnicindexes, nicindexes,
+                            resource->partition,
+                            cgroupControllers,
+                            maxThreadsPerProc,
+                            cgroup) >= 0)
+        return 0;
+
+    /* Creation failed: some failures are tolerated by design. */
+    return virCgroupNewIgnoreError() ? 0 : -1;
+}
+
+
+/**
+ * virDomainCgroupRestoreCgroupState:
+ * @vm: domain object whose vCPU and iothread lists are walked
+ * @cgroup: domain cgroup pointer
+ *
+ * Best-effort restore of cpuset memory settings for the domain cgroup and
+ * its per-thread sub-cgroups. Does nothing when NUMA is unavailable or
+ * @cgroup lacks the cpuset controller. Never reports failure to the
+ * caller.
+ */
+void
+virDomainCgroupRestoreCgroupState(virDomainObj *vm,
+                                  virCgroup *cgroup)
+{
+    g_autofree char *mem_mask = NULL;
+    size_t i = 0;
+    g_autoptr(virBitmap) all_nodes = NULL;
+
+    if (!virNumaIsAvailable() ||
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return;
+
+    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+        goto error;
+
+    if (!(mem_mask = virBitmapFormat(all_nodes)))
+        goto error;
+
+    /* NOTE(review): presumably only proceeds when no task sits directly in
+     * the top-level cpuset cgroup -- confirm against virCgroupHasEmptyTasks(). */
+    if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
+        goto error;
+
+    /* Allow every host memory node on the domain-level cgroup. */
+    if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+        goto error;
+
+    /* From here on failures return directly: unlike the 'error' path the
+     * last error is not reset and nothing is logged. */
+    for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
+        virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
+
+        if (!vcpu->online)
+            continue;
+
+        if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                               VIR_CGROUP_THREAD_VCPU,
+                                               i) < 0)
+            return;
+    }
+
+    for (i = 0; i < vm->def->niothreadids; i++) {
+        if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                               VIR_CGROUP_THREAD_IOTHREAD,
+                                               vm->def->iothreadids[i]->iothread_id) < 0)
+            return;
+    }
+
+    if (virDomainCgroupRestoreCgroupThread(cgroup,
+                                           VIR_CGROUP_THREAD_EMULATOR,
+                                           0) < 0)
+        return;
+
+    return;
+
+ error:
+    /* Failures here are deliberately swallowed: clear the error and log. */
+    virResetLastError();
+    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
+    return;
+}
+
+
+/**
+ * virDomainCgroupRestoreCgroupThread:
+ * @cgroup: domain cgroup pointer
+ * @thread: kind of thread sub-cgroup (vCPU, iothread, emulator)
+ * @id: identifier of the thread within its kind
+ *
+ * Opens the existing sub-cgroup for the given thread, enables cpuset
+ * memory migration on it and writes its current cpuset mems value back.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+                                   virCgroupThreadName thread,
+                                   int id)
+{
+    g_autoptr(virCgroup) threadCgroup = NULL;
+    g_autofree char *mems = NULL;
+
+    /* Steps run strictly in order; the first failure aborts the rest. */
+    if (virCgroupNewThread(cgroup, thread, id, false, &threadCgroup) < 0 ||
+        virCgroupSetCpusetMemoryMigrate(threadCgroup, true) < 0 ||
+        virCgroupGetCpusetMems(threadCgroup, &mems) < 0 ||
+        virCgroupSetCpusetMems(threadCgroup, mems) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupConnectCgroup:
+ * @prefix: driver name handed to virCgroupNewDetectMachine() (e.g. "qemu")
+ * @vm: domain object of an already running domain
+ * @cgroup: in/out cgroup pointer; any previous value is freed first
+ * @cgroupControllers: controller mask handed to virCgroupNewDetectMachine()
+ * @privileged: whether the driver runs privileged
+ * @machineName: machine name handed to virCgroupNewDetectMachine()
+ *
+ * Detects the cgroup of a running domain and restores its cpuset state.
+ * Nothing is done (and @cgroup is not touched) when the driver is
+ * unprivileged or cgroups are unavailable.
+ *
+ * Returns 0 on success or when there is nothing to do, -1 on error.
+ */
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+                             virDomainObj *vm,
+                             virCgroup **cgroup,
+                             int cgroupControllers,
+                             bool privileged,
+                             char *machineName)
+{
+    /* Only a privileged driver manages cgroups. This matches the early
+     * exit in virDomainCgroupInitCgroup() and the qemu code this helper
+     * replaces; the check must not be inverted. */
+    if (!privileged)
+        return 0;
+
+    if (!virCgroupAvailable())
+        return 0;
+
+    g_clear_pointer(cgroup, virCgroupFree);
+
+    if (virCgroupNewDetectMachine(vm->def->name,
+                                  prefix,
+                                  vm->pid,
+                                  cgroupControllers,
+                                  machineName,
+                                  cgroup) < 0)
+        return -1;
+
+    virDomainCgroupRestoreCgroupState(vm, *cgroup);
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupSetupCgroup:
+ * @prefix: driver name forwarded to virDomainCgroupInitCgroup()
+ * @vm: domain object; its process must already be started
+ * @nnicindexes: number of entries in @nicindexes
+ * @nicindexes: NIC interface indexes forwarded to the init helper
+ * @cgroup: in/out cgroup pointer
+ * @cgroupControllers: controller mask forwarded to the init helper
+ * @maxThreadsPerProc: thread limit forwarded to the init helper
+ * @privileged: whether the driver runs privileged
+ * @machineName: machine name forwarded to the init helper
+ *
+ * Creates the domain cgroup and then applies blkio, memory, cpu and
+ * cpuset settings, in that order. If no cgroup was created the function
+ * succeeds without configuring anything.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+                           virDomainObj *vm,
+                           size_t nnicindexes,
+                           int *nicindexes,
+                           virCgroup **cgroup,
+                           int cgroupControllers,
+                           unsigned int maxThreadsPerProc,
+                           bool privileged,
+                           char *machineName)
+{
+    if (!vm->pid) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Cannot setup cgroups until process is started"));
+        return -1;
+    }
+
+    if (virDomainCgroupInitCgroup(prefix, vm,
+                                  nnicindexes, nicindexes,
+                                  cgroup,
+                                  cgroupControllers, maxThreadsPerProc,
+                                  privileged, machineName) < 0)
+        return -1;
+
+    /* The init helper may legitimately leave us without a cgroup. */
+    if (!*cgroup)
+        return 0;
+
+    /* Per-controller setup; the first failing step stops the chain. */
+    if (virDomainCgroupSetupBlkioCgroup(vm, *cgroup) < 0 ||
+        virDomainCgroupSetupMemoryCgroup(vm, *cgroup) < 0 ||
+        virDomainCgroupSetupCpuCgroup(vm, *cgroup) < 0 ||
+        virDomainCgroupSetupCpusetCgroup(*cgroup) < 0)
+        return -1;
+
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupSetupVcpuBW:
+ * @cgroup: cgroup to configure
+ * @period: CPU bandwidth period
+ * @quota: CPU bandwidth quota
+ *
+ * Thin wrapper that forwards to virCgroupSetupCpuPeriodQuota().
+ *
+ * Returns whatever virCgroupSetupCpuPeriodQuota() returns.
+ */
+int
+virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
+                           unsigned long long period,
+                           long long quota)
+{
+    return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
+}
+
+
+/**
+ * virDomainCgroupSetupCpusetCpus:
+ * @cgroup: cgroup to configure
+ * @cpumask: bitmap of CPUs to set
+ *
+ * Thin wrapper that forwards to virCgroupSetupCpusetCpus().
+ *
+ * Returns whatever virCgroupSetupCpusetCpus() returns.
+ */
+int
+virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
+                               virBitmap *cpumask)
+{
+    return virCgroupSetupCpusetCpus(cgroup, cpumask);
+}
+
+
+/**
+ * virDomainCgroupSetupGlobalCpuCgroup:
+ * @vm: domain object providing global_period/global_quota and NUMA tuning
+ * @cgroup: domain cgroup pointer
+ * @autoNodeset: automatic nodeset used when formatting the NUMA nodeset
+ *
+ * Applies the domain-wide CPU bandwidth period/quota to @cgroup.
+ * Requesting a period or quota without the cpu controller is an error.
+ *
+ * Returns 0 on success or when there is nothing to do, -1 on error.
+ */
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
+                                    virCgroup *cgroup,
+                                    virBitmap *autoNodeset)
+{
+    unsigned long long period = vm->def->cputune.global_period;
+    long long quota = vm->def->cputune.global_quota;
+    g_autofree char *mem_mask = NULL;
+    virDomainNumatuneMemMode mem_mode;
+
+    if ((period || quota) &&
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                       _("cgroup cpu is required for scheduler tuning"));
+        return -1;
+    }
+
+    /*
+     * If CPU cgroup controller is not initialized here, then we need
+     * neither period nor quota settings.  And if CPUSET controller is
+     * not initialized either, then there's nothing to do anyway.
+     */
+    if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return 0;
+
+
+    /* NOTE(review): mem_mask is formatted for strict NUMA mode but is not
+     * used afterwards in this function; only a formatting failure has an
+     * effect (it aborts with -1). */
+    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
+                                            autoNodeset, &mem_mask, -1) < 0)
+        return -1;
+
+    if (period || quota) {
+        if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * virDomainCgroupRemoveCgroup:
+ * @vm: domain object (its name is used for logging only)
+ * @cgroup: domain cgroup pointer, may be NULL
+ * @machineName: machine name to terminate
+ *
+ * Terminates the machine and removes @cgroup. A NULL @cgroup is treated
+ * as "nothing to remove". A failure to terminate the machine is at most
+ * logged and does not prevent the removal.
+ *
+ * Returns 0 when @cgroup is NULL, otherwise the result of
+ * virCgroupRemove().
+ */
+int
+virDomainCgroupRemoveCgroup(virDomainObj *vm,
+                            virCgroup *cgroup,
+                            char *machineName)
+{
+    /* Not supported, so claim success */
+    if (!cgroup)
+        return 0;
+
+    if (virCgroupTerminateMachine(machineName) < 0 &&
+        !virCgroupNewIgnoreError())
+        VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
+
+    return virCgroupRemove(cgroup);
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesDataFree:
+ * @data: rollback data to release, may be NULL
+ *
+ * Frees the emulator cgroup handle, the saved memory mask and @data
+ * itself.
+ */
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data)
+{
+    if (data) {
+        virCgroupFree(data->emulatorCgroup);
+        g_free(data->emulatorMemMask);
+        g_free(data);
+    }
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesAllow:
+ * @cgroup: domain cgroup pointer
+ * @retData: filled with structure used to roll back the operation
+ *
+ * Allows all NUMA nodes for the emulator thread temporarily. This is
+ * necessary when hotplugging cpus since it requires memory allocated in the
+ * DMA region. Afterwards the operation can be reverted by
+ * virDomainCgroupEmulatorAllNodesRestore.
+ *
+ * When NUMA is unavailable or @cgroup lacks the cpuset controller nothing
+ * is changed, @retData is left untouched and 0 is returned, so callers
+ * should initialize *@retData to NULL beforehand.
+ *
+ * Returns 0 on success -1 on error
+ */
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
+                                     virCgroupEmulatorAllNodesData **retData)
+{
+    virCgroupEmulatorAllNodesData *data = NULL;
+    g_autofree char *all_nodes_str = NULL;
+
+    g_autoptr(virBitmap) all_nodes = NULL;
+    int ret = -1;
+
+    if (!virNumaIsAvailable() ||
+        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+        return 0;
+
+    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+        goto cleanup;
+
+    if (!(all_nodes_str = virBitmapFormat(all_nodes)))
+        goto cleanup;
+
+    data = g_new0(virCgroupEmulatorAllNodesData, 1);
+
+    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
+                           false, &data->emulatorCgroup) < 0)
+        goto cleanup;
+
+    /* Remember the current mask first so it can be restored later. */
+    if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0
+        || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
+        goto cleanup;
+
+    /* Ownership moves to the caller; 'data' becomes NULL for cleanup. */
+    *retData = g_steal_pointer(&data);
+    ret = 0;
+
+ cleanup:
+    virDomainCgroupEmulatorAllNodesDataFree(data);
+
+    return ret;
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesRestore:
+ * @data: data structure created by virDomainCgroupEmulatorAllNodesAllow,
+ *        may be NULL
+ *
+ * Rolls back the setting done by virDomainCgroupEmulatorAllNodesAllow and frees the
+ * associated data. The result of writing the saved mask back is ignored,
+ * and the error that was set before the call is preserved across it.
+ */
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data)
+{
+    virError *err;
+
+    if (!data)
+        return;
+
+    /* Keep the caller's pending error intact across the rollback. */
+    virErrorPreserveLast(&err);
+    virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
+    virErrorRestore(&err);
+
+    virDomainCgroupEmulatorAllNodesDataFree(data);
+}
index f93e5f74fe9ff3982bfb78b6a65c69ae1ab379ae..20893e1b46bba7386b5662e53aad4235eb5259b2 100644 (file)
 #include "vircgroup.h"
 #include "domain_conf.h"
 
+/* Rollback state filled in by virDomainCgroupEmulatorAllNodesAllow() and
+ * consumed by virDomainCgroupEmulatorAllNodesRestore(). */
+typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData;
+struct _virCgroupEmulatorAllNodesData {
+    virCgroup *emulatorCgroup; /* emulator thread sub-cgroup */
+    char *emulatorMemMask; /* cpuset mems value read before it was widened */
+};
 
 int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio);
 int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem);
@@ -36,3 +41,70 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
                                             virDomainDef *persistentDef,
                                             virTypedParameterPtr params,
                                             int nparams);
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
+                                virCgroup *cgroup);
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
+                                 virCgroup *cgroup);
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup);
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
+                              virCgroup *cgroup);
+int
+virDomainCgroupInitCgroup(const char *prefix,
+                          virDomainObj *vm,
+                          size_t nnicindexes,
+                          int *nicindexes,
+                          virCgroup **cgroup,
+                          int cgroupControllers,
+                          unsigned int maxThreadsPerProc,
+                          bool privileged,
+                          char *machineName);
+void
+virDomainCgroupRestoreCgroupState(virDomainObj *vm,
+                                  virCgroup *cgroup);
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+                             virDomainObj *vm,
+                             virCgroup **cgroup,
+                             int cgroupControllers,
+                             bool privileged,
+                             char *machineName);
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+                           virDomainObj *vm,
+                           size_t nnicindexes,
+                           int *nicindexes,
+                           virCgroup **cgroup,
+                           int cgroupControllers,
+                           unsigned int maxThreadsPerProc,
+                           bool privileged,
+                           char *machineName);
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data);
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
+                                     virCgroupEmulatorAllNodesData **retData);
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data);
+int
+virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
+                           unsigned long long period,
+                           long long quota);
+int
+virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
+                               virBitmap *cpumask);
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
+                                    virCgroup *cgroup,
+                                    virBitmap *autoNodeset);
+int
+virDomainCgroupRemoveCgroup(virDomainObj *vm,
+                            virCgroup *cgroup,
+                            char *machineName);
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+                                   virCgroupThreadName thread,
+                                   int id);
index ba3462d8499ea38bba56d97d9ea2db76f531c64c..bc6fa191bf9a432fbe0bd2fa8cf32a6cd59f78ee 100644 (file)
@@ -1544,11 +1544,23 @@ virSetConnectStorage;
 
 
 # hypervisor/domain_cgroup.h
+virDomainCgroupConnectCgroup;
+virDomainCgroupEmulatorAllNodesAllow;
+virDomainCgroupEmulatorAllNodesRestore;
+virDomainCgroupInitCgroup;
+virDomainCgroupRemoveCgroup;
 virDomainCgroupSetMemoryLimitParameters;
 virDomainCgroupSetupBlkio;
+virDomainCgroupSetupBlkioCgroup;
+virDomainCgroupSetupCgroup;
+virDomainCgroupSetupCpuCgroup;
+virDomainCgroupSetupCpusetCgroup;
+virDomainCgroupSetupCpusetCpus;
 virDomainCgroupSetupDomainBlkioParameters;
+virDomainCgroupSetupGlobalCpuCgroup;
+virDomainCgroupSetupMemoryCgroup;
 virDomainCgroupSetupMemtune;
-
+virDomainCgroupSetupVcpuBW;
 
 # hypervisor/domain_driver.h
 virDomainDriverAddIOThreadCheck;
index 22a6f56cf9c654460f014030682df9350bee4927..34b50ddd1d07544aacbb816e350565d4774a33d7 100644 (file)
@@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm,
     return ret;
 }
 
-
-static int
-qemuSetupBlkioCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup,
-                                VIR_CGROUP_CONTROLLER_BLKIO)) {
-        if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("Block I/O tuning is not available on this host"));
-            return -1;
-        }
-        return 0;
-    }
-
-    return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio);
-}
-
-
-static int
-qemuSetupMemoryCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
-        if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
-            virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
-            virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
-            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                           _("Memory cgroup is not available on this host"));
-            return -1;
-        }
-        return 0;
-    }
-
-    return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem);
-}
-
-
 static int
 qemuSetupFirmwareCgroup(virDomainObj *vm)
 {
@@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
 }
 
 
-static int
-qemuSetupCpusetCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-    if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
-        return -1;
-
-    return 0;
-}
-
-
-static int
-qemuSetupCpuCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
-       if (vm->def->cputune.sharesSpecified) {
-           virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                          _("CPU tuning is not available on this host"));
-           return -1;
-       }
-       return 0;
-    }
-
-    if (vm->def->cputune.sharesSpecified) {
-        if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
-            return -1;
-    }
-
-    return 0;
-}
-
-
 static int
 qemuSetupCgroupAppid(virDomainObj *vm)
 {
@@ -927,174 +849,24 @@ qemuSetupCgroupAppid(virDomainObj *vm)
 }
 
 
-static int
-qemuInitCgroup(virDomainObj *vm,
-               size_t nnicindexes,
-               int *nicindexes)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
-    if (!priv->driver->privileged)
-        return 0;
-
-    if (!virCgroupAvailable())
-        return 0;
-
-    virCgroupFree(priv->cgroup);
-    priv->cgroup = NULL;
-
-    if (!vm->def->resource)
-        vm->def->resource = g_new0(virDomainResourceDef, 1);
-
-    if (!vm->def->resource->partition)
-        vm->def->resource->partition = g_strdup("/machine");
-
-    if (!g_path_is_absolute(vm->def->resource->partition)) {
-        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
-                       _("Resource partition '%s' must start with '/'"),
-                       vm->def->resource->partition);
-        return -1;
-    }
-
-    if (virCgroupNewMachine(priv->machineName,
-                            "qemu",
-                            vm->def->uuid,
-                            NULL,
-                            vm->pid,
-                            false,
-                            nnicindexes, nicindexes,
-                            vm->def->resource->partition,
-                            cfg->cgroupControllers,
-                            cfg->maxThreadsPerProc,
-                            &priv->cgroup) < 0) {
-        if (virCgroupNewIgnoreError())
-            return 0;
-
-        return -1;
-    }
-
-    return 0;
-}
-
-static int
-qemuRestoreCgroupThread(virCgroup *cgroup,
-                        virCgroupThreadName thread,
-                        int id)
-{
-    g_autoptr(virCgroup) cgroup_temp = NULL;
-    g_autofree char *nodeset = NULL;
-
-    if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
-        return -1;
-
-    if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
-        return -1;
-
-    if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
-        return -1;
-
-    if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
-        return -1;
-
-    return 0;
-}
-
-static void
-qemuRestoreCgroupState(virDomainObj *vm)
-{
-    g_autofree char *mem_mask = NULL;
-    qemuDomainObjPrivate *priv = vm->privateData;
-    size_t i = 0;
-    g_autoptr(virBitmap) all_nodes = NULL;
-
-    if (!virNumaIsAvailable() ||
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return;
-
-    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
-        goto error;
-
-    if (!(mem_mask = virBitmapFormat(all_nodes)))
-        goto error;
-
-    if (virCgroupHasEmptyTasks(priv->cgroup,
-                               VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
-        goto error;
-
-    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
-        goto error;
-
-    for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
-        virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
-
-        if (!vcpu->online)
-            continue;
-
-        if (qemuRestoreCgroupThread(priv->cgroup,
-                                    VIR_CGROUP_THREAD_VCPU, i) < 0)
-            return;
-    }
-
-    for (i = 0; i < vm->def->niothreadids; i++) {
-        if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
-                                    vm->def->iothreadids[i]->iothread_id) < 0)
-            return;
-    }
-
-    if (qemuRestoreCgroupThread(priv->cgroup,
-                                VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
-        return;
-
-    return;
-
- error:
-    virResetLastError();
-    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
-    return;
-}
-
-int
-qemuConnectCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
-    if (!priv->driver->privileged)
-        return 0;
-
-    if (!virCgroupAvailable())
-        return 0;
-
-    virCgroupFree(priv->cgroup);
-    priv->cgroup = NULL;
-
-    if (virCgroupNewDetectMachine(vm->def->name,
-                                  "qemu",
-                                  vm->pid,
-                                  cfg->cgroupControllers,
-                                  priv->machineName,
-                                  &priv->cgroup) < 0)
-        return -1;
-
-    qemuRestoreCgroupState(vm);
-    return 0;
-}
-
 int
 qemuSetupCgroup(virDomainObj *vm,
                 size_t nnicindexes,
                 int *nicindexes)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
+    g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
 
-    if (!vm->pid) {
-        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                       _("Cannot setup cgroups until process is started"));
-        return -1;
-    }
+    if (virDomainCgroupSetupCgroup("qemu",
+                                   vm,
+                                   nnicindexes,
+                                   nicindexes,
+                                   &priv->cgroup,
+                                   cfg->cgroupControllers,
+                                   cfg->maxThreadsPerProc,
+                                   priv->driver->privileged,
+                                   priv->machineName) < 0)
 
-    if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0)
         return -1;
 
     if (!priv->cgroup)
@@ -1103,41 +875,12 @@ qemuSetupCgroup(virDomainObj *vm,
     if (qemuSetupDevicesCgroup(vm) < 0)
         return -1;
 
-    if (qemuSetupBlkioCgroup(vm) < 0)
-        return -1;
-
-    if (qemuSetupMemoryCgroup(vm) < 0)
-        return -1;
-
-    if (qemuSetupCpuCgroup(vm) < 0)
-        return -1;
-
-    if (qemuSetupCpusetCgroup(vm) < 0)
-        return -1;
-
     if (qemuSetupCgroupAppid(vm) < 0)
         return -1;
 
     return 0;
 }
 
-int
-qemuSetupCgroupVcpuBW(virCgroup *cgroup,
-                      unsigned long long period,
-                      long long quota)
-{
-    return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
-}
-
-
-int
-qemuSetupCgroupCpusetCpus(virCgroup *cgroup,
-                          virBitmap *cpumask)
-{
-    return virCgroupSetupCpusetCpus(cgroup, cpumask);
-}
-
-
 int
 qemuSetupCgroupForExtDevices(virDomainObj *vm,
                              virQEMUDriver *driver)
@@ -1164,148 +907,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm,
 
     return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp);
 }
-
-
-int
-qemuSetupGlobalCpuCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-    unsigned long long period = vm->def->cputune.global_period;
-    long long quota = vm->def->cputune.global_quota;
-    g_autofree char *mem_mask = NULL;
-    virDomainNumatuneMemMode mem_mode;
-
-    if ((period || quota) &&
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
-        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
-                       _("cgroup cpu is required for scheduler tuning"));
-        return -1;
-    }
-
-    /*
-     * If CPU cgroup controller is not initialized here, then we need
-     * neither period nor quota settings.  And if CPUSET controller is
-     * not initialized either, then there's nothing to do anyway.
-     */
-    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
-        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-
-    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
-        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
-        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
-                                            priv->autoNodeset,
-                                            &mem_mask, -1) < 0)
-        return -1;
-
-    if (period || quota) {
-        if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
-            return -1;
-    }
-
-    return 0;
-}
-
-
-int
-qemuRemoveCgroup(virDomainObj *vm)
-{
-    qemuDomainObjPrivate *priv = vm->privateData;
-
-    if (priv->cgroup == NULL)
-        return 0; /* Not supported, so claim success */
-
-    if (virCgroupTerminateMachine(priv->machineName) < 0) {
-        if (!virCgroupNewIgnoreError())
-            VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
-    }
-
-    return virCgroupRemove(priv->cgroup);
-}
-
-
-static void
-qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data)
-{
-    if (!data)
-        return;
-
-    virCgroupFree(data->emulatorCgroup);
-    g_free(data->emulatorMemMask);
-    g_free(data);
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesAllow:
- * @cgroup: domain cgroup pointer
- * @retData: filled with structure used to roll back the operation
- *
- * Allows all NUMA nodes for the qemu emulator thread temporarily. This is
- * necessary when hotplugging cpus since it requires memory allocated in the
- * DMA region. Afterwards the operation can be reverted by
- * qemuCgroupEmulatorAllNodesRestore.
- *
- * Returns 0 on success -1 on error
- */
-int
-qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
-                                qemuCgroupEmulatorAllNodesData **retData)
-{
-    qemuCgroupEmulatorAllNodesData *data = NULL;
-    g_autofree char *all_nodes_str = NULL;
-    g_autoptr(virBitmap) all_nodes = NULL;
-    int ret = -1;
-
-    if (!virNumaIsAvailable() ||
-        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
-        return 0;
-
-    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
-        goto cleanup;
-
-    if (!(all_nodes_str = virBitmapFormat(all_nodes)))
-        goto cleanup;
-
-    data = g_new0(qemuCgroupEmulatorAllNodesData, 1);
-
-    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
-                           false, &data->emulatorCgroup) < 0)
-        goto cleanup;
-
-    if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 ||
-        virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
-        goto cleanup;
-
-    *retData = g_steal_pointer(&data);
-    ret = 0;
-
- cleanup:
-    qemuCgroupEmulatorAllNodesDataFree(data);
-
-    return ret;
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesRestore:
- * @data: data structure created by qemuCgroupEmulatorAllNodesAllow
- *
- * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the
- * associated data.
- */
-void
-qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data)
-{
-    virErrorPtr err;
-
-    if (!data)
-        return;
-
-    virErrorPreserveLast(&err);
-    virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
-    virErrorRestore(&err);
-
-    qemuCgroupEmulatorAllNodesDataFree(data);
-}
index cd537ebd823c64221745d257841faba44fb4e0c6..f09134947ffc3b2cc1be44b5f38db117ea67ed68 100644 (file)
@@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm,
                            virDomainChrDef *dev);
 int qemuTeardownChardevCgroup(virDomainObj *vm,
                               virDomainChrDef *dev);
-int qemuConnectCgroup(virDomainObj *vm);
 int qemuSetupCgroup(virDomainObj *vm,
                     size_t nnicindexes,
                     int *nicindexes);
-int qemuSetupCgroupVcpuBW(virCgroup *cgroup,
-                          unsigned long long period,
-                          long long quota);
-int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask);
-int qemuSetupGlobalCpuCgroup(virDomainObj *vm);
 int qemuSetupCgroupForExtDevices(virDomainObj *vm,
                                  virQEMUDriver *driver);
-int qemuRemoveCgroup(virDomainObj *vm);
 
 typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData;
 struct _qemuCgroupEmulatorAllNodesData {
@@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData {
     char *emulatorMemMask;
 };
 
-int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
-                                    qemuCgroupEmulatorAllNodesData **data);
-void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data);
-
 extern const char *const defaultDeviceACL[];
index 0a1ba74e653bd2c573f943bbc035cedd9af47042..1141efef4bfbbbe079ca92b2307f57892782a29b 100644 (file)
@@ -4419,7 +4419,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm,
             if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu,
                                    false, &cgroup_vcpu) < 0)
                 goto cleanup;
-            if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
+            if (virDomainCgroupSetupCpusetCpus(cgroup_vcpu, cpumap) < 0)
                 goto cleanup;
         }
 
@@ -4628,7 +4628,7 @@ qemuDomainPinEmulator(virDomainPtr dom,
                                    0, false, &cgroup_emulator) < 0)
                 goto endjob;
 
-            if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
+            if (virDomainCgroupSetupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
                 virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                                _("failed to set cpuset.cpus in cgroup"
                                  " for emulator threads"));
@@ -5025,7 +5025,7 @@ qemuDomainPinIOThread(virDomainPtr dom,
             if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
                                    iothread_id, false, &cgroup_iothread) < 0)
                 goto endjob;
-            if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
+            if (virDomainCgroupSetupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
                 virReportError(VIR_ERR_OPERATION_INVALID,
                                _("failed to set cpuset.cpus in cgroup"
                                  " for iothread %d"), iothread_id);
@@ -8925,7 +8925,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period,
     if (period == 0 && quota == 0)
         return 0;
 
-    if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+    if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
         return -1;
 
     return 0;
@@ -9120,7 +9120,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup,
                                false, &cgroup_vcpu) < 0)
             return -1;
 
-        if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
+        if (virDomainCgroupSetupVcpuBW(cgroup_vcpu, period, quota) < 0)
             return -1;
     }
 
@@ -9141,7 +9141,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup,
                            false, &cgroup_emulator) < 0)
         return -1;
 
-    if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
+    if (virDomainCgroupSetupVcpuBW(cgroup_emulator, period, quota) < 0)
         return -1;
 
     return 0;
@@ -9168,7 +9168,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup,
                                false, &cgroup_iothread) < 0)
             return -1;
 
-        if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
+        if (virDomainCgroupSetupVcpuBW(cgroup_iothread, period, quota) < 0)
             return -1;
     }
 
index 409f492ec7dfba8299f8963d7baa9de8cf96f78f..d801559057c0834d02b0ecf87d70169dd7888aa5 100644 (file)
@@ -37,6 +37,7 @@
 #include "qemu_snapshot.h"
 #include "qemu_virtiofs.h"
 #include "domain_audit.h"
+#include "domain_cgroup.h"
 #include "netdev_bandwidth_conf.h"
 #include "domain_nwfilter.h"
 #include "virlog.h"
@@ -6538,11 +6539,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
                        bool enable)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
-    qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+    virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
     ssize_t nextvcpu = -1;
     int ret = -1;
 
-    if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+    if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
         goto cleanup;
 
     if (enable) {
@@ -6563,7 +6564,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
     ret = 0;
 
  cleanup:
-    qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+    virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
 
     return ret;
 }
index fa6a5e5e7d7f94d2ed5920d2f9584b03137b49cf..c13280c8f39a5f23c7737a579ab704b9a81f7bb3 100644 (file)
@@ -73,6 +73,7 @@
 #include "virpidfile.h"
 #include "virhostcpu.h"
 #include "domain_audit.h"
+#include "domain_cgroup.h"
 #include "domain_nwfilter.h"
 #include "domain_validate.h"
 #include "locking/domain_lock.h"
@@ -2685,7 +2686,7 @@ qemuProcessSetupPid(virDomainObj *vm,
 
         if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
             if (use_cpumask &&
-                qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
+                virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
                 goto cleanup;
 
             if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
@@ -2694,7 +2695,7 @@ qemuProcessSetupPid(virDomainObj *vm,
         }
 
         if ((period || quota) &&
-            qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+            virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
             goto cleanup;
 
         /* Move the thread to the sub dir */
@@ -5951,7 +5952,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
 {
     unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
     qemuDomainObjPrivate *priv = vm->privateData;
-    qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+    virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
     virDomainVcpuDef *vcpu;
     qemuDomainVcpuPrivate *vcpupriv;
     size_t i;
@@ -5979,7 +5980,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
     qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
           qemuProcessVcpusSortOrder);
 
-    if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+    if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
         goto cleanup;
 
     for (i = 0; i < nbootHotplug; i++) {
@@ -6003,7 +6004,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
     ret = 0;
 
  cleanup:
-    qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+    virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
     return ret;
 }
 
@@ -6993,7 +6994,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
     /* Ensure no historical cgroup for this VM is lying around bogus
      * settings */
     VIR_DEBUG("Ensuring no historical cgroup is lying around");
-    qemuRemoveCgroup(vm);
+    virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);
 
     if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
         virReportSystemError(errno,
@@ -7602,7 +7603,7 @@ qemuProcessLaunch(virConnectPtr conn,
         goto cleanup;
 
     VIR_DEBUG("Setting global CPU cgroup (if required)");
-    if (qemuSetupGlobalCpuCgroup(vm) < 0)
+    if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0)
         goto cleanup;
 
     VIR_DEBUG("Setting vCPU tuning/settings");
@@ -8201,7 +8202,7 @@ void qemuProcessStop(virQEMUDriver *driver,
     }
 
  retry:
-    if ((ret = qemuRemoveCgroup(vm)) < 0) {
+    if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) {
         if (ret == -EBUSY && (retries++ < 5)) {
             g_usleep(200*1000);
             goto retry;
@@ -8760,7 +8761,12 @@ qemuProcessReconnect(void *opaque)
     if (!priv->machineName)
         goto error;
 
-    if (qemuConnectCgroup(obj) < 0)
+    if (virDomainCgroupConnectCgroup("qemu",
+                                     obj,
+                                     &priv->cgroup,
+                                     cfg->cgroupControllers,
+                                     priv->driver->privileged,
+                                     priv->machineName) < 0)
         goto error;
 
     if (qemuDomainPerfRestart(obj) < 0)