Refactor some cgroup management methods from qemu into hypervisor.
These methods will be shared with ch driver for cgroup management.
Signed-off-by: Praveen K Paladugu <prapal(a)linux.microsoft.com>
---
src/hypervisor/domain_cgroup.c | 457 ++++++++++++++++++++++++++++++++-
src/hypervisor/domain_cgroup.h | 72 ++++++
src/libvirt_private.syms | 14 +-
src/qemu/qemu_cgroup.c | 413 +----------------------------
src/qemu/qemu_cgroup.h | 11 -
src/qemu/qemu_driver.c | 14 +-
src/qemu/qemu_hotplug.c | 7 +-
src/qemu/qemu_process.c | 24 +-
8 files changed, 580 insertions(+), 432 deletions(-)
diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c
index 61b54f071c..05c53ff7d1 100644
--- a/src/hypervisor/domain_cgroup.c
+++ b/src/hypervisor/domain_cgroup.c
@@ -22,11 +22,12 @@
#include "domain_cgroup.h"
#include "domain_driver.h"
-
+#include "util/virnuma.h"
+#include "virlog.h"
#include "virutil.h"
#define VIR_FROM_THIS VIR_FROM_DOMAIN
-
+VIR_LOG_INIT("domain.cgroup");
int
virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio)
@@ -269,3 +270,455 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
return 0;
}
+
+
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj *vm,
+ virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) {
+ if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Block I/O tuning is not available on this
host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio);
+}
+
+
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj *vm,
+ virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
+ if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
+ virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
+ virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Memory cgroup is not available on this host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ return virDomainCgroupSetupMemtune(cgroup, vm->def->mem);
+}
+
+
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+ if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj *vm,
+ virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ if (vm->def->cputune.sharesSpecified) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("CPU tuning is not available on this host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ if (vm->def->cputune.sharesSpecified) {
+
+ if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0)
+ return -1;
+
+ }
+
+ return 0;
+}
+
+
+int
+virDomainCgroupInitCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes,
+ int *nicindexes,
+ virCgroup * cgroup,
+ int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName)
+{
+ if (!privileged)
+ return 0;
+
+ if (!virCgroupAvailable())
+ return 0;
+
+ virCgroupFree(cgroup);
+ cgroup = NULL;
+
+ if (!vm->def->resource)
+ vm->def->resource = g_new0(virDomainResourceDef, 1);
+
+ if (!vm->def->resource->partition)
+ vm->def->resource->partition = g_strdup("/machine");
+
+ if (!g_path_is_absolute(vm->def->resource->partition)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource partition '%s' must start with
'/'"),
+ vm->def->resource->partition);
+ return -1;
+ }
+
+ if (virCgroupNewMachine(machineName,
+ prefix,
+ vm->def->uuid,
+ NULL,
+ vm->pid,
+ false,
+ nnicindexes, nicindexes,
+ vm->def->resource->partition,
+ cgroupControllers,
+ maxThreadsPerProc, &cgroup) < 0) {
+ if (virCgroupNewIgnoreError())
+ return 0;
+
+ return -1;
+ }
+
+ return 0;
+}
+
+
+void
+virDomainCgroupRestoreCgroupState(virDomainObj *vm,
+ virCgroup *cgroup)
+{
+ g_autofree char *mem_mask = NULL;
+ size_t i = 0;
+
+ g_autoptr(virBitmap) all_nodes = NULL;
+
+ if (!virNumaIsAvailable() ||
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return;
+
+ if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+ goto error;
+
+ if (!(mem_mask = virBitmapFormat(all_nodes)))
+ goto error;
+
+ if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
+ goto error;
+
+ if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+ goto error;
+
+ for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
+ virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
+
+ if (!vcpu->online)
+ continue;
+
+ if (virDomainCgroupRestoreCgroupThread(cgroup,
+ VIR_CGROUP_THREAD_VCPU,
+ i) < 0)
+ return;
+ }
+
+ for (i = 0; i < vm->def->niothreadids; i++) {
+ if (virDomainCgroupRestoreCgroupThread(cgroup,
+ VIR_CGROUP_THREAD_IOTHREAD,
+
vm->def->iothreadids[i]->iothread_id) < 0)
+ return;
+ }
+
+ if (virDomainCgroupRestoreCgroupThread(cgroup,
+ VIR_CGROUP_THREAD_EMULATOR,
+ 0) < 0)
+ return;
+
+ return;
+
+ error:
+ virResetLastError();
+ VIR_DEBUG("Couldn't restore cgroups to meaningful state");
+ return;
+}
+
+
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+ virCgroupThreadName thread,
+ int id)
+{
+ g_autoptr(virCgroup) cgroup_temp = NULL;
+ g_autofree char *nodeset = NULL;
+
+ if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
+ return -1;
+
+ if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
+ return -1;
+
+ if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
+ return -1;
+
+ if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+ virDomainObj *vm,
+ virCgroup *cgroup,
+ int cgroupControllers,
+ bool privileged,
+ char *machineName)
+{
+ if (privileged)
+ return 0;
+
+ if (!virCgroupAvailable())
+ return 0;
+
+ virCgroupFree(cgroup);
+
+ if (virCgroupNewDetectMachine(vm->def->name,
+ prefix,
+ vm->pid,
+ cgroupControllers, machineName, &cgroup) < 0)
+ return -1;
+
+ virDomainCgroupRestoreCgroupState(vm, cgroup);
+ return 0;
+}
+
+
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes,
+ int *nicindexes,
+ virCgroup * cgroup,
+ int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName)
+{
+ if (!vm->pid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot setup cgroups until process is started"));
+ return -1;
+ }
+
+ if (virDomainCgroupInitCgroup(prefix,
+ vm,
+ nnicindexes,
+ nicindexes,
+ cgroup,
+ cgroupControllers,
+ maxThreadsPerProc,
+ privileged,
+ machineName) < 0)
+ return -1;
+
+ if (!cgroup)
+ return 0;
+
+ if (virDomainCgroupSetupBlkioCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virDomainCgroupSetupMemoryCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virDomainCgroupSetupCpuCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virDomainCgroupSetupCpusetCgroup(cgroup) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virDomainCgroupSetupVcpuBW(virCgroup *cgroup,
+ unsigned long long period,
+ long long quota)
+{
+ return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
+}
+
+
+int
+virDomainCgroupSetupCpusetCpus(virCgroup *cgroup,
+ virBitmap *cpumask)
+{
+ return virCgroupSetupCpusetCpus(cgroup, cpumask);
+}
+
+
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj *vm,
+ virCgroup *cgroup,
+ virBitmap *autoNodeset)
+{
+ unsigned long long period = vm->def->cputune.global_period;
+ long long quota = vm->def->cputune.global_quota;
+ g_autofree char *mem_mask = NULL;
+ virDomainNumatuneMemMode mem_mode;
+
+ if ((period || quota) &&
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("cgroup cpu is required for scheduler tuning"));
+ return -1;
+ }
+
+ /*
+ * If CPU cgroup controller is not initialized here, then we need
+ * neither period nor quota settings. And if CPUSET controller is
+ * not initialized either, then there's nothing to do anyway.
+ */
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+
+ if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+ mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
+ autoNodeset, &mem_mask, -1) < 0)
+ return -1;
+
+ if (period || quota) {
+ if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int
+virDomainCgroupRemoveCgroup(virDomainObj *vm,
+ virCgroup *cgroup,
+ char *machineName)
+{
+ if (cgroup == NULL)
+ return 0; /* Not supported, so claim success */
+
+ if (virCgroupTerminateMachine(machineName) < 0) {
+ if (!virCgroupNewIgnoreError())
+ VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
+ }
+
+ return virCgroupRemove(cgroup);
+}
+
+
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data)
+{
+ if (!data)
+ return;
+
+ virCgroupFree(data->emulatorCgroup);
+ g_free(data->emulatorMemMask);
+ g_free(data);
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesAllow:
+ * @cgroup: domain cgroup pointer
+ * @retData: filled with structure used to roll back the operation
+ *
+ * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is
+ * necessary when hotplugging cpus since it requires memory allocated in the
+ * DMA region. Afterwards the operation can be reverted by
+ * virDomainCgroupEmulatorAllNodesRestore.
+ *
+ * Returns 0 on success -1 on error
+ */
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
+ virCgroupEmulatorAllNodesData **retData)
+{
+ virCgroupEmulatorAllNodesData *data = NULL;
+ g_autofree char *all_nodes_str = NULL;
+
+ g_autoptr(virBitmap) all_nodes = NULL;
+ int ret = -1;
+
+ if (!virNumaIsAvailable() ||
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+ if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+ goto cleanup;
+
+ if (!(all_nodes_str = virBitmapFormat(all_nodes)))
+ goto cleanup;
+
+ data = g_new0(virCgroupEmulatorAllNodesData, 1);
+
+ if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
+ false, &data->emulatorCgroup) < 0)
+ goto cleanup;
+
+ if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask)
< 0
+ || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
+ goto cleanup;
+
+ *retData = g_steal_pointer(&data);
+ ret = 0;
+
+ cleanup:
+ virDomainCgroupEmulatorAllNodesDataFree(data);
+
+ return ret;
+}
+
+
+/**
+ * virDomainCgroupEmulatorAllNodesRestore:
+ * @data: data structure created by virDomainCgroupEmulatorAllNodesAllow
+ *
+ * Rolls back the setting done by virDomainCgroupEmulatorAllNodesAllow and frees the
+ * associated data.
+ */
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data)
+{
+ virError *err;
+
+ if (!data)
+ return;
+
+ virErrorPreserveLast(&err);
+ virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
+ virErrorRestore(&err);
+
+ virDomainCgroupEmulatorAllNodesDataFree(data);
+}
diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h
index f93e5f74fe..dfe7107674 100644
--- a/src/hypervisor/domain_cgroup.h
+++ b/src/hypervisor/domain_cgroup.h
@@ -23,6 +23,11 @@
#include "vircgroup.h"
#include "domain_conf.h"
+typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData;
+struct _virCgroupEmulatorAllNodesData {
+ virCgroup *emulatorCgroup;
+ char *emulatorMemMask;
+};
int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio);
int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem);
@@ -36,3 +41,70 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
virDomainDef *persistentDef,
virTypedParameterPtr params,
int nparams);
+int
+virDomainCgroupSetupBlkioCgroup(virDomainObj * vm,
+ virCgroup *cgroup);
+int
+virDomainCgroupSetupMemoryCgroup(virDomainObj * vm,
+ virCgroup *cgroup);
+int
+virDomainCgroupSetupCpusetCgroup(virCgroup *cgroup);
+int
+virDomainCgroupSetupCpuCgroup(virDomainObj * vm,
+ virCgroup *cgroup);
+int
+virDomainCgroupInitCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes,
+ int *nicindexes,
+ virCgroup *cgroup,
+ int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName);
+void
+virDomainCgroupRestoreCgroupState(virDomainObj * vm,
+ virCgroup *cgroup);
+int
+virDomainCgroupConnectCgroup(const char *prefix,
+ virDomainObj * vm,
+ virCgroup *cgroup,
+ int cgroupControllers,
+ bool privileged,
+ char *machineName);
+int
+virDomainCgroupSetupCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes,
+ int *nicindexes,
+ virCgroup *cgroup,
+ int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName);
+void
+virDomainCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data);
+int
+virDomainCgroupEmulatorAllNodesAllow(virCgroup * cgroup,
+ virCgroupEmulatorAllNodesData ** retData);
+void
+virDomainCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data);
+int
+virDomainCgroupSetupVcpuBW(virCgroup * cgroup,
+ unsigned long long period,
+ long long quota);
+int
+virDomainCgroupSetupCpusetCpus(virCgroup * cgroup,
+ virBitmap * cpumask);
+int
+virDomainCgroupSetupGlobalCpuCgroup(virDomainObj * vm,
+ virCgroup * cgroup,
+ virBitmap *autoNodeset);
+int
+virDomainCgroupRemoveCgroup(virDomainObj * vm,
+ virCgroup * cgroup,
+ char *machineName);
+int
+virDomainCgroupRestoreCgroupThread(virCgroup *cgroup,
+ virCgroupThreadName thread,
+ int id);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index ee14b99d88..d9d83d2a99 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1541,11 +1541,23 @@ virSetConnectStorage;
# hypervisor/domain_cgroup.h
+virDomainCgroupConnectCgroup;
+virDomainCgroupEmulatorAllNodesAllow;
+virDomainCgroupEmulatorAllNodesRestore;
+virDomainCgroupInitCgroup;
+virDomainCgroupRemoveCgroup;
virDomainCgroupSetMemoryLimitParameters;
virDomainCgroupSetupBlkio;
+virDomainCgroupSetupBlkioCgroup;
+virDomainCgroupSetupCgroup;
+virDomainCgroupSetupCpuCgroup;
+virDomainCgroupSetupCpusetCgroup;
+virDomainCgroupSetupCpusetCpus;
virDomainCgroupSetupDomainBlkioParameters;
+virDomainCgroupSetupGlobalCpuCgroup;
+virDomainCgroupSetupMemoryCgroup;
virDomainCgroupSetupMemtune;
-
+virDomainCgroupSetupVcpuBW;
# hypervisor/domain_driver.h
virDomainDriverAddIOThreadCheck;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 308be5b00f..bc0f73cb95 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm,
return ret;
}
-
-static int
-qemuSetupBlkioCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup,
- VIR_CGROUP_CONTROLLER_BLKIO)) {
- if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("Block I/O tuning is not available on this
host"));
- return -1;
- }
- return 0;
- }
-
- return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio);
-}
-
-
-static int
-qemuSetupMemoryCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
- if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
- virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
- virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("Memory cgroup is not available on this host"));
- return -1;
- }
- return 0;
- }
-
- return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem);
-}
-
-
static int
qemuSetupFirmwareCgroup(virDomainObj *vm)
{
@@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
}
-static int
-qemuSetupCpusetCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
- if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
- return -1;
-
- return 0;
-}
-
-
-static int
-qemuSetupCpuCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
- if (vm->def->cputune.sharesSpecified) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("CPU tuning is not available on this host"));
- return -1;
- }
- return 0;
- }
-
- if (vm->def->cputune.sharesSpecified) {
- if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) <
0)
- return -1;
- }
-
- return 0;
-}
-
-
static int
qemuSetupCgroupAppid(virDomainObj *vm)
{
@@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm)
}
-static int
-qemuInitCgroup(virDomainObj *vm,
- size_t nnicindexes,
- int *nicindexes)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
- if (!priv->driver->privileged)
- return 0;
-
- if (!virCgroupAvailable())
- return 0;
-
- virCgroupFree(priv->cgroup);
- priv->cgroup = NULL;
-
- if (!vm->def->resource)
- vm->def->resource = g_new0(virDomainResourceDef, 1);
-
- if (!vm->def->resource->partition)
- vm->def->resource->partition = g_strdup("/machine");
-
- if (!g_path_is_absolute(vm->def->resource->partition)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
- _("Resource partition '%s' must start with
'/'"),
- vm->def->resource->partition);
- return -1;
- }
-
- if (virCgroupNewMachine(priv->machineName,
- "qemu",
- vm->def->uuid,
- NULL,
- vm->pid,
- false,
- nnicindexes, nicindexes,
- vm->def->resource->partition,
- cfg->cgroupControllers,
- cfg->maxThreadsPerProc,
- &priv->cgroup) < 0) {
- if (virCgroupNewIgnoreError())
- return 0;
-
- return -1;
- }
-
- return 0;
-}
-
-static int
-qemuRestoreCgroupThread(virCgroup *cgroup,
- virCgroupThreadName thread,
- int id)
-{
- g_autoptr(virCgroup) cgroup_temp = NULL;
- g_autofree char *nodeset = NULL;
-
- if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
- return -1;
-
- if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
- return -1;
-
- if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
- return -1;
-
- if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
- return -1;
-
- return 0;
-}
-
-static void
-qemuRestoreCgroupState(virDomainObj *vm)
-{
- g_autofree char *mem_mask = NULL;
- qemuDomainObjPrivate *priv = vm->privateData;
- size_t i = 0;
- g_autoptr(virBitmap) all_nodes = NULL;
-
- if (!virNumaIsAvailable() ||
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return;
-
- if (!(all_nodes = virNumaGetHostMemoryNodeset()))
- goto error;
-
- if (!(mem_mask = virBitmapFormat(all_nodes)))
- goto error;
-
- if (virCgroupHasEmptyTasks(priv->cgroup,
- VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
- goto error;
-
- if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
- goto error;
-
- for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
- virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
-
- if (!vcpu->online)
- continue;
-
- if (qemuRestoreCgroupThread(priv->cgroup,
- VIR_CGROUP_THREAD_VCPU, i) < 0)
- return;
- }
-
- for (i = 0; i < vm->def->niothreadids; i++) {
- if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
- vm->def->iothreadids[i]->iothread_id) <
0)
- return;
- }
-
- if (qemuRestoreCgroupThread(priv->cgroup,
- VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
- return;
-
- return;
-
- error:
- virResetLastError();
- VIR_DEBUG("Couldn't restore cgroups to meaningful state");
- return;
-}
-
-int
-qemuConnectCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
- if (!priv->driver->privileged)
- return 0;
-
- if (!virCgroupAvailable())
- return 0;
-
- virCgroupFree(priv->cgroup);
- priv->cgroup = NULL;
-
- if (virCgroupNewDetectMachine(vm->def->name,
- "qemu",
- vm->pid,
- cfg->cgroupControllers,
- priv->machineName,
- &priv->cgroup) < 0)
- return -1;
-
- qemuRestoreCgroupState(vm);
- return 0;
-}
-
int
qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes)
{
qemuDomainObjPrivate *priv = vm->privateData;
+ g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
if (!vm->pid) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -1094,7 +863,15 @@ qemuSetupCgroup(virDomainObj *vm,
return -1;
}
- if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0)
+ if (virDomainCgroupInitCgroup("qemu",
+ vm,
+ nnicindexes,
+ nicindexes,
+ priv->cgroup,
+ cfg->cgroupControllers,
+ cfg->maxThreadsPerProc,
+ priv->driver->privileged,
+ priv->machineName) < 0)
return -1;
if (!priv->cgroup)
@@ -1103,16 +880,16 @@ qemuSetupCgroup(virDomainObj *vm,
if (qemuSetupDevicesCgroup(vm) < 0)
return -1;
- if (qemuSetupBlkioCgroup(vm) < 0)
+ if (virDomainCgroupSetupBlkioCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupMemoryCgroup(vm) < 0)
+ if (virDomainCgroupSetupMemoryCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupCpuCgroup(vm) < 0)
+ if (virDomainCgroupSetupCpuCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupCpusetCgroup(vm) < 0)
+ if (virDomainCgroupSetupCpusetCgroup(priv->cgroup) < 0)
return -1;
if (qemuSetupCgroupAppid(vm) < 0)
@@ -1121,23 +898,6 @@ qemuSetupCgroup(virDomainObj *vm,
return 0;
}
-int
-qemuSetupCgroupVcpuBW(virCgroup *cgroup,
- unsigned long long period,
- long long quota)
-{
- return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
-}
-
-
-int
-qemuSetupCgroupCpusetCpus(virCgroup *cgroup,
- virBitmap *cpumask)
-{
- return virCgroupSetupCpusetCpus(cgroup, cpumask);
-}
-
-
int
qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver)
@@ -1164,148 +924,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm,
return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp);
}
-
-
-int
-qemuSetupGlobalCpuCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- unsigned long long period = vm->def->cputune.global_period;
- long long quota = vm->def->cputune.global_quota;
- g_autofree char *mem_mask = NULL;
- virDomainNumatuneMemMode mem_mode;
-
- if ((period || quota) &&
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("cgroup cpu is required for scheduler tuning"));
- return -1;
- }
-
- /*
- * If CPU cgroup controller is not initialized here, then we need
- * neither period nor quota settings. And if CPUSET controller is
- * not initialized either, then there's nothing to do anyway.
- */
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
-
- if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
- mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
- virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
- priv->autoNodeset,
- &mem_mask, -1) < 0)
- return -1;
-
- if (period || quota) {
- if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
- return -1;
- }
-
- return 0;
-}
-
-
-int
-qemuRemoveCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (priv->cgroup == NULL)
- return 0; /* Not supported, so claim success */
-
- if (virCgroupTerminateMachine(priv->machineName) < 0) {
- if (!virCgroupNewIgnoreError())
- VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
- }
-
- return virCgroupRemove(priv->cgroup);
-}
-
-
-static void
-qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data)
-{
- if (!data)
- return;
-
- virCgroupFree(data->emulatorCgroup);
- g_free(data->emulatorMemMask);
- g_free(data);
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesAllow:
- * @cgroup: domain cgroup pointer
- * @retData: filled with structure used to roll back the operation
- *
- * Allows all NUMA nodes for the qemu emulator thread temporarily. This is
- * necessary when hotplugging cpus since it requires memory allocated in the
- * DMA region. Afterwards the operation can be reverted by
- * qemuCgroupEmulatorAllNodesRestore.
- *
- * Returns 0 on success -1 on error
- */
-int
-qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
- qemuCgroupEmulatorAllNodesData **retData)
-{
- qemuCgroupEmulatorAllNodesData *data = NULL;
- g_autofree char *all_nodes_str = NULL;
- g_autoptr(virBitmap) all_nodes = NULL;
- int ret = -1;
-
- if (!virNumaIsAvailable() ||
- !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
- if (!(all_nodes = virNumaGetHostMemoryNodeset()))
- goto cleanup;
-
- if (!(all_nodes_str = virBitmapFormat(all_nodes)))
- goto cleanup;
-
- data = g_new0(qemuCgroupEmulatorAllNodesData, 1);
-
- if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
- false, &data->emulatorCgroup) < 0)
- goto cleanup;
-
- if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask)
< 0 ||
- virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
- goto cleanup;
-
- *retData = g_steal_pointer(&data);
- ret = 0;
-
- cleanup:
- qemuCgroupEmulatorAllNodesDataFree(data);
-
- return ret;
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesRestore:
- * @data: data structure created by qemuCgroupEmulatorAllNodesAllow
- *
- * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the
- * associated data.
- */
-void
-qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data)
-{
- virErrorPtr err;
-
- if (!data)
- return;
-
- virErrorPreserveLast(&err);
- virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
- virErrorRestore(&err);
-
- qemuCgroupEmulatorAllNodesDataFree(data);
-}
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index cd537ebd82..f09134947f 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
int qemuTeardownChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
-int qemuConnectCgroup(virDomainObj *vm);
int qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes);
-int qemuSetupCgroupVcpuBW(virCgroup *cgroup,
- unsigned long long period,
- long long quota);
-int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask);
-int qemuSetupGlobalCpuCgroup(virDomainObj *vm);
int qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver);
-int qemuRemoveCgroup(virDomainObj *vm);
typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData;
struct _qemuCgroupEmulatorAllNodesData {
@@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData {
char *emulatorMemMask;
};
-int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
- qemuCgroupEmulatorAllNodesData **data);
-void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data);
-
extern const char *const defaultDeviceACL[];
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d3d76c003f..c3d4bc1270 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4424,7 +4424,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu,
false, &cgroup_vcpu) < 0)
goto cleanup;
- if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
+ if (virDomainCgroupSetupCpusetCpus(cgroup_vcpu, cpumap) < 0)
goto cleanup;
}
@@ -4633,7 +4633,7 @@ qemuDomainPinEmulator(virDomainPtr dom,
0, false, &cgroup_emulator) < 0)
goto endjob;
- if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
+ if (virDomainCgroupSetupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("failed to set cpuset.cpus in cgroup"
" for emulator threads"));
@@ -5038,7 +5038,7 @@ qemuDomainPinIOThread(virDomainPtr dom,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
iothread_id, false, &cgroup_iothread) < 0)
goto endjob;
- if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
+ if (virDomainCgroupSetupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID,
_("failed to set cpuset.cpus in cgroup"
" for iothread %d"), iothread_id);
@@ -8938,7 +8938,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period,
if (period == 0 && quota == 0)
return 0;
- if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+ if (virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
return -1;
return 0;
@@ -9133,7 +9133,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_vcpu) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
+ if (virDomainCgroupSetupVcpuBW(cgroup_vcpu, period, quota) < 0)
return -1;
}
@@ -9154,7 +9154,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup,
false, &cgroup_emulator) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
+ if (virDomainCgroupSetupVcpuBW(cgroup_emulator, period, quota) < 0)
return -1;
return 0;
@@ -9181,7 +9181,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_iothread) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
+ if (virDomainCgroupSetupVcpuBW(cgroup_iothread, period, quota) < 0)
return -1;
}
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 04c6600f26..01d734e294 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -37,6 +37,7 @@
#include "qemu_snapshot.h"
#include "qemu_virtiofs.h"
#include "domain_audit.h"
+#include "domain_cgroup.h"
#include "netdev_bandwidth_conf.h"
#include "domain_nwfilter.h"
#include "virlog.h"
@@ -6551,11 +6552,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
bool enable)
{
qemuDomainObjPrivate *priv = vm->privateData;
- qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+ virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
ssize_t nextvcpu = -1;
int ret = -1;
- if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+ if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) <
0)
goto cleanup;
if (enable) {
@@ -6576,7 +6577,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
ret = 0;
cleanup:
- qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+ virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 5c9ca0fe4f..605fa2efa2 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -73,6 +73,7 @@
#include "virpidfile.h"
#include "virhostcpu.h"
#include "domain_audit.h"
+#include "domain_cgroup.h"
#include "domain_nwfilter.h"
#include "domain_validate.h"
#include "locking/domain_lock.h"
@@ -2686,7 +2687,7 @@ qemuProcessSetupPid(virDomainObj *vm,
if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
if (use_cpumask &&
- qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
+ virDomainCgroupSetupCpusetCpus(cgroup, use_cpumask) < 0)
goto cleanup;
if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
@@ -2695,7 +2696,7 @@ qemuProcessSetupPid(virDomainObj *vm,
}
if ((period || quota) &&
- qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+ virDomainCgroupSetupVcpuBW(cgroup, period, quota) < 0)
goto cleanup;
/* Move the thread to the sub dir */
@@ -5964,7 +5965,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
{
unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
qemuDomainObjPrivate *priv = vm->privateData;
- qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+ virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
virDomainVcpuDef *vcpu;
qemuDomainVcpuPrivate *vcpupriv;
size_t i;
@@ -5992,7 +5993,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
qemuProcessVcpusSortOrder);
- if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+ if (virDomainCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) <
0)
goto cleanup;
for (i = 0; i < nbootHotplug; i++) {
@@ -6016,7 +6017,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
ret = 0;
cleanup:
- qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+ virDomainCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}
@@ -7006,7 +7007,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
/* Ensure no historical cgroup for this VM is lying around bogus
* settings */
VIR_DEBUG("Ensuring no historical cgroup is lying around");
- qemuRemoveCgroup(vm);
+ virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName);
if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
virReportSystemError(errno,
@@ -7615,7 +7616,7 @@ qemuProcessLaunch(virConnectPtr conn,
goto cleanup;
VIR_DEBUG("Setting global CPU cgroup (if required)");
- if (qemuSetupGlobalCpuCgroup(vm) < 0)
+ if (virDomainCgroupSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset)
< 0)
goto cleanup;
VIR_DEBUG("Setting vCPU tuning/settings");
@@ -8223,7 +8224,7 @@ void qemuProcessStop(virQEMUDriver *driver,
}
retry:
- if ((ret = qemuRemoveCgroup(vm)) < 0) {
+ if ((ret = virDomainCgroupRemoveCgroup(vm, priv->cgroup, priv->machineName))
< 0) {
if (ret == -EBUSY && (retries++ < 5)) {
g_usleep(200*1000);
goto retry;
@@ -8782,7 +8783,12 @@ qemuProcessReconnect(void *opaque)
if (!priv->machineName)
goto error;
- if (qemuConnectCgroup(obj) < 0)
+ if (virDomainCgroupConnectCgroup("qemu",
+ obj,
+ priv->cgroup,
+ cfg->cgroupControllers,
+ priv->driver->privileged,
+ priv->machineName) < 0)
goto error;
if (qemuDomainPerfRestart(obj) < 0)
--
2.27.0