Refactor some cgroup management methods from qemu into hypervisor.
These methods will be shared with ch driver for cgroup management.
Signed-off-by: Praveen K Paladugu <prapal(a)linux.microsoft.com>
---
src/hypervisor/domain_cgroup.c | 426 ++++++++++++++++++++++++++++++++-
src/hypervisor/domain_cgroup.h | 52 ++++
src/libvirt_private.syms | 13 +
src/qemu/qemu_cgroup.c | 410 +------------------------------
src/qemu/qemu_cgroup.h | 11 -
src/qemu/qemu_driver.c | 14 +-
src/qemu/qemu_hotplug.c | 7 +-
src/qemu/qemu_process.c | 20 +-
8 files changed, 522 insertions(+), 431 deletions(-)
diff --git a/src/hypervisor/domain_cgroup.c b/src/hypervisor/domain_cgroup.c
index 61b54f071c..0a1b3867b1 100644
--- a/src/hypervisor/domain_cgroup.c
+++ b/src/hypervisor/domain_cgroup.c
@@ -22,11 +22,12 @@
#include "domain_cgroup.h"
#include "domain_driver.h"
-
+#include "util/virnuma.h"
+#include "virlog.h"
#include "virutil.h"
#define VIR_FROM_THIS VIR_FROM_DOMAIN
-
+VIR_LOG_INIT("domain.cgroup");
int
virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio)
@@ -269,3 +270,424 @@ virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
return 0;
}
+
+
+int
+virSetupBlkioCgroup(virDomainObj *vm, virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_BLKIO)) {
+ if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Block I/O tuning is not available on this
host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ return virDomainCgroupSetupBlkio(cgroup, vm->def->blkio);
+}
+
+
+int
+virSetupMemoryCgroup(virDomainObj *vm, virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
+ if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
+ virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
+ virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Memory cgroup is not available on this host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ return virDomainCgroupSetupMemtune(cgroup, vm->def->mem);
+}
+
+
+int
+virSetupCpusetCgroup(virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+ if (virCgroupSetCpusetMemoryMigrate(cgroup, true) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virSetupCpuCgroup(virDomainObj *vm, virCgroup *cgroup)
+{
+
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ if (vm->def->cputune.sharesSpecified) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("CPU tuning is not available on this host"));
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ if (vm->def->cputune.sharesSpecified) {
+
+ if (virCgroupSetCpuShares(cgroup, vm->def->cputune.shares) < 0)
+ return -1;
+
+ }
+
+ return 0;
+}
+
+
+int
+virInitCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes, int *nicindexes,
+ virCgroup * cgroup, int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged, char *machineName)
+{
+ if (!privileged)
+ return 0;
+
+ if (!virCgroupAvailable())
+ return 0;
+
+ virCgroupFree(cgroup);
+ cgroup = NULL;
+
+ if (!vm->def->resource)
+ vm->def->resource = g_new0(virDomainResourceDef, 1);
+
+ if (!vm->def->resource->partition)
+ vm->def->resource->partition = g_strdup("/machine");
+
+ if (!g_path_is_absolute(vm->def->resource->partition)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("Resource partition '%s' must start with
'/'"),
+ vm->def->resource->partition);
+ return -1;
+ }
+
+ if (virCgroupNewMachine(machineName,
+ prefix,
+ vm->def->uuid,
+ NULL,
+ vm->pid,
+ false,
+ nnicindexes, nicindexes,
+ vm->def->resource->partition,
+ cgroupControllers,
+ maxThreadsPerProc, &cgroup) < 0) {
+ if (virCgroupNewIgnoreError())
+ return 0;
+
+ return -1;
+ }
+
+ return 0;
+}
+
+
+void
+virRestoreCgroupState(virDomainObj *vm, virCgroup *cgroup)
+{
+ g_autofree char *mem_mask = NULL;
+ size_t i = 0;
+
+ g_autoptr(virBitmap) all_nodes = NULL;
+
+ if (!virNumaIsAvailable() ||
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return;
+
+ if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+ goto error;
+
+ if (!(mem_mask = virBitmapFormat(all_nodes)))
+ goto error;
+
+ if (virCgroupHasEmptyTasks(cgroup, VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
+ goto error;
+
+ if (virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+ goto error;
+
+ for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
+ virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
+
+ if (!vcpu->online)
+ continue;
+
+ if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_VCPU, i) < 0)
+ return;
+ }
+
+ for (i = 0; i < vm->def->niothreadids; i++) {
+ if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_IOTHREAD,
+ vm->def->iothreadids[i]->iothread_id) <
0)
+ return;
+ }
+
+ if (virRestoreCgroupThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
+ return;
+
+ return;
+
+ error:
+ virResetLastError();
+ VIR_DEBUG("Couldn't restore cgroups to meaningful state");
+ return;
+}
+
+
+int
+virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread, int id)
+{
+ g_autoptr(virCgroup) cgroup_temp = NULL;
+ g_autofree char *nodeset = NULL;
+
+ if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
+ return -1;
+
+ if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
+ return -1;
+
+ if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
+ return -1;
+
+ if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virConnectCgroup(const char *prefix, virDomainObj *vm, virCgroup *cgroup,
+ int cgroupControllers, bool privileged, char *machineName)
+{
+ if (privileged)
+ return 0;
+
+ if (!virCgroupAvailable())
+ return 0;
+
+ virCgroupFree(cgroup);
+
+ if (virCgroupNewDetectMachine(vm->def->name,
+ prefix,
+ vm->pid,
+ cgroupControllers, machineName, &cgroup) < 0)
+ return -1;
+
+ virRestoreCgroupState(vm, cgroup);
+ return 0;
+}
+
+
+int
+virSetupCgroup(const char *prefix,
+ virDomainObj * vm,
+ size_t nnicindexes, int *nicindexes,
+ virCgroup * cgroup,
+ int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged, char *machineName)
+{
+ if (!vm->pid) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Cannot setup cgroups until process is started"));
+ return -1;
+ }
+
+ if (virInitCgroup(prefix, vm, nnicindexes, nicindexes,
+ cgroup, cgroupControllers,
+ maxThreadsPerProc, privileged, machineName) < 0)
+ return -1;
+
+ if (!cgroup)
+ return 0;
+
+ if (virSetupBlkioCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virSetupMemoryCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virSetupCpuCgroup(vm, cgroup) < 0)
+ return -1;
+
+ if (virSetupCpusetCgroup(cgroup) < 0)
+ return -1;
+
+ return 0;
+}
+
+
+int
+virSetupCgroupVcpuBW(virCgroup *cgroup,
+ unsigned long long period, long long quota)
+{
+ return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
+}
+
+
+int
+virSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask)
+{
+ return virCgroupSetupCpusetCpus(cgroup, cpumask);
+}
+
+
+int
+virSetupGlobalCpuCgroup(virDomainObj *vm, virCgroup *cgroup,
+ virBitmap *autoNodeset)
+{
+ unsigned long long period = vm->def->cputune.global_period;
+ long long quota = vm->def->cputune.global_quota;
+ g_autofree char *mem_mask = NULL;
+ virDomainNumatuneMemMode mem_mode;
+
+ if ((period || quota) &&
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("cgroup cpu is required for scheduler tuning"));
+ return -1;
+ }
+
+ /*
+ * If CPU cgroup controller is not initialized here, then we need
+ * neither period nor quota settings. And if CPUSET controller is
+ * not initialized either, then there's nothing to do anyway.
+ */
+ if (!virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+
+ if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+ mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+ virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
+ autoNodeset, &mem_mask, -1) < 0)
+ return -1;
+
+ if (period || quota) {
+ if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int
+virRemoveCgroup(virDomainObj *vm, virCgroup *cgroup, char *machineName)
+{
+ if (cgroup == NULL)
+ return 0; /* Not supported, so claim success */
+
+ if (virCgroupTerminateMachine(machineName) < 0) {
+ if (!virCgroupNewIgnoreError())
+ VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
+ }
+
+ return virCgroupRemove(cgroup);
+}
+
+
+void
+virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData *data)
+{
+ if (!data)
+ return;
+
+ virCgroupFree(data->emulatorCgroup);
+ g_free(data->emulatorMemMask);
+ g_free(data);
+}
+
+
+/**
+ * virCgroupEmulatorAllNodesAllow:
+ * @cgroup: domain cgroup pointer
+ * @retData: filled with structure used to roll back the operation
+ *
+ * Allows all NUMA nodes for the cloud hypervisor thread temporarily. This is
+ * necessary when hotplugging cpus since it requires memory allocated in the
+ * DMA region. Afterwards the operation can be reverted by
+ * virCgroupEmulatorAllNodesRestore.
+ *
+ * Returns 0 on success -1 on error
+ */
+int
+virCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
+ virCgroupEmulatorAllNodesData **retData)
+{
+ virCgroupEmulatorAllNodesData *data = NULL;
+ g_autofree char *all_nodes_str = NULL;
+
+ g_autoptr(virBitmap) all_nodes = NULL;
+ int ret = -1;
+
+ if (!virNumaIsAvailable() ||
+ !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
+ return 0;
+
+ if (!(all_nodes = virNumaGetHostMemoryNodeset()))
+ goto cleanup;
+
+ if (!(all_nodes_str = virBitmapFormat(all_nodes)))
+ goto cleanup;
+
+ data = g_new0(virCgroupEmulatorAllNodesData, 1);
+
+ if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
+ false, &data->emulatorCgroup) < 0)
+ goto cleanup;
+
+ if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask)
< 0
+ || virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
+ goto cleanup;
+
+ *retData = g_steal_pointer(&data);
+ ret = 0;
+
+ cleanup:
+ virCgroupEmulatorAllNodesDataFree(data);
+
+ return ret;
+}
+
+
+/**
+ * virCgroupEmulatorAllNodesRestore:
+ * @data: data structure created by virCgroupEmulatorAllNodesAllow
+ *
+ * Rolls back the setting done by virCgroupEmulatorAllNodesAllow and frees the
+ * associated data.
+ */
+void
+virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData *data)
+{
+ virError *err;
+
+ if (!data)
+ return;
+
+ virErrorPreserveLast(&err);
+ virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
+ virErrorRestore(&err);
+
+ virCgroupEmulatorAllNodesDataFree(data);
+}
diff --git a/src/hypervisor/domain_cgroup.h b/src/hypervisor/domain_cgroup.h
index f93e5f74fe..fd8b74a4ee 100644
--- a/src/hypervisor/domain_cgroup.h
+++ b/src/hypervisor/domain_cgroup.h
@@ -23,6 +23,11 @@
#include "vircgroup.h"
#include "domain_conf.h"
+typedef struct _virCgroupEmulatorAllNodesData virCgroupEmulatorAllNodesData;
+struct _virCgroupEmulatorAllNodesData {
+ virCgroup *emulatorCgroup;
+ char *emulatorMemMask;
+};
int virDomainCgroupSetupBlkio(virCgroup *cgroup, virDomainBlkiotune blkio);
int virDomainCgroupSetupMemtune(virCgroup *cgroup, virDomainMemtune mem);
@@ -36,3 +41,50 @@ int virDomainCgroupSetMemoryLimitParameters(virCgroup *cgroup,
virDomainDef *persistentDef,
virTypedParameterPtr params,
int nparams);
+int
+virSetupBlkioCgroup(virDomainObj * vm, virCgroup *cgroup);
+int
+virSetupMemoryCgroup(virDomainObj * vm, virCgroup *cgroup);
+int
+virSetupCpusetCgroup(virCgroup *cgroup);
+int
+virSetupCpuCgroup(virDomainObj * vm, virCgroup *cgroup);
+int
+virInitCgroup(const char *prefix, virDomainObj * vm,
+ size_t nnicindexes, int *nicindexes,
+ virCgroup *cgroup, int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName);
+void
+virRestoreCgroupState(virDomainObj * vm, virCgroup *cgroup);
+int
+virConnectCgroup(const char *prefix, virDomainObj * vm, virCgroup *cgroup,
+ int cgroupControllers, bool privileged, char *machineName);
+int
+virSetupCgroup(const char *prefix, virDomainObj * vm,
+ size_t nnicindexes, int *nicindexes,
+ virCgroup *cgroup, int cgroupControllers,
+ unsigned int maxThreadsPerProc,
+ bool privileged,
+ char *machineName);
+void
+virCgroupEmulatorAllNodesDataFree(virCgroupEmulatorAllNodesData * data);
+int
+virCgroupEmulatorAllNodesAllow(virCgroup * cgroup,
+ virCgroupEmulatorAllNodesData ** retData);
+void
+virCgroupEmulatorAllNodesRestore(virCgroupEmulatorAllNodesData * data);
+int
+virSetupCgroupVcpuBW(virCgroup * cgroup,
+ unsigned long long period, long long quota);
+int
+virSetupCgroupCpusetCpus(virCgroup * cgroup, virBitmap * cpumask);
+int
+virSetupGlobalCpuCgroup(virDomainObj * vm, virCgroup * cgroup,
+ virBitmap *autoNodeset);
+int
+virRemoveCgroup(virDomainObj * vm, virCgroup * cgroup, char *machineName);
+int
+virRestoreCgroupThread(virCgroup *cgroup, virCgroupThreadName thread,
+ int id);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 56adc192cd..09b1fbb8c4 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1543,10 +1543,23 @@ virSetConnectStorage;
# hypervisor/domain_cgroup.h
+virCgroupEmulatorAllNodesAllow;
+virCgroupEmulatorAllNodesRestore;
+virConnectCgroup;
virDomainCgroupSetMemoryLimitParameters;
virDomainCgroupSetupBlkio;
virDomainCgroupSetupDomainBlkioParameters;
virDomainCgroupSetupMemtune;
+virInitCgroup;
+virRemoveCgroup;
+virSetupBlkioCgroup;
+virSetupCgroup;
+virSetupCgroupCpusetCpus;
+virSetupCgroupVcpuBW;
+virSetupCpuCgroup;
+virSetupCpusetCgroup;
+virSetupGlobalCpuCgroup;
+virSetupMemoryCgroup;
# hypervisor/domain_driver.h
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 308be5b00f..b1d2875959 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -593,46 +593,6 @@ qemuSetupVideoCgroup(virDomainObj *vm,
return ret;
}
-
-static int
-qemuSetupBlkioCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup,
- VIR_CGROUP_CONTROLLER_BLKIO)) {
- if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("Block I/O tuning is not available on this
host"));
- return -1;
- }
- return 0;
- }
-
- return virDomainCgroupSetupBlkio(priv->cgroup, vm->def->blkio);
-}
-
-
-static int
-qemuSetupMemoryCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
- if (virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
- virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
- virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("Memory cgroup is not available on this host"));
- return -1;
- }
- return 0;
- }
-
- return virDomainCgroupSetupMemtune(priv->cgroup, vm->def->mem);
-}
-
-
static int
qemuSetupFirmwareCgroup(virDomainObj *vm)
{
@@ -861,44 +821,6 @@ qemuSetupDevicesCgroup(virDomainObj *vm)
}
-static int
-qemuSetupCpusetCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
- if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
- return -1;
-
- return 0;
-}
-
-
-static int
-qemuSetupCpuCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
- if (vm->def->cputune.sharesSpecified) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("CPU tuning is not available on this host"));
- return -1;
- }
- return 0;
- }
-
- if (vm->def->cputune.sharesSpecified) {
- if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) <
0)
- return -1;
- }
-
- return 0;
-}
-
-
static int
qemuSetupCgroupAppid(virDomainObj *vm)
{
@@ -927,166 +849,13 @@ qemuSetupCgroupAppid(virDomainObj *vm)
}
-static int
-qemuInitCgroup(virDomainObj *vm,
- size_t nnicindexes,
- int *nicindexes)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
- if (!priv->driver->privileged)
- return 0;
-
- if (!virCgroupAvailable())
- return 0;
-
- virCgroupFree(priv->cgroup);
- priv->cgroup = NULL;
-
- if (!vm->def->resource)
- vm->def->resource = g_new0(virDomainResourceDef, 1);
-
- if (!vm->def->resource->partition)
- vm->def->resource->partition = g_strdup("/machine");
-
- if (!g_path_is_absolute(vm->def->resource->partition)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
- _("Resource partition '%s' must start with
'/'"),
- vm->def->resource->partition);
- return -1;
- }
-
- if (virCgroupNewMachine(priv->machineName,
- "qemu",
- vm->def->uuid,
- NULL,
- vm->pid,
- false,
- nnicindexes, nicindexes,
- vm->def->resource->partition,
- cfg->cgroupControllers,
- cfg->maxThreadsPerProc,
- &priv->cgroup) < 0) {
- if (virCgroupNewIgnoreError())
- return 0;
-
- return -1;
- }
-
- return 0;
-}
-
-static int
-qemuRestoreCgroupThread(virCgroup *cgroup,
- virCgroupThreadName thread,
- int id)
-{
- g_autoptr(virCgroup) cgroup_temp = NULL;
- g_autofree char *nodeset = NULL;
-
- if (virCgroupNewThread(cgroup, thread, id, false, &cgroup_temp) < 0)
- return -1;
-
- if (virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0)
- return -1;
-
- if (virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0)
- return -1;
-
- if (virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
- return -1;
-
- return 0;
-}
-
-static void
-qemuRestoreCgroupState(virDomainObj *vm)
-{
- g_autofree char *mem_mask = NULL;
- qemuDomainObjPrivate *priv = vm->privateData;
- size_t i = 0;
- g_autoptr(virBitmap) all_nodes = NULL;
-
- if (!virNumaIsAvailable() ||
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return;
-
- if (!(all_nodes = virNumaGetHostMemoryNodeset()))
- goto error;
-
- if (!(mem_mask = virBitmapFormat(all_nodes)))
- goto error;
-
- if (virCgroupHasEmptyTasks(priv->cgroup,
- VIR_CGROUP_CONTROLLER_CPUSET) <= 0)
- goto error;
-
- if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
- goto error;
-
- for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
- virDomainVcpuDef *vcpu = virDomainDefGetVcpu(vm->def, i);
-
- if (!vcpu->online)
- continue;
-
- if (qemuRestoreCgroupThread(priv->cgroup,
- VIR_CGROUP_THREAD_VCPU, i) < 0)
- return;
- }
-
- for (i = 0; i < vm->def->niothreadids; i++) {
- if (qemuRestoreCgroupThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
- vm->def->iothreadids[i]->iothread_id) <
0)
- return;
- }
-
- if (qemuRestoreCgroupThread(priv->cgroup,
- VIR_CGROUP_THREAD_EMULATOR, 0) < 0)
- return;
-
- return;
-
- error:
- virResetLastError();
- VIR_DEBUG("Couldn't restore cgroups to meaningful state");
- return;
-}
-
-int
-qemuConnectCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
-
- if (!priv->driver->privileged)
- return 0;
-
- if (!virCgroupAvailable())
- return 0;
-
- virCgroupFree(priv->cgroup);
- priv->cgroup = NULL;
-
- if (virCgroupNewDetectMachine(vm->def->name,
- "qemu",
- vm->pid,
- cfg->cgroupControllers,
- priv->machineName,
- &priv->cgroup) < 0)
- return -1;
-
- qemuRestoreCgroupState(vm);
- return 0;
-}
-
int
qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes)
{
qemuDomainObjPrivate *priv = vm->privateData;
+ g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(priv->driver);
if (!vm->pid) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -1094,7 +863,12 @@ qemuSetupCgroup(virDomainObj *vm,
return -1;
}
- if (qemuInitCgroup(vm, nnicindexes, nicindexes) < 0)
+ if (virInitCgroup("qemu", vm, nnicindexes, nicindexes,
+ priv->cgroup,
+ cfg->cgroupControllers,
+ cfg->maxThreadsPerProc,
+ priv->driver->privileged,
+ priv->machineName) < 0)
return -1;
if (!priv->cgroup)
@@ -1103,16 +877,16 @@ qemuSetupCgroup(virDomainObj *vm,
if (qemuSetupDevicesCgroup(vm) < 0)
return -1;
- if (qemuSetupBlkioCgroup(vm) < 0)
+ if (virSetupBlkioCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupMemoryCgroup(vm) < 0)
+ if (virSetupMemoryCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupCpuCgroup(vm) < 0)
+ if (virSetupCpuCgroup(vm, priv->cgroup) < 0)
return -1;
- if (qemuSetupCpusetCgroup(vm) < 0)
+ if (virSetupCpusetCgroup(priv->cgroup) < 0)
return -1;
if (qemuSetupCgroupAppid(vm) < 0)
@@ -1121,23 +895,6 @@ qemuSetupCgroup(virDomainObj *vm,
return 0;
}
-int
-qemuSetupCgroupVcpuBW(virCgroup *cgroup,
- unsigned long long period,
- long long quota)
-{
- return virCgroupSetupCpuPeriodQuota(cgroup, period, quota);
-}
-
-
-int
-qemuSetupCgroupCpusetCpus(virCgroup *cgroup,
- virBitmap *cpumask)
-{
- return virCgroupSetupCpusetCpus(cgroup, cpumask);
-}
-
-
int
qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver)
@@ -1164,148 +921,3 @@ qemuSetupCgroupForExtDevices(virDomainObj *vm,
return qemuExtDevicesSetupCgroup(driver, vm, cgroup_temp);
}
-
-
-int
-qemuSetupGlobalCpuCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
- unsigned long long period = vm->def->cputune.global_period;
- long long quota = vm->def->cputune.global_quota;
- g_autofree char *mem_mask = NULL;
- virDomainNumatuneMemMode mem_mode;
-
- if ((period || quota) &&
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
- virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
- _("cgroup cpu is required for scheduler tuning"));
- return -1;
- }
-
- /*
- * If CPU cgroup controller is not initialized here, then we need
- * neither period nor quota settings. And if CPUSET controller is
- * not initialized either, then there's nothing to do anyway.
- */
- if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
- !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
-
- if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
- mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
- virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
- priv->autoNodeset,
- &mem_mask, -1) < 0)
- return -1;
-
- if (period || quota) {
- if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
- return -1;
- }
-
- return 0;
-}
-
-
-int
-qemuRemoveCgroup(virDomainObj *vm)
-{
- qemuDomainObjPrivate *priv = vm->privateData;
-
- if (priv->cgroup == NULL)
- return 0; /* Not supported, so claim success */
-
- if (virCgroupTerminateMachine(priv->machineName) < 0) {
- if (!virCgroupNewIgnoreError())
- VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
- }
-
- return virCgroupRemove(priv->cgroup);
-}
-
-
-static void
-qemuCgroupEmulatorAllNodesDataFree(qemuCgroupEmulatorAllNodesData *data)
-{
- if (!data)
- return;
-
- virCgroupFree(data->emulatorCgroup);
- g_free(data->emulatorMemMask);
- g_free(data);
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesAllow:
- * @cgroup: domain cgroup pointer
- * @retData: filled with structure used to roll back the operation
- *
- * Allows all NUMA nodes for the qemu emulator thread temporarily. This is
- * necessary when hotplugging cpus since it requires memory allocated in the
- * DMA region. Afterwards the operation can be reverted by
- * qemuCgroupEmulatorAllNodesRestore.
- *
- * Returns 0 on success -1 on error
- */
-int
-qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
- qemuCgroupEmulatorAllNodesData **retData)
-{
- qemuCgroupEmulatorAllNodesData *data = NULL;
- g_autofree char *all_nodes_str = NULL;
- g_autoptr(virBitmap) all_nodes = NULL;
- int ret = -1;
-
- if (!virNumaIsAvailable() ||
- !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
- return 0;
-
- if (!(all_nodes = virNumaGetHostMemoryNodeset()))
- goto cleanup;
-
- if (!(all_nodes_str = virBitmapFormat(all_nodes)))
- goto cleanup;
-
- data = g_new0(qemuCgroupEmulatorAllNodesData, 1);
-
- if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
- false, &data->emulatorCgroup) < 0)
- goto cleanup;
-
- if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask)
< 0 ||
- virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
- goto cleanup;
-
- *retData = g_steal_pointer(&data);
- ret = 0;
-
- cleanup:
- qemuCgroupEmulatorAllNodesDataFree(data);
-
- return ret;
-}
-
-
-/**
- * qemuCgroupEmulatorAllNodesRestore:
- * @data: data structure created by qemuCgroupEmulatorAllNodesAllow
- *
- * Rolls back the setting done by qemuCgroupEmulatorAllNodesAllow and frees the
- * associated data.
- */
-void
-qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data)
-{
- virErrorPtr err;
-
- if (!data)
- return;
-
- virErrorPreserveLast(&err);
- virCgroupSetCpusetMems(data->emulatorCgroup, data->emulatorMemMask);
- virErrorRestore(&err);
-
- qemuCgroupEmulatorAllNodesDataFree(data);
-}
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
index cd537ebd82..f09134947f 100644
--- a/src/qemu/qemu_cgroup.h
+++ b/src/qemu/qemu_cgroup.h
@@ -56,18 +56,11 @@ int qemuSetupChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
int qemuTeardownChardevCgroup(virDomainObj *vm,
virDomainChrDef *dev);
-int qemuConnectCgroup(virDomainObj *vm);
int qemuSetupCgroup(virDomainObj *vm,
size_t nnicindexes,
int *nicindexes);
-int qemuSetupCgroupVcpuBW(virCgroup *cgroup,
- unsigned long long period,
- long long quota);
-int qemuSetupCgroupCpusetCpus(virCgroup *cgroup, virBitmap *cpumask);
-int qemuSetupGlobalCpuCgroup(virDomainObj *vm);
int qemuSetupCgroupForExtDevices(virDomainObj *vm,
virQEMUDriver *driver);
-int qemuRemoveCgroup(virDomainObj *vm);
typedef struct _qemuCgroupEmulatorAllNodesData qemuCgroupEmulatorAllNodesData;
struct _qemuCgroupEmulatorAllNodesData {
@@ -75,8 +68,4 @@ struct _qemuCgroupEmulatorAllNodesData {
char *emulatorMemMask;
};
-int qemuCgroupEmulatorAllNodesAllow(virCgroup *cgroup,
- qemuCgroupEmulatorAllNodesData **data);
-void qemuCgroupEmulatorAllNodesRestore(qemuCgroupEmulatorAllNodesData *data);
-
extern const char *const defaultDeviceACL[];
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ba3efef42b..d365036743 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4424,7 +4424,7 @@ qemuDomainPinVcpuLive(virDomainObj *vm,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpu,
false, &cgroup_vcpu) < 0)
goto cleanup;
- if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
+ if (virSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
goto cleanup;
}
@@ -4633,7 +4633,7 @@ qemuDomainPinEmulator(virDomainPtr dom,
0, false, &cgroup_emulator) < 0)
goto endjob;
- if (qemuSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
+ if (virSetupCgroupCpusetCpus(cgroup_emulator, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID, "%s",
_("failed to set cpuset.cpus in cgroup"
" for emulator threads"));
@@ -5038,7 +5038,7 @@ qemuDomainPinIOThread(virDomainPtr dom,
if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
iothread_id, false, &cgroup_iothread) < 0)
goto endjob;
- if (qemuSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
+ if (virSetupCgroupCpusetCpus(cgroup_iothread, pcpumap) < 0) {
virReportError(VIR_ERR_OPERATION_INVALID,
_("failed to set cpuset.cpus in cgroup"
" for iothread %d"), iothread_id);
@@ -8926,7 +8926,7 @@ qemuSetGlobalBWLive(virCgroup *cgroup, unsigned long long period,
if (period == 0 && quota == 0)
return 0;
- if (qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+ if (virSetupCgroupVcpuBW(cgroup, period, quota) < 0)
return -1;
return 0;
@@ -9121,7 +9121,7 @@ qemuSetVcpusBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_vcpu) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
+ if (virSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
return -1;
}
@@ -9142,7 +9142,7 @@ qemuSetEmulatorBandwidthLive(virCgroup *cgroup,
false, &cgroup_emulator) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
+ if (virSetupCgroupVcpuBW(cgroup_emulator, period, quota) < 0)
return -1;
return 0;
@@ -9169,7 +9169,7 @@ qemuSetIOThreadsBWLive(virDomainObj *vm, virCgroup *cgroup,
false, &cgroup_iothread) < 0)
return -1;
- if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
+ if (virSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
return -1;
}
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
index 04c6600f26..30f04aac57 100644
--- a/src/qemu/qemu_hotplug.c
+++ b/src/qemu/qemu_hotplug.c
@@ -37,6 +37,7 @@
#include "qemu_snapshot.h"
#include "qemu_virtiofs.h"
#include "domain_audit.h"
+#include "domain_cgroup.h"
#include "netdev_bandwidth_conf.h"
#include "domain_nwfilter.h"
#include "virlog.h"
@@ -6551,11 +6552,11 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
bool enable)
{
qemuDomainObjPrivate *priv = vm->privateData;
- qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+ virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
ssize_t nextvcpu = -1;
int ret = -1;
- if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+ if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
goto cleanup;
if (enable) {
@@ -6576,7 +6577,7 @@ qemuDomainSetVcpusLive(virQEMUDriver *driver,
ret = 0;
cleanup:
- qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+ virCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 4963ce383f..21c41e83c9 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -73,6 +73,7 @@
#include "virpidfile.h"
#include "virhostcpu.h"
#include "domain_audit.h"
+#include "domain_cgroup.h"
#include "domain_nwfilter.h"
#include "domain_validate.h"
#include "locking/domain_lock.h"
@@ -2744,7 +2745,7 @@ qemuProcessSetupPid(virDomainObj *vm,
if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
if (use_cpumask &&
- qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
+ virSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
goto cleanup;
if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
@@ -2753,7 +2754,7 @@ qemuProcessSetupPid(virDomainObj *vm,
}
if ((period || quota) &&
- qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
+ virSetupCgroupVcpuBW(cgroup, period, quota) < 0)
goto cleanup;
/* Move the thread to the sub dir */
@@ -6027,7 +6028,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
{
unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
qemuDomainObjPrivate *priv = vm->privateData;
- qemuCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
+ virCgroupEmulatorAllNodesData *emulatorCgroup = NULL;
virDomainVcpuDef *vcpu;
qemuDomainVcpuPrivate *vcpupriv;
size_t i;
@@ -6055,7 +6056,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
qemuProcessVcpusSortOrder);
- if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
+ if (virCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
goto cleanup;
for (i = 0; i < nbootHotplug; i++) {
@@ -6079,7 +6080,7 @@ qemuProcessSetupHotpluggableVcpus(virQEMUDriver *driver,
ret = 0;
cleanup:
- qemuCgroupEmulatorAllNodesRestore(emulatorCgroup);
+ virCgroupEmulatorAllNodesRestore(emulatorCgroup);
return ret;
}
@@ -7061,7 +7062,7 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
/* Ensure no historical cgroup for this VM is lying around bogus
* settings */
VIR_DEBUG("Ensuring no historical cgroup is lying around");
- qemuRemoveCgroup(vm);
+ virRemoveCgroup(vm, priv->cgroup, priv->machineName);
if (g_mkdir_with_parents(cfg->logDir, 0777) < 0) {
virReportSystemError(errno,
@@ -7678,7 +7679,7 @@ qemuProcessLaunch(virConnectPtr conn,
goto cleanup;
VIR_DEBUG("Setting global CPU cgroup (if required)");
- if (qemuSetupGlobalCpuCgroup(vm) < 0)
+ if (virSetupGlobalCpuCgroup(vm, priv->cgroup, priv->autoNodeset) < 0)
goto cleanup;
VIR_DEBUG("Setting vCPU tuning/settings");
@@ -8290,7 +8291,7 @@ void qemuProcessStop(virQEMUDriver *driver,
}
retry:
- if ((ret = qemuRemoveCgroup(vm)) < 0) {
+ if ((ret = virRemoveCgroup(vm, priv->cgroup, priv->machineName)) < 0) {
if (ret == -EBUSY && (retries++ < 5)) {
g_usleep(200*1000);
goto retry;
@@ -8849,7 +8850,8 @@ qemuProcessReconnect(void *opaque)
if (!priv->machineName)
goto error;
- if (qemuConnectCgroup(obj) < 0)
+ if (virConnectCgroup("qemu", obj, priv->cgroup,
cfg->cgroupControllers,
+ priv->driver->privileged, priv->machineName) < 0)
goto error;
if (qemuDomainPerfRestart(obj) < 0)
--
2.27.0