Hi,
This patch implements suspend/resume functions for LXC
using the cgroups freezer subsystem.
Using the freezer is quite simple and is well documented
in Documentation/cgroups/freezer-subsystem.txt. The patch
just uses it as is, like this:
# freeze the container
echo FROZEN > /target/container/freezer.state
# unfreeze the container
echo THAWED > /target/container/freezer.state
However, the freezer sometimes behaves differently from what the
document describes. The document says "It's important to note that
freezing can be incomplete. In that case we return EBUSY.",
but even when the write returns 0, freezing can still be incomplete
under load. So the code does not trust the return value and always
checks the content of freezer.state to determine whether the
freezing has finished.
Please refer to the comments in the code for more details.
The patch also modifies domain_conf.c, which assumed that
obj->monitor_chr != NULL; that is true only for the qemu driver.
Thanks,
ozaki-r
From 50d273941eaf764f3ac3458ff10e618c771b77ef Mon Sep 17 00:00:00 2001
From: Ryota Ozaki <ozaki.ryota(a)gmail.com>
Date: Wed, 16 Sep 2009 17:59:54 +0900
Subject: [PATCH] lxc: suspend/resume support
---
src/cgroup.c | 23 ++++++-
src/cgroup.h | 4 +
src/domain_conf.c | 27 ++++----
src/lxc_driver.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 225 insertions(+), 15 deletions(-)
diff --git a/src/cgroup.c b/src/cgroup.c
index 111601d..2e646fd 100644
--- a/src/cgroup.c
+++ b/src/cgroup.c
@@ -32,7 +32,8 @@
#define CGROUP_MAX_VAL 512
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
- "cpu", "cpuacct", "cpuset",
"memory", "devices");
+ "cpu", "cpuacct", "cpuset",
"memory", "devices",
+ "freezer");
struct virCgroupController {
int type;
@@ -896,3 +897,23 @@ int virCgroupGetCpuacctUsage(virCgroupPtr group,
unsigned long long *usage)
VIR_CGROUP_CONTROLLER_CPUACCT,
"cpuacct.usage", (uint64_t *)usage);
}
+
+/**
+ * virCgroupSetFreezerState:
+ *
+ * @group: The cgroup whose freezer state is changed
+ * @state: Value written to "freezer.state" (the callers in this
+ *         patch pass "FROZEN" or "THAWED")
+ *
+ * Returns: the result of virCgroupSetValueStr() unchanged; the callers
+ * in this patch treat a negative value as a failure.
+ */
+int virCgroupSetFreezerState(virCgroupPtr group, const char *state)
+{
+    /* "freezer.state" is a file of the freezer controller, so it must
+     * be addressed through that controller rather than through cpu. */
+    return virCgroupSetValueStr(group,
+                                VIR_CGROUP_CONTROLLER_FREEZER,
+                                "freezer.state", state);
+}
+
+/**
+ * virCgroupGetFreezerState:
+ *
+ * @group: The cgroup whose freezer state is read
+ * @state: Receives the content of "freezer.state", cut at the first
+ *         newline; it is only modified here when the read returns 0.
+ *         The caller in this patch releases it with VIR_FREE().
+ *
+ * Returns: the result of virCgroupGetValueStr() unchanged.
+ */
+int virCgroupGetFreezerState(virCgroupPtr group, char **state)
+{
+    int ret;
+
+    /* "freezer.state" is a file of the freezer controller, so it must
+     * be addressed through that controller rather than through cpu. */
+    ret = virCgroupGetValueStr(group,
+                               VIR_CGROUP_CONTROLLER_FREEZER,
+                               "freezer.state", state);
+    if (ret == 0) {
+        /* Terminate the value at the first newline so that callers can
+         * compare it directly against strings such as "FROZEN". */
+        char *p = strchr(*state, '\n');
+        if (p)
+            *p = '\0';
+    }
+    return ret;
+}
diff --git a/src/cgroup.h b/src/cgroup.h
index 6d43b14..aba56c6 100644
--- a/src/cgroup.h
+++ b/src/cgroup.h
@@ -21,6 +21,7 @@ enum {
VIR_CGROUP_CONTROLLER_CPUSET,
VIR_CGROUP_CONTROLLER_MEMORY,
VIR_CGROUP_CONTROLLER_DEVICES,
+ VIR_CGROUP_CONTROLLER_FREEZER,
VIR_CGROUP_CONTROLLER_LAST
};
@@ -68,6 +69,9 @@ int virCgroupGetCpuShares(virCgroupPtr group,
unsigned long long *shares);
int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage);
+int virCgroupSetFreezerState(virCgroupPtr group, const char *state);
+int virCgroupGetFreezerState(virCgroupPtr group, char **state);
+
int virCgroupRemove(virCgroupPtr group);
void virCgroupFree(virCgroupPtr *group);
diff --git a/src/domain_conf.c b/src/domain_conf.c
index 5ae0775..5e37d96 100644
--- a/src/domain_conf.c
+++ b/src/domain_conf.c
@@ -4433,19 +4433,22 @@ char *virDomainObjFormat(virConnectPtr conn,
virDomainStateTypeToString(obj->state),
obj->pid);
- switch (obj->monitor_chr->type) {
- case VIR_DOMAIN_CHR_TYPE_UNIX:
- monitorpath = obj->monitor_chr->data.nix.path;
- break;
- default:
- case VIR_DOMAIN_CHR_TYPE_PTY:
- monitorpath = obj->monitor_chr->data.file.path;
- break;
- }
+ /* obj->monitor_chr is set only for qemu */
+ if (obj->monitor_chr) {
+ switch (obj->monitor_chr->type) {
+ case VIR_DOMAIN_CHR_TYPE_UNIX:
+ monitorpath = obj->monitor_chr->data.nix.path;
+ break;
+ default:
+ case VIR_DOMAIN_CHR_TYPE_PTY:
+ monitorpath = obj->monitor_chr->data.file.path;
+ break;
+ }
- virBufferEscapeString(&buf, " <monitor path='%s'",
monitorpath);
- virBufferVSprintf(&buf, " type='%s'/>¥n",
- virDomainChrTypeToString(obj->monitor_chr->type));
+ virBufferEscapeString(&buf, " <monitor path='%s'",
monitorpath);
+ virBufferVSprintf(&buf, " type='%s'/>¥n",
+ virDomainChrTypeToString(obj->monitor_chr->type));
+ }
if (obj->nvcpupids) {
diff --git a/src/lxc_driver.c b/src/lxc_driver.c
index 0ec1e92..2399d98 100644
--- a/src/lxc_driver.c
+++ b/src/lxc_driver.c
@@ -1862,6 +1862,188 @@ static char *lxcGetHostname (virConnectPtr conn)
return result;
}
+static int lxcFreezeContainer(lxc_driver_t *driver, virDomainObjPtr vm)
+{ /*
+ * Freeze the container of @vm by writing "FROZEN" to the freezer.state
+ * file of the domain's cgroup, then poll that file until it actually
+ * reads "FROZEN".
+ *
+ * Returns 0 once "FROZEN" has been read back. Returns -1 when the
+ * driver has no cgroup, when the domain's cgroup cannot be obtained,
+ * when writing fails with anything other than -EBUSY, when reading
+ * fails, or when all polls are used up. On the last three failures the
+ * group is written back to "THAWED" before returning.
+ */
+ int timeout = 3; /* In seconds */
+ int check_interval = 500; /* In milliseconds */
+ int n_try = (timeout * (1000 / check_interval)); /* 3 s / 500 ms = 6 polls */
+ int i = 0;
+ int ret = -1;
+ char *state = NULL;
+ virCgroupPtr cgroup = NULL;
+
+ if (!(driver->cgroup &&
+ virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) ==
0))
+ return -1;
+
+ while (++i <= n_try) {
+ int r;
+ /*
+ * Writing "FROZEN" to "freezer.state" freezes the group,
+ * i.e., the container, temporarily passing through the "FREEZING"
+ * state. Once the freezing is completed, the state of the group
+ * changes to "FROZEN".
+ * (see linux-2.6/Documentation/cgroups/freezer-subsystem.txt)
+ */
+ r = virCgroupSetFreezerState(cgroup, "FROZEN");
+
+ /*
+ * Returning EBUSY explicitly indicates that the group is
+ * being frozen but the freezing is incomplete; any other
+ * error is a real error.
+ */
+ if (r < 0 && r != -EBUSY) {
+ VIR_DEBUG("Writing freezer.state failed with errno: %d", r);
+ goto error;
+ }
+
+ /*
+ * Unfortunately, a return value of 0 (success) is likely to
+ * be seen even when the freezing has not been completed.
+ * Sometimes the state of the group remains "FREEZING", just as
+ * when -EBUSY is returned, and even worse it may never change
+ * to "FROZEN" even if "FROZEN" is written again.
+ *
+ * So we don't trust the return value anyway and always
+ * decide that the freezing has completed only when the
+ * state has actually changed to "FROZEN".
+ */
+ usleep(check_interval * 1000); /* usleep() takes microseconds */
+
+ r = virCgroupGetFreezerState(cgroup, &state);
+
+ if (r < 0) {
+ VIR_DEBUG("Reading freezer.state failed with errno: %d", r);
+ goto error;
+ }
+ VIR_DEBUG("Read freezer.state: %s", state);
+
+ if (STREQ(state, "FROZEN")) {
+ ret = 0;
+ goto cleanup;
+ }
+
+ VIR_FREE(state); /* release this poll's value before the next read */
+ }
+ VIR_DEBUG0("lxcFreezeContainer timeout");
+error:
+ /*
+ * Reached on timeout or when writing or reading freezer.state
+ * fails: thaw the group again and return an error.
+ * This is intended to let the group fall back gracefully; the
+ * result of this write is not checked.
+ */
+ virCgroupSetFreezerState(cgroup, "THAWED");
+ ret = -1;
+
+cleanup:
+ if (cgroup)
+ virCgroupFree(&cgroup);
+ VIR_FREE(state);
+ return ret;
+}
+/*
+ * domainSuspend entry point of the LXC driver.
+ *
+ * Looks the domain up by UUID while holding the driver lock. Reports
+ * VIR_ERR_NO_DOMAIN when it is not found and VIR_ERR_OPERATION_INVALID
+ * when it is not active. A domain that is not already in
+ * VIR_DOMAIN_PAUSED is frozen with lxcFreezeContainer() and then marked
+ * VIR_DOMAIN_PAUSED; a failed freeze reports VIR_ERR_OPERATION_FAILED.
+ * Finally the domain status is saved under driver->stateDir.
+ *
+ * Returns 0 on success and -1 on any of the failures above or when
+ * saving the status fails.
+ */
+static int lxcDomainSuspend(virDomainPtr dom)
+{
+ lxc_driver_t *driver = dom->conn->privateData;
+ virDomainObjPtr vm;
+ int ret = -1;
+
+ lxcDriverLock(driver);
+ vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+
+ if (!vm) {
+ char uuidstr[VIR_UUID_STRING_BUFLEN];
+ virUUIDFormat(dom->uuid, uuidstr);
+ lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
+ _("no domain with matching uuid '%s'"), uuidstr);
+ goto cleanup;
+ }
+
+ if (!virDomainIsActive(vm)) {
+ lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto cleanup;
+ }
+
+ /* An already paused domain is not frozen again; only its status is re-saved */
+ if (vm->state != VIR_DOMAIN_PAUSED) {
+ if (lxcFreezeContainer(driver, vm) < 0) {
+ lxcError(dom->conn, dom, VIR_ERR_OPERATION_FAILED,
+ "%s", _("suspend operation failed"));
+ goto cleanup;
+ }
+ vm->state = VIR_DOMAIN_PAUSED;
+ }
+
+ if (virDomainSaveStatus(dom->conn, driver->stateDir, vm) < 0)
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ /* NOTE(review): presumably virDomainFindByUUID() returns vm locked - confirm */
+ if (vm)
+ virDomainObjUnlock(vm);
+ lxcDriverUnlock(driver);
+ return ret;
+}
+/*
+ * Thaw the container of @vm by writing "THAWED" to the freezer.state
+ * file of the domain's cgroup.
+ *
+ * Returns -1 when the driver has no cgroup or the domain's cgroup
+ * cannot be obtained; otherwise returns the result of
+ * virCgroupSetFreezerState(). Unlike lxcFreezeContainer(), the state is
+ * not read back afterwards.
+ */
+static int lxcUnfreezeContainer(lxc_driver_t *driver, virDomainObjPtr vm)
+{
+ int ret;
+ virCgroupPtr cgroup = NULL;
+
+ if (!(driver->cgroup &&
+ virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) ==
0))
+ return -1;
+
+ ret = virCgroupSetFreezerState(cgroup, "THAWED");
+
+ virCgroupFree(&cgroup);
+ return ret;
+}
+/*
+ * domainResume entry point of the LXC driver.
+ *
+ * Looks the domain up by UUID while holding the driver lock. Reports
+ * VIR_ERR_NO_DOMAIN when it is not found and VIR_ERR_OPERATION_INVALID
+ * when it is not active. A domain in VIR_DOMAIN_PAUSED is thawed with
+ * lxcUnfreezeContainer() and then marked VIR_DOMAIN_RUNNING; a failed
+ * thaw reports VIR_ERR_OPERATION_FAILED. Finally the domain status is
+ * saved under driver->stateDir.
+ *
+ * Returns 0 on success and -1 on any of the failures above or when
+ * saving the status fails.
+ */
+static int lxcDomainResume(virDomainPtr dom)
+{
+ lxc_driver_t *driver = dom->conn->privateData;
+ virDomainObjPtr vm;
+ int ret = -1;
+
+ lxcDriverLock(driver);
+ vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+
+ if (!vm) {
+ char uuidstr[VIR_UUID_STRING_BUFLEN];
+ virUUIDFormat(dom->uuid, uuidstr);
+ lxcError(dom->conn, dom, VIR_ERR_NO_DOMAIN,
+ _("no domain with matching uuid '%s'"), uuidstr);
+ goto cleanup;
+ }
+
+ if (!virDomainIsActive(vm)) {
+ lxcError(dom->conn, dom, VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto cleanup;
+ }
+
+ /* A domain that is not paused is left alone; only its status is re-saved */
+ if (vm->state == VIR_DOMAIN_PAUSED) {
+ if (lxcUnfreezeContainer(driver, vm) < 0) {
+ lxcError(dom->conn, dom, VIR_ERR_OPERATION_FAILED,
+ "%s", _("resume operation failed"));
+ goto cleanup;
+ }
+ vm->state = VIR_DOMAIN_RUNNING;
+ }
+
+ if (virDomainSaveStatus(dom->conn, driver->stateDir, vm) < 0)
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ /* NOTE(review): presumably virDomainFindByUUID() returns vm locked - confirm */
+ if (vm)
+ virDomainObjUnlock(vm);
+ lxcDriverUnlock(driver);
+ return ret;
+}
+
+
/* Function Tables */
static virDriver lxcDriver = {
VIR_DRV_LXC, /* the number virDrvNo */
@@ -1881,8 +2063,8 @@ static virDriver lxcDriver = {
lxcDomainLookupByID, /* domainLookupByID */
lxcDomainLookupByUUID, /* domainLookupByUUID */
lxcDomainLookupByName, /* domainLookupByName */
- NULL, /* domainSuspend */
- NULL, /* domainResume */
+ lxcDomainSuspend, /* domainSuspend */
+ lxcDomainResume, /* domainResume */
lxcDomainShutdown, /* domainShutdown */
NULL, /* domainReboot */
lxcDomainDestroy, /* domainDestroy */
--
1.6.0.6