The current LXC I/O controller looks for HUP to detect
when a guest has quit. This isn't reliable as during
initial bootup it is possible that 'init' will close
the console and let mingetty re-open it. The shutdown
of containers was also flakey because it only killed
the libvirt I/O controller and expected container
processes to gracefully follow.
Change the I/O controller such that when it see HUP
or an I/O error, it uses kill($PID, 0) to see if the
process has really quit.
Change the container shutdown sequence to use the
virCgroupKillPainfully function to ensure every
really goes away
* docs/drvlxc.html.in: Document that certain cgroups
controllers are now mandatory
* src/lxc/lxc_controller.c: Check if PID is still
alive before quitting on I/O error/HUP
* src/lxc/lxc_driver.c: Use virCgroupKillPainfully
---
docs/drvlxc.html.in | 18 ++++++
src/lxc/lxc_controller.c | 42 ++++++++++----
src/lxc/lxc_driver.c | 142 ++++++++++++++++------------------------------
3 files changed, 98 insertions(+), 104 deletions(-)
diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in
index 35058c4..3e715b1 100644
--- a/docs/drvlxc.html.in
+++ b/docs/drvlxc.html.in
@@ -9,6 +9,24 @@ light-weight "application container" which does not have
it's own root image. Y
start it using
</p>
+<h2>Cgroups Requirements</h2>
+
+<p>
+The libvirt LXC driver requires that certain cgroups controllers are
+mounted on the host OS. The minimum required controllers are 'cpuacct',
+'memory' and 'devices', while recommended extra controllers are
+'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf &
cgconfig
+init service used to mount cgroups at host boot time. To manually
+mount them use. NB, the blkio controller in some kernels will not
+allow creation of nested sub-directories which will prevent correct
+operation of the libvirt LXC driver. On such kernels the blkio controller
+must not be mounted.
+</p>
+
+<pre>
+ # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
+</pre>
+
<h3>Example config version 1</h3>
<p></p>
<pre>
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index b742a33..0cd7dc4 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -31,6 +31,7 @@
#include <sys/un.h>
#include <unistd.h>
#include <paths.h>
+#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <getopt.h>
@@ -119,12 +120,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
virReportSystemError(-rc,
_("Unable to set memory limit for domain %s"),
def->name);
- /* Don't fail if we can't set memory due to lack of kernel support */
- if (rc != -ENOENT)
- goto cleanup;
+ goto cleanup;
}
- if(def->mem.hard_limit) {
+ if (def->mem.hard_limit) {
rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
if (rc != 0) {
virReportSystemError(-rc,
@@ -134,7 +133,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
}
}
- if(def->mem.soft_limit) {
+ if (def->mem.soft_limit) {
rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
if (rc != 0) {
virReportSystemError(-rc,
@@ -144,7 +143,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
}
}
- if(def->mem.swap_hard_limit) {
+ if (def->mem.swap_hard_limit) {
rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
if (rc != 0) {
virReportSystemError(-rc,
@@ -323,6 +322,18 @@ ignorable_epoll_accept_errno(int errnum)
|| errnum == EWOULDBLOCK);
}
+static bool
+lxcPidGone(pid_t container)
+{
+ waitpid(container, NULL, WNOHANG);
+
+ if (kill(container, 0) < 0 &&
+ errno == ESRCH)
+ return true;
+
+ return false;
+}
+
/**
* lxcControllerMain
* @monitor: server socket fd to accept client requests
@@ -340,7 +351,8 @@ ignorable_epoll_accept_errno(int errnum)
static int lxcControllerMain(int monitor,
int client,
int appPty,
- int contPty)
+ int contPty,
+ pid_t container)
{
int rc = -1;
int epollFd;
@@ -446,7 +458,13 @@ static int lxcControllerMain(int monitor,
++numActive;
}
} else if (epollEvent.events & EPOLLHUP) {
- VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+ if (lxcPidGone(container))
+ goto cleanup;
+ curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+ if (fdArray[curFdOff].active) {
+ fdArray[curFdOff].active = 0;
+ --numActive;
+ }
continue;
} else {
lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -485,7 +503,9 @@ static int lxcControllerMain(int monitor,
--numActive;
fdArray[curFdOff].active = 0;
} else if (-1 == rc) {
- goto cleanup;
+ if (lxcPidGone(container))
+ goto cleanup;
+ continue;
}
}
@@ -564,7 +584,7 @@ lxcControllerRun(virDomainDefPtr def,
int rc = -1;
int control[2] = { -1, -1};
int containerPty = -1;
- char *containerPtyPath;
+ char *containerPtyPath = NULL;
pid_t container = -1;
virDomainFSDefPtr root;
char *devpts = NULL;
@@ -683,7 +703,7 @@ lxcControllerRun(virDomainDefPtr def,
if (lxcControllerClearCapabilities() < 0)
goto cleanup;
- rc = lxcControllerMain(monitor, client, appPty, containerPty);
+ rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
cleanup:
VIR_FREE(devptmx);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 625777a..e47c201 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -953,36 +953,16 @@ cleanup:
* @driver: pointer to driver structure
* @vm: pointer to VM to clean up
*
- * waitpid() on the container process. kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
- *
- * Returns 0 on success or -1 in case of error
+ * Cleanout resources associated with the now dead VM
+ *
*/
-static int lxcVmCleanup(lxc_driver_t *driver,
+static void lxcVmCleanup(lxc_driver_t *driver,
virDomainObjPtr vm)
{
- int rc = 0;
- int waitRc;
- int childStatus = -1;
virCgroupPtr cgroup;
int i;
lxcDomainObjPrivatePtr priv = vm->privateData;
- while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
- errno == EINTR)
- ; /* empty */
-
- if ((waitRc != vm->pid) && (errno != ECHILD)) {
- virReportSystemError(errno,
- _("waitpid failed to wait for container %d: %d"),
- vm->pid, waitRc);
- rc = -1;
- } else if (WIFEXITED(childStatus)) {
- VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
- rc = -1;
- }
-
/* now that we know it's stopped call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
char *xml = virDomainDefFormat(vm->def, 0);
@@ -1022,8 +1002,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
vm->def->id = -1;
vm->newDef = NULL;
}
-
- return rc;
}
/**
@@ -1182,11 +1160,10 @@ error:
static int lxcVmTerminate(lxc_driver_t *driver,
- virDomainObjPtr vm,
- int signum)
+ virDomainObjPtr vm)
{
- if (signum == 0)
- signum = SIGINT;
+ virCgroupPtr group = NULL;
+ int rc;
if (vm->pid <= 0) {
lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1194,18 +1171,30 @@ static int lxcVmTerminate(lxc_driver_t *driver,
return -1;
}
- if (kill(vm->pid, signum) < 0) {
- if (errno != ESRCH) {
- virReportSystemError(errno,
- _("Failed to kill pid %d"),
- vm->pid);
- return -1;
- }
- }
+ if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+ return -1;
+ rc = virCgroupKillPainfully(group);
+ if (rc < 0) {
+ virReportSystemError(-rc, "%s",
+ _("Failed to kill container PIDs"));
+ rc = -1;
+ goto cleanup;
+ }
+ if (rc == 1) {
+ lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Some container PIDs refused to die"));
+ rc = -1;
+ goto cleanup;
+ }
+ lxcVmCleanup(driver, vm);
vm->state = VIR_DOMAIN_SHUTDOWN;
- return lxcVmCleanup(driver, vm);
+ rc = 0;
+
+cleanup:
+ virCgroupFree(&group);
+ return rc;
}
static void lxcMonitorEvent(int watch,
@@ -1229,7 +1218,7 @@ static void lxcMonitorEvent(int watch,
goto cleanup;
}
- if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
+ if (lxcVmTerminate(driver, vm) < 0) {
virEventRemoveHandle(watch);
} else {
event = virDomainEventNewFromObj(vm,
@@ -1544,7 +1533,7 @@ static int lxcVmStart(virConnectPtr conn,
VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
lxcMonitorEvent,
vm, NULL)) < 0) {
- lxcVmTerminate(driver, vm, 0);
+ lxcVmTerminate(driver, vm);
goto cleanup;
}
@@ -1712,55 +1701,6 @@ cleanup:
return dom;
}
-/**
- * lxcDomainShutdown:
- * @dom: pointer to domain to shutdown
- *
- * Sends SIGINT to container root process to request it to shutdown
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
- lxc_driver_t *driver = dom->conn->privateData;
- virDomainObjPtr vm;
- virDomainEventPtr event = NULL;
- int ret = -1;
-
- lxcDriverLock(driver);
- vm = virDomainFindByUUID(&driver->domains, dom->uuid);
- if (!vm) {
- char uuidstr[VIR_UUID_STRING_BUFLEN];
- virUUIDFormat(dom->uuid, uuidstr);
- lxcError(VIR_ERR_NO_DOMAIN,
- _("No domain with matching uuid '%s'"), uuidstr);
- goto cleanup;
- }
-
- if (!virDomainObjIsActive(vm)) {
- lxcError(VIR_ERR_OPERATION_INVALID,
- "%s", _("Domain is not running"));
- goto cleanup;
- }
-
- ret = lxcVmTerminate(driver, vm, 0);
- event = virDomainEventNewFromObj(vm,
- VIR_DOMAIN_EVENT_STOPPED,
- VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
- if (!vm->persistent) {
- virDomainRemoveInactive(&driver->domains, vm);
- vm = NULL;
- }
-
-cleanup:
- if (vm)
- virDomainObjUnlock(vm);
- if (event)
- lxcDomainEventQueue(driver, event);
- lxcDriverUnlock(driver);
- return ret;
-}
-
static int
lxcDomainEventRegister(virConnectPtr conn,
@@ -1928,7 +1868,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
goto cleanup;
}
- ret = lxcVmTerminate(driver, vm, SIGKILL);
+ ret = lxcVmTerminate(driver, vm);
event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED,
VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -2057,7 +1997,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED,
void *opaque)
VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
lxcMonitorEvent,
vm, NULL)) < 0) {
- lxcVmTerminate(driver, vm, 0);
+ lxcVmTerminate(driver, vm);
goto cleanup;
}
} else {
@@ -2124,8 +2064,24 @@ static int lxcStartup(int privileged)
rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
if (rc < 0) {
char buf[1024];
- VIR_WARN("Unable to create cgroup for driver: %s",
+ VIR_WARN("Unable to create cgroup, disabling LXC driver: %s",
virStrerror(-rc, buf, sizeof(buf)));
+ goto cleanup;
+ }
+ if (!virCgroupMounted(lxc_driver->cgroup,
+ VIR_CGROUP_CONTROLLER_CPUACCT)) {
+ VIR_WARN0("Unable to find 'cpuacct' cgroups controller
mount");
+ goto cleanup;
+ }
+ if (!virCgroupMounted(lxc_driver->cgroup,
+ VIR_CGROUP_CONTROLLER_DEVICES)) {
+ VIR_WARN0("Unable to find 'devices' cgroups controller
mount");
+ goto cleanup;
+ }
+ if (!virCgroupMounted(lxc_driver->cgroup,
+ VIR_CGROUP_CONTROLLER_MEMORY)) {
+ VIR_WARN0("Unable to find 'memory' cgroups controller mount");
+ goto cleanup;
}
/* Call function to load lxc driver configuration information */
@@ -2845,7 +2801,7 @@ static virDriver lxcDriver = {
lxcDomainLookupByName, /* domainLookupByName */
lxcDomainSuspend, /* domainSuspend */
lxcDomainResume, /* domainResume */
- lxcDomainShutdown, /* domainShutdown */
+ NULL, /* domainShutdown */
NULL, /* domainReboot */
lxcDomainDestroy, /* domainDestroy */
lxcGetOSType, /* domainGetOSType */
--
1.7.4