Currently the lxc controller process itself is responsible for
daemonizing itself into the background and writing out its pid
file. The lxc driver would fork the controller and then attempt
to connect to the lxc monitor. This connection would only
succeed after the controller has backgrounded itself, set up
cgroups and written its pid file, so startup was race free.
The problem is that we need to delay creation of the cgroups to
much later, such that we can tell systemd the container init
pid when we create the cgroups. If we delay cgroup creation,
though, the current synchronization won't work.
A second problem is that the controller needs the XML config
of the guest. Currently we write out the plain virDomainDefPtr
XML before starting the controller, and then later replace it
with the full virDomainObjPtr status XML. This is kind of gross
and also means that the controller doesn't get a record of the
live XML config right away. This means it doesn't have a record
of the veth device names either and so can't give that info
to systemd when creating the cgroups.
To address this we change the startup sequencing. The goal
is that we want to get the PID as soon as possible, before
the LXC controller even starts. So we stop letting the LXC
controller daemonize itself, and instead use virCommand's
built-in capabilities. This daemonizes and writes the PID
before the LXC controller is exec'd. So the driver can read
the PID as soon as virCommandRun returns. It is no longer safe
to connect to the monitor or detect the cgroups at that point, though.
Fortunately the LXC controller already has a second point
of synchronization. Immediately before its event loop
starts running, it performs a handshake with the driver.
So we move the opening of the monitor connection and cgroup
detection after this synchronization point.
---
src/lxc/lxc_process.c | 88 +++++++++++++++++++++++++++------------------------
1 file changed, 47 insertions(+), 41 deletions(-)
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 6bdfe3d..764cdab 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -750,7 +750,9 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
size_t nttyFDs,
int *files,
size_t nfiles,
- int handshakefd)
+ int handshakefd,
+ int logfd,
+ const char *pidfile)
{
size_t i;
char *filterstr;
@@ -812,12 +814,15 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
virCommandAddArg(cmd, "--handshake");
virCommandAddArgFormat(cmd, "%d", handshakefd);
- virCommandAddArg(cmd, "--background");
for (i = 0; i < nveths; i++)
virCommandAddArgList(cmd, "--veth", veths[i], NULL);
virCommandPassFD(cmd, handshakefd, 0);
+ virCommandDaemonize(cmd);
+ virCommandSetPidFile(cmd, pidfile);
+ virCommandSetOutputFD(cmd, &logfd);
+ virCommandSetErrorFD(cmd, &logfd);
return cmd;
cleanup:
@@ -1189,10 +1194,10 @@ int virLXCProcessStart(virConnectPtr conn,
nveths, veths,
ttyFDs, nttyFDs,
files, nfiles,
- handshakefds[1])))
+ handshakefds[1],
+ logfd,
+ pidfile)))
goto cleanup;
- virCommandSetOutputFD(cmd, &logfd);
- virCommandSetErrorFD(cmd, &logfd);
/* now that we know it is about to start call the hook if present */
if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
@@ -1245,28 +1250,7 @@ int virLXCProcessStart(virConnectPtr conn,
goto cleanup;
}
-
- if (VIR_CLOSE(handshakefds[1]) < 0) {
- virReportSystemError(errno, "%s", _("could not close handshake
fd"));
- goto cleanup;
- }
-
- /* Connect to the controller as a client *first* because
- * this will block until the child has written their
- * pid file out to disk & created their cgroup */
- if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
- /* Intentionally overwrite the real monitor error message,
- * since a better one is almost always found in the logs
- */
- if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) {
- virResetLastError();
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("guest failed to start: %s"), ebuf);
- }
- goto cleanup;
- }
-
- /* And get its pid */
+ /* It has started running, so get its pid */
if ((r = virPidFileReadPath(pidfile, &vm->pid)) < 0) {
if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0)
virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -1278,26 +1262,17 @@ int virLXCProcessStart(virConnectPtr conn,
goto cleanup;
}
- if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid,
- vm->def->resource ?
- vm->def->resource->partition :
- NULL,
- -1, &priv->cgroup) < 0)
- goto error;
-
- if (!priv->cgroup) {
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("No valid cgroup for machine %s"),
- vm->def->name);
- goto error;
- }
-
priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
priv->wantReboot = false;
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
priv->doneStopEvent = false;
+ if (VIR_CLOSE(handshakefds[1]) < 0) {
+ virReportSystemError(errno, "%s", _("could not close handshake
fd"));
+ goto error;
+ }
+
if (virAtomicIntInc(&driver->nactive) == 1 &&
driver->inhibitCallback)
driver->inhibitCallback(true, driver->inhibitOpaque);
@@ -1312,6 +1287,37 @@ int virLXCProcessStart(virConnectPtr conn,
goto error;
}
+ /* We know the cgroup must exist by this synchronization
+ * point so lets detect that first, since it gives us a
+ * more reliable way to kill everything off if something
+ * goes wrong from here onwards ... */
+ if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid,
+ vm->def->resource ?
+ vm->def->resource->partition :
+ NULL,
+ -1, &priv->cgroup) < 0)
+ goto error;
+
+ if (!priv->cgroup) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("No valid cgroup for machine %s"),
+ vm->def->name);
+ goto error;
+ }
+
+ /* And we can get the first monitor connection now too */
+ if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
+ /* Intentionally overwrite the real monitor error message,
+ * since a better one is almost always found in the logs
+ */
+ if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) {
+ virResetLastError();
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("guest failed to start: %s"), ebuf);
+ }
+ goto error;
+ }
+
if (autoDestroy &&
virCloseCallbacksSet(driver->closeCallbacks, vm,
conn, lxcProcessAutoDestroy) < 0)
--
2.1.0