From: "Daniel P. Berrange" <berrange(a)redhat.com>
The LXC controller and container attempt to run in lockstep
using a series of handshakes. There was a flaw in the container
side though, causing it todo partial setup work, before waiting
for the initial controller handshake.
This in turn meant some of the controller setup could fail,
and obscure the real error from the container setup. Moving
the lxcContainerWaitForContinue call to be the first thing
the container does solves that.
The controller still, however, pollutes the log with a message
about the container handshake failing, when the container
shuts down during startup.
To deal with this requires special handling of the EOF condition
on the controllers lxcContainerWaitForContinue() call.
* src/lxc/lxc_container.c: Wait for continue message from
controller before doing anything at all
* src/lxc/lxc_controller.c: Don't pollute log with error
message if EOF was returned from lxcContainerWaitForContinue.
* src/lxc/lxc_driver.c: Handle EOF from controller handshake
---
src/lxc/lxc_container.c | 38 ++++++++++++++++++++++++++------------
src/lxc/lxc_controller.c | 31 ++++++++++++++++++++++++++++---
src/lxc/lxc_driver.c | 2 +-
3 files changed, 55 insertions(+), 16 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 787df9a..ee5ca9f 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -214,7 +214,7 @@ error_out:
* parent process. It will send this message on the socket pair stored in
* the vm structure once it has completed the post clone container setup.
*
- * Returns 0 on success or -1 in case of error
+ * Returns 1 on success, 0 on EOF, or -1 in case of error
*/
int lxcContainerWaitForContinue(int control)
{
@@ -222,12 +222,18 @@ int lxcContainerWaitForContinue(int control)
int readLen;
readLen = saferead(control, &msg, sizeof(msg));
- if (readLen != sizeof(msg) ||
- msg != LXC_CONTINUE_MSG) {
+ if (readLen < 0)
+ return -1;
+
+ if (readLen != sizeof(msg))
+ return 0;
+
+ if (msg != LXC_CONTINUE_MSG) {
+ errno = EINVAL;
return -1;
}
- return 0;
+ return 1;
}
@@ -1036,6 +1042,20 @@ static int lxcContainerChild( void *data )
char *ttyPath = NULL;
virDomainFSDefPtr root;
virCommandPtr cmd = NULL;
+ int rc;
+
+ /* Wait for interface devices to show up */
+ if ((rc = lxcContainerWaitForContinue(argv->monitor)) <= 0) {
+ if (rc < 0)
+ virReportSystemError(errno, "%s",
+ _("Failed to read the container continue
message"));
+ else
+ lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Controller unexpectedly quit"));
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Received container continue message");
if (NULL == vmDef) {
lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1079,14 +1099,6 @@ static int lxcContainerChild( void *data )
goto cleanup;
}
- /* Wait for interface devices to show up */
- if (lxcContainerWaitForContinue(argv->monitor) < 0) {
- virReportSystemError(errno, "%s",
- _("Failed to read the container continue
message"));
- goto cleanup;
- }
- VIR_DEBUG("Received container continue message");
-
/* rename and enable interfaces */
if (lxcContainerRenameAndEnableInterfaces(argv->nveths,
argv->veths) < 0) {
@@ -1120,6 +1132,8 @@ cleanup:
}
virCommandFree(cmd);
+ if (ret < 0)
+ virDispatchError(NULL);
return ret;
}
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 52d382e..8c3a54f 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -780,6 +780,9 @@ static int lxcSetPersonality(virDomainDefPtr def)
# define MS_SLAVE (1<<19)
#endif
+/*
+ * Returns -1 on error, 0 if container quit unexpected, 1 on success
+ */
static int
lxcControllerRun(virDomainDefPtr def,
unsigned int nveths,
@@ -801,6 +804,7 @@ lxcControllerRun(virDomainDefPtr def,
size_t nloopDevs = 0;
int *loopDevs = NULL;
size_t i;
+ int ret;
if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
virReportSystemError(errno, "%s",
@@ -935,12 +939,25 @@ lxcControllerRun(virDomainDefPtr def,
goto cleanup;
}
- if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) {
+ if ((ret = lxcContainerWaitForContinue(containerhandshake[0])) < 0) {
virReportSystemError(errno, "%s",
_("error receiving signal from container"));
goto cleanup;
}
+ if (ret == 0) {
+ rc = 0;
+ /* We're not raising an error, since the container will have
+ * done that already and we don't want to confuse the driver
+ * when it fetches the error message from logs
+ */
+#if 0
+ lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("container quit during startup handshake"));
+#endif
+ goto cleanup;
+ }
+
/* Now the container is fully setup... */
/* ...we can close the loop devices... */
@@ -959,7 +976,10 @@ lxcControllerRun(virDomainDefPtr def,
}
VIR_FORCE_CLOSE(handshakefd);
- rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
+ if (lxcControllerMain(monitor, client, appPty, containerPty, container) != 0)
+ goto cleanup;
+
+ rc = 1;
cleanup:
VIR_FREE(devptmx);
@@ -1186,5 +1206,10 @@ cleanup:
unlink(sockpath);
VIR_FREE(sockpath);
- return rc ? EXIT_FAILURE : EXIT_SUCCESS;
+ /* If rc == 0, we have an error, but the lxc container
+ * startup will have printed it already.*/
+ if (rc < 0)
+ virDispatchError(NULL);
+
+ return rc <=0 ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 4b62600..759e3a9 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1622,7 +1622,7 @@ static int lxcVmStart(virConnectPtr conn,
vm->def->id = vm->pid;
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
- if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
+ if (lxcContainerWaitForContinue(handshakefds[0]) <= 0) {
char out[1024];
if (!(lxcReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {
--
1.7.6.2