Extend `lxcContainerStart` with support for restoring a container from
a file descriptor that refers to the directory holding the container's
saved (CRIU) state.
Signed-off-by: Radostin Stoyanov <rstoyanov1(a)gmail.com>
---
src/lxc/lxc_container.c | 162 +++++++++++++++++++++++++++++++++++++++++++++--
src/lxc/lxc_container.h | 3 +-
src/lxc/lxc_controller.c | 104 ++++++++++++++++++++++++++++--
src/lxc/lxc_driver.c | 4 +-
src/lxc/lxc_process.c | 23 +++++--
src/lxc/lxc_process.h | 1 +
6 files changed, 280 insertions(+), 17 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 532fd0be0..6cd203d7f 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -69,6 +69,8 @@
#include "virprocess.h"
#include "virstring.h"
+#include "lxc_criu.h"
+
#define VIR_FROM_THIS VIR_FROM_LXC
VIR_LOG_INIT("lxc.lxc_container");
@@ -111,6 +113,7 @@ struct __lxc_child_argv {
char **ttyPaths;
int handshakefd;
int *nsInheritFDs;
+ int restorefd;
};
static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
@@ -263,6 +266,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
* @ttyfd: FD of tty to set as the container console
* @npassFDs: number of extra FDs
* @passFDs: list of extra FDs
+ * @restorefd: FD of folder where container was dumped
*
* Setup file descriptors in the container. @ttyfd is set to be
* the container's stdin, stdout & stderr. Any FDs included in
@@ -272,7 +276,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
* Returns 0 on success or -1 in case of error
*/
static int lxcContainerSetupFDs(int *ttyfd,
- size_t npassFDs, int *passFDs)
+ size_t npassFDs, int *passFDs, int restorefd)
{
int rc = -1;
int open_max;
@@ -368,6 +372,8 @@ static int lxcContainerSetupFDs(int *ttyfd,
}
for (fd = last_fd + 1; fd < open_max; fd++) {
+ if (fd == restorefd)
+ continue;
int tmpfd = fd;
VIR_MASS_CLOSE(tmpfd);
}
@@ -1083,6 +1089,31 @@ static int lxcContainerMountFSDev(virDomainDefPtr def,
return ret;
}
+/*
+ * lxcContainerMountFSDevPTSRestore:
+ * @def: domain definition; only its name is read here
+ * @stateDir: directory containing the "<name>.devpts" entry
+ *
+ * Builds the path "@stateDir/<name>.devpts" and moves it (MS_MOVE)
+ * onto /dev/pts.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerMountFSDevPTSRestore(virDomainDefPtr def,
+                                            const char *stateDir)
+{
+    char *devpts = NULL;
+    int rc;
+
+    VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir);
+
+    if (virAsprintf(&devpts, "%s/%s.devpts", stateDir, def->name) < 0)
+        return -1;
+
+    VIR_DEBUG("Trying to move %s to /dev/pts", devpts);
+
+    rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL);
+    if (rc < 0)
+        virReportSystemError(errno, _("Failed to mount %s on /dev/pts"), devpts);
+
+    VIR_FREE(devpts);
+    return rc < 0 ? -1 : 0;
+}
+
static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
const char *stateDir)
{
@@ -2191,6 +2222,116 @@ static int lxcContainerSetHostname(virDomainDefPtr def)
return ret;
}
+/*
+ * lxcContainerChildRestore:
+ * @data: pointer to container arguments (lxc_child_argv_t)
+ *
+ * Entry point handed to clone() by lxcContainerStart() when a restore
+ * FD was supplied. In order, it:
+ *   - waits for the "continue" message on the monitor FD,
+ *   - applies the container ID settings,
+ *   - opens the first console tty below LXC_STATE_DIR/<name>.devpts,
+ *   - prepares the root filesystem,
+ *   - sends "continue" back on the handshake FD,
+ *   - wires up the std streams while keeping the restore FD open,
+ *   - moves the saved devpts mount onto /dev/pts,
+ *   - and finally calls lxcCriuRestore() on the restore FD.
+ *
+ * Returns the value of lxcCriuRestore(), or -1 if any setup step fails
+ */
+static int lxcContainerChildRestore(void *data)
+{
+    lxc_child_argv_t *argv = data;
+    virDomainDefPtr vmDef = argv->config;
+    int ttyfd = -1;
+    int ret = -1;
+    char *ttyPath = NULL;
+    virDomainFSDefPtr root;
+    char *sec_mount_options = NULL;
+    char *stateDir = NULL;
+
+    if (vmDef == NULL) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       "%s", _("lxcChild() passed invalid vm definition"));
+        goto cleanup;
+    }
+
+    if (lxcContainerWaitForContinue(argv->monitor) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to read the container continue message"));
+        goto cleanup;
+    }
+    VIR_DEBUG("Received container continue message");
+
+    if (lxcContainerSetID(vmDef) < 0)
+        goto cleanup;
+
+    root = virDomainGetFilesystemForTarget(vmDef, "/");
+
+    if (!argv->nttyPaths) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("At least one tty is required"));
+        goto cleanup;
+    } else {
+        /* Only the first tty is used; its "/dev/pts/" prefix is dropped
+         * so the name can be looked up below the per-domain devpts dir. */
+        const char *tty = argv->ttyPaths[0];
+        if (STRPREFIX(tty, "/dev/pts/"))
+            tty += strlen("/dev/pts/");
+        if (virAsprintf(&ttyPath, "%s/%s.devpts/%s",
+                        LXC_STATE_DIR, vmDef->name, tty) < 0)
+            goto cleanup;
+    }
+
+    VIR_DEBUG("Container TTY path: %s", ttyPath);
+
+    ttyfd = open(ttyPath, O_RDWR);
+    if (ttyfd < 0) {
+        virReportSystemError(errno, _("Failed to open tty %s"), ttyPath);
+        goto cleanup;
+    }
+    VIR_DEBUG("Container TTY fd: %d", ttyfd);
+
+    if (!(sec_mount_options = virSecurityManagerGetMountOptions(
+                                  argv->securityDriver,
+                                  vmDef)))
+        goto cleanup;
+
+    if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0)
+        goto cleanup;
+
+    if (lxcContainerSendContinue(argv->handshakefd) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to send continue signal to controller"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Setting up container's std streams");
+
+    /* restorefd is passed so that it survives the mass close of FDs */
+    if (lxcContainerSetupFDs(&ttyfd, argv->npassFDs,
+                             argv->passFDs, argv->restorefd) < 0)
+        goto cleanup;
+
+    if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0)
+        goto cleanup;
+
+    /* Mounts /dev/pts. The helper reports its own error, so it is not
+     * reported again here: doing so would replace the precise message
+     * with a generic one built from a possibly stale errno. */
+    if (lxcContainerMountFSDevPTSRestore(vmDef, stateDir) < 0)
+        goto cleanup;
+
+    /* NOTE(review): a setsid() failure is reported but not treated as
+     * fatal; confirm this best-effort behaviour is intended. */
+    if (setsid() < 0)
+        virReportSystemError(errno, "%s", _("Unable to become session leader"));
+
+    VIR_DEBUG("Executing container restore criu function");
+    ret = lxcCriuRestore(vmDef, argv->restorefd, 0);
+
+ cleanup:
+    VIR_FORCE_CLOSE(argv->monitor);
+    VIR_FORCE_CLOSE(argv->handshakefd);
+    VIR_FORCE_CLOSE(ttyfd);
+    VIR_FREE(ttyPath);
+    VIR_FREE(stateDir);
+    VIR_FREE(sec_mount_options);
+
+    if (ret != 0) {
+        VIR_DEBUG("Tearing down container");
+        fprintf(stderr,
+                _("Failure in libvirt_lxc startup: %s\n"),
+                virGetLastErrorMessage());
+    }
+
+    return ret;
+}
+
+
+
/**
* lxcContainerChild:
* @data: pointer to container arguments
@@ -2322,7 +2463,7 @@ static int lxcContainerChild(void *data)
VIR_FORCE_CLOSE(argv->handshakefd);
VIR_FORCE_CLOSE(argv->monitor);
if (lxcContainerSetupFDs(&ttyfd,
- argv->npassFDs, argv->passFDs) < 0)
+ argv->npassFDs, argv->passFDs, -1) < 0)
goto cleanup;
/* Make init process of the container the leader of the new session.
@@ -2403,6 +2544,7 @@ virArch lxcContainerGetAlt32bitArch(virArch arch)
* @veths: interface names
* @control: control FD to the container
* @ttyPath: path of tty to set as the container console
+ * @restorefd: FD to folder where container was dumped
*
* Starts a container process by calling clone() with the namespace flags
*
@@ -2418,7 +2560,8 @@ int lxcContainerStart(virDomainDefPtr def,
int handshakefd,
int *nsInheritFDs,
size_t nttyPaths,
- char **ttyPaths)
+ char **ttyPaths,
+ int restorefd)
{
pid_t pid;
int cflags;
@@ -2436,6 +2579,7 @@ int lxcContainerStart(virDomainDefPtr def,
.ttyPaths = ttyPaths,
.handshakefd = handshakefd,
.nsInheritFDs = nsInheritFDs,
+ .restorefd = restorefd,
};
/* allocate a stack for the container */
@@ -2484,10 +2628,16 @@ int lxcContainerStart(virDomainDefPtr def,
VIR_DEBUG("Inheriting a UTS namespace");
}
- VIR_DEBUG("Cloning container init process");
- pid = clone(lxcContainerChild, stacktop, cflags, &args);
+ if (restorefd != -1) {
+ VIR_DEBUG("Cloning container process that will spawn criu restore");
+ pid = clone(lxcContainerChildRestore, stacktop, SIGCHLD, &args);
+ } else {
+ VIR_DEBUG("Cloning container init process");
+ pid = clone(lxcContainerChild, stacktop, cflags, &args);
+ VIR_DEBUG("clone() completed, new container PID is %d", pid);
+ }
+
VIR_FREE(stack);
- VIR_DEBUG("clone() completed, new container PID is %d", pid);
if (pid < 0) {
virReportSystemError(errno, "%s",
diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h
index 641e2d460..9a6ac2073 100644
--- a/src/lxc/lxc_container.h
+++ b/src/lxc/lxc_container.h
@@ -58,7 +58,8 @@ int lxcContainerStart(virDomainDefPtr def,
int handshakefd,
int *nsInheritFDs,
size_t nttyPaths,
- char **ttyPaths);
+ char **ttyPaths,
+ int restorefd);
int lxcContainerSetupHostdevCapsMakePath(const char *dev);
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 507bffda0..a5eb5e336 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -146,6 +146,8 @@ struct _virLXCController {
virCgroupPtr cgroup;
virLXCFusePtr fuse;
+
+ int restore;
};
#include "lxc_controller_dispatch.h"
@@ -1015,6 +1017,65 @@ static int lxcControllerClearCapabilities(void)
return 0;
}
+/*
+ * lxcControllerFindRestoredPid:
+ * @fd: FD of the checkpoint directory, or a negative value
+ *
+ * Resolves @fd to a directory path through /proc/self/fd, reads the PID
+ * stored in "<dir>/pidfile" and then removes that file.
+ *
+ * @fd is always closed before returning, so the descriptor number held
+ * by the caller must not be used afterwards.
+ *
+ * Returns the PID (> 0) read from the pidfile, or -1 on error
+ */
+static int
+lxcControllerFindRestoredPid(int fd)
+{
+    int initpid = 0;
+    int ret = -1;
+    char *checkpointdir = NULL;
+    char *pidfile = NULL;
+    char *checkpointfd = NULL;
+    /* Must start at -1: cleanup closes it on every early exit */
+    int pidfilefd = -1;
+    char buf[32];
+    char *end = NULL;
+    ssize_t len;
+
+    if (fd < 0)
+        goto cleanup;
+
+    if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", fd) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to write checkpoint dir path"));
+        goto cleanup;
+    }
+
+    if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to readlink checkpoint dir path"));
+        goto cleanup;
+    }
+
+    if (virAsprintf(&pidfile, "%s/pidfile", checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to write pidfile path"));
+        goto cleanup;
+    }
+
+    if ((pidfilefd = virFileOpenAs(pidfile, O_RDONLY, 0, -1, -1, 0)) < 0) {
+        /* virFileOpenAs() returns -errno, so negate it for reporting */
+        virReportSystemError(-pidfilefd,
+                             _("Failed to open domain's pidfile '%s'"),
+                             pidfile);
+        pidfilefd = -1;
+        goto cleanup;
+    }
+
+    if ((len = saferead(pidfilefd, buf, sizeof(buf) - 1)) < 0) {
+        virReportSystemError(errno,
+                             _("Failed to read domain's pidfile '%s'"),
+                             pidfile);
+        goto cleanup;
+    }
+    buf[len] = '\0';
+
+    /* A non-NULL end pointer tolerates trailing bytes such as a newline;
+     * empty, non-numeric, overflowing or non-positive values are rejected. */
+    if (virStrToLong_i(buf, &end, 10, &initpid) < 0 || initpid <= 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Invalid pid in domain's pidfile '%s'"),
+                       pidfile);
+        goto cleanup;
+    }
+
+    ret = initpid;
+
+    /* Failing to delete the pidfile is reported but does not fail the call */
+    if (virFileRemove(pidfile, -1, -1) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to delete pidfile path"));
+    }
+
+ cleanup:
+    VIR_FORCE_CLOSE(fd);
+    VIR_FORCE_CLOSE(pidfilefd);
+    VIR_FREE(pidfile);
+    VIR_FREE(checkpointdir);
+    VIR_FREE(checkpointfd);
+    return ret;
+}
+
+
static bool wantReboot;
static virMutex lock = VIR_MUTEX_INITIALIZER;
@@ -2327,6 +2388,7 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
int containerhandshake[2] = { -1, -1 };
char **containerTTYPaths = NULL;
size_t i;
+ bool restore_mode = (ctrl->restore != -1);
if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0)
goto cleanup;
@@ -2383,7 +2445,8 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
containerhandshake[1],
ctrl->nsFDs,
ctrl->nconsoles,
- containerTTYPaths)) < 0)
+ containerTTYPaths,
+ ctrl->restore)) < 0)
goto cleanup;
VIR_FORCE_CLOSE(control[1]);
VIR_FORCE_CLOSE(containerhandshake[1]);
@@ -2395,10 +2458,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
VIR_FORCE_CLOSE(ctrl->nsFDs[i]);
- if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
+ if (!restore_mode && virLXCControllerSetupCgroupLimits(ctrl) < 0)
goto cleanup;
- if (virLXCControllerSetupUserns(ctrl) < 0)
+ if (!restore_mode && virLXCControllerSetupUserns(ctrl) < 0)
goto cleanup;
if (virLXCControllerMoveInterfaces(ctrl) < 0)
@@ -2423,6 +2486,26 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
if (lxcControllerClearCapabilities() < 0)
goto cleanup;
+ if (restore_mode) {
+ int status;
+ int ret = waitpid(-1, &status, 0);
+ VIR_DEBUG("Got sig child %d", ret);
+
+ /* There could be two cases here:
+ * 1. CRIU died because of restore error and the container is not running
+ * 2. CRIU detached itself from the running container
+ */
+ int initpid;
+ if ((initpid = lxcControllerFindRestoredPid(ctrl->restore)) < 0) {
+ virReportSystemError(errno, "%s",
+ _("Unable to get restored task pid"));
+ virNetDaemonQuit(ctrl->daemon);
+ goto cleanup;
+ }
+
+ ctrl->initpid = initpid;
+ }
+
for (i = 0; i < ctrl->nconsoles; i++)
if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0)
goto cleanup;
@@ -2466,6 +2549,7 @@ int main(int argc, char *argv[])
int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST];
int handshakeFd = -1;
bool bg = false;
+ int restore = -1;
const struct option options[] = {
{ "background", 0, NULL, 'b' },
{ "name", 1, NULL, 'n' },
@@ -2477,6 +2561,7 @@ int main(int argc, char *argv[])
{ "share-net", 1, NULL, 'N' },
{ "share-ipc", 1, NULL, 'I' },
{ "share-uts", 1, NULL, 'U' },
+ { "restore", 1, NULL, 'r' },
{ "help", 0, NULL, 'h' },
{ 0, 0, 0, 0 },
};
@@ -2504,7 +2589,7 @@ int main(int argc, char *argv[])
while (1) {
int c;
- c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:",
+ c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:r:",
options, NULL);
if (c == -1)
@@ -2580,6 +2665,14 @@ int main(int argc, char *argv[])
securityDriver = optarg;
break;
+ case 'r':
+ if (virStrToLong_i(optarg, NULL, 10, &restore) < 0) {
+ fprintf(stderr, "malformed --restore argument '%s'",
+ optarg);
+ goto cleanup;
+ }
+ break;
+
case 'h':
case '?':
fprintf(stderr, "\n");
@@ -2596,6 +2689,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -N FD, --share-net FD\n");
fprintf(stderr, " -I FD, --share-ipc FD\n");
fprintf(stderr, " -U FD, --share-uts FD\n");
+ fprintf(stderr, " -r FD, --restore FD\n");
fprintf(stderr, " -h, --help\n");
fprintf(stderr, "\n");
rc = 0;
@@ -2648,6 +2742,8 @@ int main(int argc, char *argv[])
ctrl->passFDs = passFDs;
ctrl->npassFDs = npassFDs;
+ ctrl->restore = restore;
+
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
if (ns_fd[i] != -1) {
if (!ctrl->nsFDs) {/*allocate only once */
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 4f600f3df..f52085ebf 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1125,7 +1125,7 @@ static int lxcDomainCreateWithFiles(virDomainPtr dom,
ret = virLXCProcessStart(dom->conn, driver, vm,
nfiles, files,
- (flags & VIR_DOMAIN_START_AUTODESTROY),
+ (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
VIR_DOMAIN_RUNNING_BOOTED);
if (ret == 0) {
@@ -1252,7 +1252,7 @@ lxcDomainCreateXMLWithFiles(virConnectPtr conn,
if (virLXCProcessStart(conn, driver, vm,
nfiles, files,
- (flags & VIR_DOMAIN_START_AUTODESTROY),
+ (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
VIR_DOMAIN_RUNNING_BOOTED) < 0) {
virDomainAuditStart(vm, "booted", false);
virLXCDomainObjEndJob(driver, vm);
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 96041f2ec..1cd7f5bfe 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -118,7 +118,7 @@ virLXCProcessReboot(virLXCDriverPtr driver,
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
vm->newDef = savedDef;
if (virLXCProcessStart(conn, driver, vm,
- 0, NULL, autodestroy, reason) < 0) {
+ 0, NULL, autodestroy, -1, reason) < 0) {
VIR_WARN("Unable to handle reboot of vm %s",
vm->def->name);
goto cleanup;
@@ -914,7 +914,8 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
size_t nfiles,
int handshakefd,
int * const logfd,
- const char *pidfile)
+ const char *pidfile,
+ int restorefd)
{
size_t i;
char *filterstr;
@@ -993,6 +994,12 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
for (i = 0; i < nveths; i++)
virCommandAddArgList(cmd, "--veth", veths[i], NULL);
+ if (restorefd != -1) {
+ virCommandAddArg(cmd, "--restore");
+ virCommandAddArgFormat(cmd, "%d", restorefd);
+ virCommandPassFD(cmd, restorefd, 0);
+ }
+
virCommandPassFD(cmd, handshakefd, 0);
virCommandDaemonize(cmd);
virCommandSetPidFile(cmd, pidfile);
@@ -1166,6 +1173,8 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm)
* @driver: pointer to driver structure
* @vm: pointer to virtual machine structure
* @autoDestroy: mark the domain for auto destruction
+ * @restorefd: file descriptor pointing to the restore directory (-1 if not
+ * restoring)
* @reason: reason for switching vm to running state
*
* Starts a vm
@@ -1177,6 +1186,7 @@ int virLXCProcessStart(virConnectPtr conn,
virDomainObjPtr vm,
unsigned int nfiles, int *files,
bool autoDestroy,
+ int restorefd,
virDomainRunningReason reason)
{
int rc = -1, r;
@@ -1386,7 +1396,7 @@ int virLXCProcessStart(virConnectPtr conn,
files, nfiles,
handshakefds[1],
&logfd,
- pidfile)))
+ pidfile, restorefd)))
goto cleanup;
/* now that we know it is about to start call the hook if present */
@@ -1494,6 +1504,9 @@ int virLXCProcessStart(virConnectPtr conn,
if (!priv->machineName)
goto cleanup;
+ if (restorefd != -1)
+ goto skip_cgroup_checks;
+
/* We know the cgroup must exist by this synchronization
* point so lets detect that first, since it gives us a
* more reliable way to kill everything off if something
@@ -1510,6 +1523,8 @@ int virLXCProcessStart(virConnectPtr conn,
goto cleanup;
}
+ skip_cgroup_checks:
+
/* And we can get the first monitor connection now too */
if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
/* Intentionally overwrite the real monitor error message,
@@ -1596,7 +1611,7 @@ virLXCProcessAutostartDomain(virDomainObjPtr vm,
if (vm->autostart &&
!virDomainObjIsActive(vm)) {
ret = virLXCProcessStart(data->conn, data->driver, vm,
- 0, NULL, false,
+ 0, NULL, false, -1,
VIR_DOMAIN_RUNNING_BOOTED);
virDomainAuditStart(vm, "booted", ret >= 0);
if (ret < 0) {
diff --git a/src/lxc/lxc_process.h b/src/lxc/lxc_process.h
index d78cddef4..c724f31a7 100644
--- a/src/lxc/lxc_process.h
+++ b/src/lxc/lxc_process.h
@@ -29,6 +29,7 @@ int virLXCProcessStart(virConnectPtr conn,
virDomainObjPtr vm,
unsigned int nfiles, int *files,
bool autoDestroy,
+ int restorefd,
virDomainRunningReason reason);
int virLXCProcessStop(virLXCDriverPtr driver,
virDomainObjPtr vm,
--
2.14.3