When migrating an LXC container, or simply restoring it from a saved
state, we need to restore the container from the CRIU image files that
we have stored on disk. This patch extends lxcContainerStart into a
more generic function that either starts a container from scratch or
restores it from a snapshot.
Signed-off-by: Katerina Koukiou <k.koukiou(a)gmail.com>
---
src/Makefile.am | 3 +-
src/lxc/lxc_container.c | 200 +++++++++++++++++++++++++++++++++++++++++++++--
src/lxc/lxc_container.h | 3 +-
src/lxc/lxc_controller.c | 109 ++++++++++++++++++++++++--
src/lxc/lxc_driver.c | 4 +-
src/lxc/lxc_process.c | 23 +++++-
src/lxc/lxc_process.h | 1 +
7 files changed, 323 insertions(+), 20 deletions(-)
diff --git a/src/Makefile.am b/src/Makefile.am
index 64a7680..1542251 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -761,7 +761,8 @@ LXC_CONTROLLER_SOURCES = \
lxc/lxc_cgroup.c lxc/lxc_cgroup.h \
lxc/lxc_domain.c lxc/lxc_domain.h \
lxc/lxc_fuse.c lxc/lxc_fuse.h \
- lxc/lxc_controller.c
+ lxc/lxc_controller.c \
+ lxc/lxc_criu.c lxc/lxc_criu.h
SECURITY_DRIVER_APPARMOR_HELPER_SOURCES = \
$(DATATYPES_SOURCES) \
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index b857431..7d307ee 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -70,6 +70,8 @@
#include "virprocess.h"
#include "virstring.h"
+#include "lxc_criu.h"
+
#define VIR_FROM_THIS VIR_FROM_LXC
VIR_LOG_INIT("lxc.lxc_container");
@@ -112,6 +114,7 @@ struct __lxc_child_argv {
char **ttyPaths;
int handshakefd;
int *nsInheritFDs;
+ int restorefd;
};
static int lxcContainerMountFSBlock(virDomainFSDefPtr fs,
@@ -266,7 +269,7 @@ static virCommandPtr lxcContainerBuildInitCmd(virDomainDefPtr vmDef,
* Returns 0 on success or -1 in case of error
*/
static int lxcContainerSetupFDs(int *ttyfd,
- size_t npassFDs, int *passFDs)
+ size_t npassFDs, int *passFDs, int restorefd)
{
int rc = -1;
int open_max;
@@ -362,6 +365,8 @@ static int lxcContainerSetupFDs(int *ttyfd,
}
for (fd = last_fd + 1; fd < open_max; fd++) {
+ if (fd == restorefd)
+ continue;
int tmpfd = fd;
VIR_MASS_CLOSE(tmpfd);
}
@@ -1077,6 +1082,36 @@ static int lxcContainerMountFSDev(virDomainDefPtr def,
return ret;
}
+
+/*
+ * lxcContainerMountFSDevPTSRestore:
+ * @def: domain definition; its name selects the "<name>.devpts" directory
+ * @stateDir: directory that contains the "<name>.devpts" mount
+ *
+ * Moves (MS_MOVE) the mount found at "<stateDir>/<name>.devpts" onto
+ * /dev/pts.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcContainerMountFSDevPTSRestore(virDomainDefPtr def,
+                                            const char *stateDir)
+{
+    char *devpts = NULL;
+    int rc = -1;
+
+    VIR_DEBUG("Mount /dev/pts stateDir=%s", stateDir);
+
+    if (virAsprintf(&devpts, "%s/%s.devpts", stateDir, def->name) < 0)
+        return -1;
+
+    VIR_DEBUG("Trying to move %s to /dev/pts", devpts);
+
+    if (mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL) < 0)
+        virReportSystemError(errno,
+                             _("Failed to mount %s on /dev/pts"),
+                             devpts);
+    else
+        rc = 0;
+
+    VIR_FREE(devpts);
+    return rc;
+}
+
+
static int lxcContainerMountFSDevPTS(virDomainDefPtr def,
const char *stateDir)
{
@@ -2120,6 +2155,148 @@ static int lxcAttachNS(int *ns_fd)
}
+/*
+ * lxcContainerChildRestore:
+ * @data: pointer to container arguments (lxc_child_argv_t)
+ *
+ * clone() entry point used when a container is restored from a checkpoint
+ * instead of being started from scratch. It waits for the continue message
+ * on argv->monitor, opens the first console tty from the container's devpts
+ * directory, prepares the root filesystem, signals the controller through
+ * argv->handshakefd, bind mounts the root on /tmp/<name>, moves the devpts
+ * mount onto /dev/pts, becomes session leader and finally calls
+ * lxcCriuRestore() with argv->restorefd.
+ *
+ * Returns the result of lxcCriuRestore() when the setup succeeds, or -1 if
+ * any setup step fails.
+ */
+static int lxcContainerChildRestore(void *data)
+{
+    lxc_child_argv_t *argv = data;
+    virDomainDefPtr vmDef = argv->config;
+    int ttyfd = -1;
+    int ret = -1;
+    char *ttyPath = NULL;
+    virDomainFSDefPtr root;
+    char *sec_mount_options = NULL;
+    char *stateDir = NULL;
+    char *rootfs_mount = NULL;
+
+    if (NULL == vmDef) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       "%s", _("lxcChild() passed invalid vm definition"));
+        goto cleanup;
+    }
+
+    if (lxcContainerWaitForContinue(argv->monitor) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to read the container continue message"));
+        goto cleanup;
+    }
+    VIR_DEBUG("Received container continue message");
+
+    if (lxcContainerSetID(vmDef) < 0)
+        goto cleanup;
+
+    /* root->src is dereferenced below for the bind mount, so a missing
+     * "/" filesystem must be rejected here instead of crashing later. */
+    if (!(root = virDomainGetFilesystemForTarget(vmDef, "/"))) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Restoring a container requires a root filesystem"));
+        goto cleanup;
+    }
+
+    if (argv->nttyPaths) {
+        const char *tty = argv->ttyPaths[0];
+        /* Only the pts name is needed: the tty is opened through the
+         * container's devpts directory under LXC_STATE_DIR. */
+        if (STRPREFIX(tty, "/dev/pts/"))
+            tty += strlen("/dev/pts/");
+        if (virAsprintf(&ttyPath, "%s/%s.devpts/%s",
+                        LXC_STATE_DIR, vmDef->name, tty) < 0)
+            goto cleanup;
+    } else {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("At least one tty is required"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Container TTY path: %s", ttyPath);
+
+    ttyfd = open(ttyPath, O_RDWR);
+    if (ttyfd < 0) {
+        virReportSystemError(errno,
+                             _("Failed to open tty %s"),
+                             ttyPath);
+        goto cleanup;
+    }
+    VIR_DEBUG("Container TTY fd: %d", ttyfd);
+
+    if (!(sec_mount_options = virSecurityManagerGetMountOptions(
+                                  argv->securityDriver,
+                                  vmDef)))
+        goto cleanup;
+
+    if (lxcContainerPrepareRoot(vmDef, root, sec_mount_options) < 0)
+        goto cleanup;
+
+    if (lxcContainerSendContinue(argv->handshakefd) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to send continue signal to controller"));
+        goto cleanup;
+    }
+
+    VIR_DEBUG("Setting up container's std streams");
+
+    /* restorefd is handed over so that it survives the mass close of
+     * descriptors; it is still needed by lxcCriuRestore() below. */
+    if (lxcContainerSetupFDs(&ttyfd,
+                             argv->npassFDs, argv->passFDs,
+                             argv->restorefd) < 0)
+        goto cleanup;
+
+    /* CRIU needs the container's root bind mounted so that it is the root of
+     * some mount.
+     */
+    if (virAsprintf(&rootfs_mount, "/tmp/%s", vmDef->name) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to write rootfs dir mount path"));
+        goto cleanup;
+    }
+
+    if (virFileMakePath(rootfs_mount) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to mkdir rootfs mount path"));
+        goto cleanup;
+    }
+
+    if (mount(root->src, rootfs_mount, NULL, MS_BIND, NULL) < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Failed to create rootfs mountpoint"));
+        goto cleanup;
+    }
+
+    if (virFileResolveAllLinks(LXC_STATE_DIR, &stateDir) < 0)
+        goto cleanup;
+
+    /* Mounts /dev/pts. The helper already reports the failing path together
+     * with errno, so do not overwrite its error with a second report. */
+    if (lxcContainerMountFSDevPTSRestore(vmDef, stateDir) < 0)
+        goto cleanup;
+
+    if (setsid() < 0) {
+        virReportSystemError(errno, "%s",
+                             _("Unable to become session leader"));
+        /* Treat this as fatal rather than reporting and carrying on. */
+        goto cleanup;
+    }
+
+    ret = 0;
+
+ cleanup:
+    VIR_FORCE_CLOSE(argv->monitor);
+    VIR_FORCE_CLOSE(argv->handshakefd);
+    VIR_FORCE_CLOSE(ttyfd);
+    VIR_FREE(ttyPath);
+    VIR_FREE(rootfs_mount);
+    VIR_FREE(stateDir);
+    VIR_FREE(sec_mount_options);
+
+    /* The restore itself only runs once every setup step has succeeded. */
+    if (ret == 0) {
+        VIR_DEBUG("Executing container restore criu function");
+        ret = lxcCriuRestore(vmDef, argv->restorefd, 0);
+    }
+
+    if (ret != 0) {
+        VIR_DEBUG("Tearing down container");
+        fprintf(stderr,
+                _("Failure in libvirt_lxc startup: %s\n"),
+                virGetLastErrorMessage());
+    }
+
+    return ret;
+}
+
+
+
/**
* lxcContainerChild:
* @data: pointer to container arguments
@@ -2242,7 +2419,7 @@ static int lxcContainerChild(void *data)
VIR_FORCE_CLOSE(argv->handshakefd);
VIR_FORCE_CLOSE(argv->monitor);
if (lxcContainerSetupFDs(&ttyfd,
- argv->npassFDs, argv->passFDs) < 0)
+ argv->npassFDs, argv->passFDs, -1) < 0)
goto cleanup;
/* Make init process of the container the leader of the new session.
@@ -2332,7 +2509,8 @@ int lxcContainerStart(virDomainDefPtr def,
int handshakefd,
int *nsInheritFDs,
size_t nttyPaths,
- char **ttyPaths)
+ char **ttyPaths,
+ int restorefd)
{
pid_t pid;
int cflags;
@@ -2350,6 +2528,7 @@ int lxcContainerStart(virDomainDefPtr def,
.ttyPaths = ttyPaths,
.handshakefd = handshakefd,
.nsInheritFDs = nsInheritFDs,
+ .restorefd = restorefd,
};
/* allocate a stack for the container */
@@ -2399,10 +2578,19 @@ int lxcContainerStart(virDomainDefPtr def,
VIR_DEBUG("Inheriting a UTS namespace");
}
- VIR_DEBUG("Cloning container init process");
- pid = clone(lxcContainerChild, stacktop, cflags, &args);
+ if (restorefd == -1)
+ VIR_DEBUG("Cloning container init process");
+ else
+ VIR_DEBUG("Cloning container process that will spawn criu restore");
+
+ if (restorefd != -1)
+ pid = clone(lxcContainerChildRestore, stacktop, SIGCHLD, &args);
+ else
+ pid = clone(lxcContainerChild, stacktop, cflags, &args);
+
VIR_FREE(stack);
- VIR_DEBUG("clone() completed, new container PID is %d", pid);
+ if (restorefd == -1)
+ VIR_DEBUG("clone() completed, new container PID is %d", pid);
if (pid < 0) {
virReportSystemError(errno, "%s",
diff --git a/src/lxc/lxc_container.h b/src/lxc/lxc_container.h
index 33eaab4..5d47071 100644
--- a/src/lxc/lxc_container.h
+++ b/src/lxc/lxc_container.h
@@ -63,7 +63,8 @@ int lxcContainerStart(virDomainDefPtr def,
int handshakefd,
int *nsInheritFDs,
size_t nttyPaths,
- char **ttyPaths);
+ char **ttyPaths,
+ int restorefd);
int lxcContainerAvailable(int features);
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index e58ff1b..e178195 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -146,6 +146,8 @@ struct _virLXCController {
virCgroupPtr cgroup;
virLXCFusePtr fuse;
+
+ int restore;
};
#include "lxc_controller_dispatch.h"
@@ -1009,6 +1011,64 @@ static int lxcControllerClearCapabilities(void)
return 0;
}
+/*
+ * lxcControllerFindRestoredPid:
+ * @fd: descriptor referring to the checkpoint directory; it is always
+ *      closed before this function returns
+ *
+ * Resolves @fd to the checkpoint directory path through /proc/self/fd,
+ * reads the decimal pid stored in "<checkpointdir>/pidfile" (presumably
+ * written by the CRIU restore -- confirm against lxcCriuRestore) and then
+ * removes that file.
+ *
+ * Returns the pid (> 0) on success or -1 in case of error
+ */
+static int
+lxcControllerFindRestoredPid(int fd)
+{
+    int initpid = 0;
+    int ndigits = 0;
+    int ret = -1;
+    char *checkpointdir = NULL;
+    char *pidfile = NULL;
+    char *checkpointfd = NULL;
+    /* Must start at -1: every early 'goto cleanup' closes this descriptor. */
+    int pidfilefd = -1;
+    char c;
+
+    if (fd < 0)
+        goto cleanup;
+
+    if (virAsprintf(&checkpointfd, "/proc/self/fd/%d", fd) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to write checkpoint dir path"));
+        goto cleanup;
+    }
+
+    if (virFileResolveLink(checkpointfd, &checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to readlink checkpoint dir path"));
+        goto cleanup;
+    }
+
+    if (virAsprintf(&pidfile, "%s/pidfile", checkpointdir) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to write pidfile path"));
+        goto cleanup;
+    }
+
+    if ((pidfilefd = virFileOpenAs(pidfile, O_RDONLY, 0, -1, -1, 0)) < 0) {
+        /* virFileOpenAs() returns -errno on failure, so negate it to get
+         * the positive errno value virReportSystemError() expects. */
+        virReportSystemError(-pidfilefd,
+                             _("Failed to open domain's pidfile '%s'"),
+                             pidfile);
+        goto cleanup;
+    }
+
+    /* Accumulate leading decimal digits only; anything else (such as a
+     * trailing newline) ends the number. At most nine digits are accepted,
+     * which cannot overflow a 32-bit int. */
+    while (saferead(pidfilefd, &c, 1) == 1 && c >= '0' && c <= '9') {
+        if (++ndigits > 9) {
+            initpid = 0;
+            break;
+        }
+        initpid = initpid * 10 + (c - '0');
+    }
+
+    /* An empty, non-numeric or oversized pidfile must not be reported as
+     * a successfully restored pid. */
+    if (initpid <= 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Failed to parse pid from pidfile '%s'"),
+                       pidfile);
+        goto cleanup;
+    }
+
+    ret = initpid;
+
+    /* Failing to remove the pidfile is reported but does not change the
+     * return value. */
+    if (virFileRemove(pidfile, -1, -1) < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("Failed to delete pidfile path"));
+    }
+
+ cleanup:
+    VIR_FORCE_CLOSE(fd);
+    VIR_FORCE_CLOSE(pidfilefd);
+    VIR_FREE(pidfile);
+    VIR_FREE(checkpointdir);
+    VIR_FREE(checkpointfd);
+    return ret;
+}
+
static bool wantReboot;
static virMutex lock = VIR_MUTEX_INITIALIZER;
@@ -2348,6 +2408,7 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
int containerhandshake[2] = { -1, -1 };
char **containerTTYPaths = NULL;
size_t i;
+ bool restore_mode = (ctrl->restore != -1);
if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0)
goto cleanup;
@@ -2404,8 +2465,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
containerhandshake[1],
ctrl->nsFDs,
ctrl->nconsoles,
- containerTTYPaths)) < 0)
+ containerTTYPaths,
+ ctrl->restore)) < 0)
goto cleanup;
+
VIR_FORCE_CLOSE(control[1]);
VIR_FORCE_CLOSE(containerhandshake[1]);
@@ -2416,10 +2479,10 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++)
VIR_FORCE_CLOSE(ctrl->nsFDs[i]);
- if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
+ if (!restore_mode && virLXCControllerSetupCgroupLimits(ctrl) < 0)
goto cleanup;
- if (virLXCControllerSetupUserns(ctrl) < 0)
+ if (!restore_mode && virLXCControllerSetupUserns(ctrl) < 0)
goto cleanup;
if (virLXCControllerMoveInterfaces(ctrl) < 0)
@@ -2444,13 +2507,33 @@ virLXCControllerRun(virLXCControllerPtr ctrl)
if (lxcControllerClearCapabilities() < 0)
goto cleanup;
- if (virLXCControllerDaemonHandshake(ctrl) < 0)
- goto cleanup;
+ if (restore_mode) {
+ int status;
+ int ret = waitpid(-1, &status, 0);
+ VIR_DEBUG("Got sig child %d", ret);
+
+ /* We have two basic cases here.
+ * - CRIU died because of a restore error and we do not have a running container
+ * - CRIU detached itself from the running container
+ */
+ int initpid;
+ if ((initpid = lxcControllerFindRestoredPid(ctrl->restore)) < 0) {
+ virReportSystemError(errno, "%s",
+ _("Unable to get restored task pid"));
+ virNetDaemonQuit(ctrl->daemon);
+ goto cleanup;
+ } else {
+ ctrl->initpid = initpid;
+ }
+ }
for (i = 0; i < ctrl->nconsoles; i++)
if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0)
goto cleanup;
+ if (virLXCControllerDaemonHandshake(ctrl) < 0)
+ goto cleanup;
+
/* We must not hold open a dbus connection for life
* of LXC instance, since dbus-daemon is limited to
* only a few 100 connections by default
@@ -2487,6 +2570,8 @@ int main(int argc, char *argv[])
int ns_fd[VIR_LXC_DOMAIN_NAMESPACE_LAST];
int handshakeFd = -1;
bool bg = false;
+ int restore = -1;
+
const struct option options[] = {
{ "background", 0, NULL, 'b' },
{ "name", 1, NULL, 'n' },
@@ -2498,6 +2583,7 @@ int main(int argc, char *argv[])
{ "share-net", 1, NULL, 'N' },
{ "share-ipc", 1, NULL, 'I' },
{ "share-uts", 1, NULL, 'U' },
+ { "restore", 1, NULL, 'r' },
{ "help", 0, NULL, 'h' },
{ 0, 0, 0, 0 },
};
@@ -2525,7 +2611,7 @@ int main(int argc, char *argv[])
while (1) {
int c;
- c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:",
+ c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:N:I:U:r:",
options, NULL);
if (c == -1)
@@ -2601,6 +2687,14 @@ int main(int argc, char *argv[])
securityDriver = optarg;
break;
+ case 'r':
+ if (virStrToLong_i(optarg, NULL, 10, &restore) < 0) {
+ fprintf(stderr, "malformed --restore argument '%s'",
+ optarg);
+ goto cleanup;
+ }
+ break;
+
case 'h':
case '?':
fprintf(stderr, "\n");
@@ -2617,6 +2711,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -N FD, --share-net FD\n");
fprintf(stderr, " -I FD, --share-ipc FD\n");
fprintf(stderr, " -U FD, --share-uts FD\n");
+ fprintf(stderr, " -r FD, --restore FD\n");
fprintf(stderr, " -h, --help\n");
fprintf(stderr, "\n");
rc = 0;
@@ -2669,6 +2764,8 @@ int main(int argc, char *argv[])
ctrl->passFDs = passFDs;
ctrl->npassFDs = npassFDs;
+ ctrl->restore = restore;
+
for (i = 0; i < VIR_LXC_DOMAIN_NAMESPACE_LAST; i++) {
if (ns_fd[i] != -1) {
if (!ctrl->nsFDs) {/*allocate only once */
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index 46af05d..bd47c91 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1133,7 +1133,7 @@ static int lxcDomainCreateWithFiles(virDomainPtr dom,
ret = virLXCProcessStart(dom->conn, driver, vm,
nfiles, files,
- (flags & VIR_DOMAIN_START_AUTODESTROY),
+ (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
VIR_DOMAIN_RUNNING_BOOTED);
if (ret == 0) {
@@ -1259,7 +1259,7 @@ lxcDomainCreateXMLWithFiles(virConnectPtr conn,
if (virLXCProcessStart(conn, driver, vm,
nfiles, files,
- (flags & VIR_DOMAIN_START_AUTODESTROY),
+ (flags & VIR_DOMAIN_START_AUTODESTROY), -1,
VIR_DOMAIN_RUNNING_BOOTED) < 0) {
virDomainAuditStart(vm, "booted", false);
if (!vm->persistent) {
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 28313f0..b4f92e0 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -123,7 +123,7 @@ virLXCProcessReboot(virLXCDriverPtr driver,
virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_SHUTDOWN);
vm->newDef = savedDef;
if (virLXCProcessStart(conn, driver, vm,
- 0, NULL, autodestroy, reason) < 0) {
+ 0, NULL, autodestroy, -1, reason) < 0) {
VIR_WARN("Unable to handle reboot of vm %s",
vm->def->name);
goto cleanup;
@@ -929,7 +929,8 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
size_t nfiles,
int handshakefd,
int * const logfd,
- const char *pidfile)
+ const char *pidfile,
+ int restorefd)
{
size_t i;
char *filterstr;
@@ -1008,6 +1009,12 @@ virLXCProcessBuildControllerCmd(virLXCDriverPtr driver,
for (i = 0; i < nveths; i++)
virCommandAddArgList(cmd, "--veth", veths[i], NULL);
+ if (restorefd != -1) {
+ virCommandAddArg(cmd, "--restore");
+ virCommandAddArgFormat(cmd, "%d", restorefd);
+ virCommandPassFD(cmd, restorefd, 0);
+ }
+
virCommandPassFD(cmd, handshakefd, 0);
virCommandDaemonize(cmd);
virCommandSetPidFile(cmd, pidfile);
@@ -1181,6 +1188,8 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm)
* @driver: pointer to driver structure
* @vm: pointer to virtual machine structure
* @autoDestroy: mark the domain for auto destruction
+ * @restorefd: file descriptor pointing to the restore directory (-1 if not
+ * restoring)
* @reason: reason for switching vm to running state
*
* Starts a vm
@@ -1192,6 +1201,7 @@ int virLXCProcessStart(virConnectPtr conn,
virDomainObjPtr vm,
unsigned int nfiles, int *files,
bool autoDestroy,
+ int restorefd,
virDomainRunningReason reason)
{
int rc = -1, r;
@@ -1406,7 +1416,7 @@ int virLXCProcessStart(virConnectPtr conn,
files, nfiles,
handshakefds[1],
&logfd,
- pidfile)))
+ pidfile, restorefd)))
goto cleanup;
/* now that we know it is about to start call the hook if present */
@@ -1511,6 +1521,9 @@ int virLXCProcessStart(virConnectPtr conn,
goto cleanup;
}
+ if (restorefd != -1)
+ goto skip_cgroup_checks;
+
/* We know the cgroup must exist by this synchronization
* point so lets detect that first, since it gives us a
* more reliable way to kill everything off if something
@@ -1527,6 +1540,8 @@ int virLXCProcessStart(virConnectPtr conn,
goto cleanup;
}
+ skip_cgroup_checks:
+
/* Get the machine name so we can properly delete it through
* systemd later */
if (!(priv->machineName = virSystemdGetMachineNameByPID(vm->pid)))
@@ -1634,7 +1649,7 @@ virLXCProcessAutostartDomain(virDomainObjPtr vm,
if (vm->autostart &&
!virDomainObjIsActive(vm)) {
ret = virLXCProcessStart(data->conn, data->driver, vm,
- 0, NULL, false,
+ 0, NULL, false, -1,
VIR_DOMAIN_RUNNING_BOOTED);
virDomainAuditStart(vm, "booted", ret >= 0);
if (ret < 0) {
diff --git a/src/lxc/lxc_process.h b/src/lxc/lxc_process.h
index d78cdde..c724f31 100644
--- a/src/lxc/lxc_process.h
+++ b/src/lxc/lxc_process.h
@@ -29,6 +29,7 @@ int virLXCProcessStart(virConnectPtr conn,
virDomainObjPtr vm,
unsigned int nfiles, int *files,
bool autoDestroy,
+ int restorefd,
virDomainRunningReason reason);
int virLXCProcessStop(virLXCDriverPtr driver,
virDomainObjPtr vm,
--
2.7.3