On Thu, Jan 05, 2017 at 03:30:56PM +0000, Daniel P. Berrange wrote:
Currently when spawning containers with systemd, the container PID 1
will get moved into the systemd machine slice. Libvirt then manually
moves the libvirt_lxc and qemu-nbd processes into the cgroups associated
with the slice, but skips the systemd controller cgroup. This means that
from systemd's POV, libvirt_lxc and qemu-nbd are still part of the
libvirtd.service unit.
On systemctl daemon-reload, it will notice that libvirt_lxc & qemu-nbd
are in the libvirtd.service unit for the systemd controller, but in the
machine cgroups for resources. Systemd will thus move them back into
the libvirtd.service resource cgroups next time libvirtd is restarted.
This causes libvirtd to kill off the container due to incorrect cgroup
placement.
The solution is to ensure that when moving libvirt_lxc & qemu-nbd, we
also move the systemd cgroup controller placement. Normally this is
not something we ever want todo, but this is a special case as we are
intentionally wanting to move them to a different systemd unit.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/libvirt_private.syms | 1 +
src/lxc/lxc_controller.c | 4 ++--
src/util/vircgroup.c | 52 +++++++++++++++++++++++++++++++++++++-----------
src/util/vircgroup.h | 1 +
4 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 2d23e46..2dd5809 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1287,6 +1287,7 @@ virBufferVasprintf;
# util/vircgroup.h
+virCgroupAddMachineTask;
virCgroupAddTask;
virCgroupAddTaskController;
virCgroupAllowAllDevices;
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 2170b0a..88ba8aa 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -869,12 +869,12 @@ static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr
ctrl)
ctrl->nicindexes)))
goto cleanup;
- if (virCgroupAddTask(ctrl->cgroup, getpid()) < 0)
+ if (virCgroupAddMachineTask(ctrl->cgroup, getpid()) < 0)
goto cleanup;
/* Add all qemu-nbd tasks to the cgroup */
for (i = 0; i < ctrl->nnbdpids; i++) {
- if (virCgroupAddTask(ctrl->cgroup, ctrl->nbdpids[i]) < 0)
+ if (virCgroupAddMachineTask(ctrl->cgroup, ctrl->nbdpids[i]) < 0)
goto cleanup;
}
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 80ce43c..ddf19e9 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1184,16 +1184,8 @@ virCgroupNew(pid_t pid,
}
-/**
- * virCgroupAddTask:
- *
- * @group: The cgroup to add a task to
- * @pid: The pid of the task to add
- *
- * Returns: 0 on success, -1 on error
- */
-int
-virCgroupAddTask(virCgroupPtr group, pid_t pid)
+static int
+virCgroupAddTaskInternal(virCgroupPtr group, pid_t pid, bool withSystemd)
{
int ret = -1;
size_t i;
@@ -1203,8 +1195,10 @@ virCgroupAddTask(virCgroupPtr group, pid_t pid)
if (!group->controllers[i].mountPoint)
continue;
- /* We must never add tasks in systemd's hierarchy */
- if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
+ /* We must never add tasks in systemd's hierarchy
+ * unless we're intentionally trying to move a
+ * task into a systemd machine scope */
+ if (i == VIR_CGROUP_CONTROLLER_SYSTEMD && !withSystemd)
continue;
if (virCgroupAddTaskController(group, pid, i) < 0)
@@ -1216,6 +1210,40 @@ virCgroupAddTask(virCgroupPtr group, pid_t pid)
return ret;
}
+/**
+ * virCgroupAddTask:
+ *
+ * @group: The cgroup to add a task to
+ * @pid: The pid of the task to add
+ *
+ * Will add the task to all controllers, except the
+ * systemd unit controller.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virCgroupAddTask(virCgroupPtr group, pid_t pid)
+{
+ return virCgroupAddTaskInternal(group, pid, false);
+}
+
+/**
+ * virCgroupAddMachineTask:
+ *
+ * @group: The cgroup to add a task to
+ * @pid: The pid of the task to add
+ *
+ * Will add the task to all controllers, including the
+ * systemd unit controller.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int
+virCgroupAddMachineTask(virCgroupPtr group, pid_t pid)
+{
+ return virCgroupAddTaskInternal(group, pid, true);
+}
+
/**
* virCgroupAddTaskController:
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
index 4b8f3ff..2de1bf2 100644
--- a/src/util/vircgroup.h
+++ b/src/util/vircgroup.h
@@ -131,6 +131,7 @@ int virCgroupPathOfController(virCgroupPtr group,
char **path);
int virCgroupAddTask(virCgroupPtr group, pid_t pid);
+int virCgroupAddMachineTask(virCgroupPtr group, pid_t pid);
int virCgroupAddTaskController(virCgroupPtr group,
pid_t pid,
ACK. This fixes the lxc container restart problems for me. Thanks a lot!
-- Guido