[libvirt] [PATCH] qemu: use systemd's TerminateMachine to kill all processes

If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with 'CreateMachine: File exists' Additional processes can e.g. be added via echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks but there are other cases like http://bugs.debian.org/761521 Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already. --- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine; # util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) } int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */ + if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); } diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm); #endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm); for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, } retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, } +/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4); +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void); void virCgroupFree(virCgroupPtr *group); -- 2.1.0

On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK - Cole

On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ? Cheers, -- Guido

On 09/30/2014 10:25 AM, Guido Günther wrote:
On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
I don't know why this patch would cause problems... but then again it's cgroup/systemd stuff which makes me a little nervous. If it was my patch I'd wait until after the release to be safe. - Cole

On Tue, Sep 30, 2014 at 10:43:47AM -0400, Cole Robinson wrote:
On 09/30/2014 10:25 AM, Guido Günther wrote:
On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
I don't know why this patch would cause problems... but then again it's cgroup/systemd stuff which makes me a little nervous. If it was my patch I'd wait until after the release to be safe.
Yeah, I think I'd rather we waited, and put it in a stable release once we've had some usage in master. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, Sep 30, 2014 at 03:44:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:43:47AM -0400, Cole Robinson wrote:
On 09/30/2014 10:25 AM, Guido Günther wrote:
On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
I don't know why this patch would cause problems... but then again it's cgroup/systemd stuff which makes me a little nervous. If it was my patch I'd wait until after the release to be safe.
Yeah, I think I'd rather we waited, and put it in a stable release once we've had some usage in master.
O.k., I've also added it to Debian's experimental package. Cheers, -- Guido

On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
On 09/25/2014 08:30 AM, Guido Günther wrote:
If we don't properly clean up all processes in the machine-<vmname>.scope systemd won't remove the cgroup and subsequent vm starts fail with
'CreateMachine: File exists'
Additional processes can e.g. be added via
echo $PID > /sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
but there are other cases like
Invoke TerminateMachine to be on the safe side since systemd tracks the cgroup anyway. This is a noop if all processes have terminated already.
Thanks for the patch, I've definitely seen this a handful of times on Fedora as well.
--- src/libvirt_private.syms | 1 + src/qemu/qemu_cgroup.c | 11 ++++++++++- src/qemu/qemu_cgroup.h | 2 +- src/qemu/qemu_process.c | 4 ++-- src/util/vircgroup.c | 11 +++++++++++ src/util/vircgroup.h | 5 +++++ 6 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 51a692b..99ef1db 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit; virCgroupSetMemSwapHardLimit; virCgroupSetOwner; virCgroupSupportsCpuBW; +virCgroupTerminateMachine;
# util/virclosecallbacks.h diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 7c6b2c1..0ab7227 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm) }
int -qemuRemoveCgroup(virDomainObjPtr vm) +qemuRemoveCgroup(virQEMUDriverPtr driver, + virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
if (priv->cgroup == NULL) return 0; /* Not supported, so claim success */
+ if (virCgroupTerminateMachine(vm->def->name, + "qemu", + cfg->privileged) < 0) { + if (!virCgroupNewIgnoreError()) + VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name); + } + return virCgroupRemove(priv->cgroup); }
diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8a2c723..4a4f22c 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm); int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); -int qemuRemoveCgroup(virDomainObjPtr vm); +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm); int qemuAddToCgroup(virDomainObjPtr vm);
#endif /* __QEMU_CGROUP_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 13614e9..e7cce1a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn, /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(vm); + qemuRemoveCgroup(driver, vm);
for (i = 0; i < vm->def->ngraphics; ++i) { virDomainGraphicsDefPtr graphics = vm->def->graphics[i]; @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver, }
retry: - if ((ret = qemuRemoveCgroup(vm)) < 0) { + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) { if (ret == -EBUSY && (retries++ < 5)) { usleep(200*1000); goto retry; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 1dbe6f9..d69f71b 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name, }
+/* + * Returns 0 on success, -1 on fatal error + */ +int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) +{ + return virSystemdTerminateMachine(name, drivername, privileged); +} + + static int virCgroupNewMachineManual(const char *name, const char *drivername, diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 19e82d1..7718a07 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4);
+int virCgroupTerminateMachine(const char *name, + const char *drivername, + bool privileged) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + bool virCgroupNewIgnoreError(void);
void virCgroupFree(virCgroupPtr *group);
All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we do for LXC and is recommended by systemd maintainers so fine to for QEMU too.
Pushed now. Thanks, -- Guido
participants (3)
-
Cole Robinson
-
Daniel P. Berrange
-
Guido Günther