On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
> On 09/25/2014 08:30 AM, Guido Günther wrote:
> > If we don't properly clean up all processes in the
> > machine-<vmname>.scope systemd won't remove the cgroup and subsequent
vm
> > starts fail with
> >
> > 'CreateMachine: File exists'
> >
> > Additional processes can e.g. be added via
> >
> > echo $PID >
/sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
> >
> > but there are other cases like
> >
> >
http://bugs.debian.org/761521
> >
> > Invoke TerminateMachine to be on the safe side since systemd tracks the
> > cgroup anyway. This is a noop if all processes have terminated already.
>
> Thanks for the patch, I've definitely seen this a handful of times on Fedora
> as well.
>
> > ---
> > src/libvirt_private.syms | 1 +
> > src/qemu/qemu_cgroup.c | 11 ++++++++++-
> > src/qemu/qemu_cgroup.h | 2 +-
> > src/qemu/qemu_process.c | 4 ++--
> > src/util/vircgroup.c | 11 +++++++++++
> > src/util/vircgroup.h | 5 +++++
> > 6 files changed, 30 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> > index 51a692b..99ef1db 100644
> > --- a/src/libvirt_private.syms
> > +++ b/src/libvirt_private.syms
> > @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit;
> > virCgroupSetMemSwapHardLimit;
> > virCgroupSetOwner;
> > virCgroupSupportsCpuBW;
> > +virCgroupTerminateMachine;
> >
> >
> > # util/virclosecallbacks.h
> > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> > index 7c6b2c1..0ab7227 100644
> > --- a/src/qemu/qemu_cgroup.c
> > +++ b/src/qemu/qemu_cgroup.c
> > @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
> > }
> >
> > int
> > -qemuRemoveCgroup(virDomainObjPtr vm)
> > +qemuRemoveCgroup(virQEMUDriverPtr driver,
> > + virDomainObjPtr vm)
> > {
> > qemuDomainObjPrivatePtr priv = vm->privateData;
> > + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> >
> > if (priv->cgroup == NULL)
> > return 0; /* Not supported, so claim success */
> >
> > + if (virCgroupTerminateMachine(vm->def->name,
> > + "qemu",
> > + cfg->privileged) < 0) {
> > + if (!virCgroupNewIgnoreError())
> > + VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
> > + }
> > +
> > return virCgroupRemove(priv->cgroup);
> > }
> >
> > diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
> > index 8a2c723..4a4f22c 100644
> > --- a/src/qemu/qemu_cgroup.h
> > +++ b/src/qemu/qemu_cgroup.h
> > @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm);
> > int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
> > virDomainObjPtr vm,
> > virBitmapPtr nodemask);
> > -int qemuRemoveCgroup(virDomainObjPtr vm);
> > +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm);
> > int qemuAddToCgroup(virDomainObjPtr vm);
> >
> > #endif /* __QEMU_CGROUP_H__ */
> > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> > index 13614e9..e7cce1a 100644
> > --- a/src/qemu/qemu_process.c
> > +++ b/src/qemu/qemu_process.c
> > @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn,
> > /* Ensure no historical cgroup for this VM is lying around bogus
> > * settings */
> > VIR_DEBUG("Ensuring no historical cgroup is lying around");
> > - qemuRemoveCgroup(vm);
> > + qemuRemoveCgroup(driver, vm);
> >
> > for (i = 0; i < vm->def->ngraphics; ++i) {
> > virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
> > @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver,
> > }
> >
> > retry:
> > - if ((ret = qemuRemoveCgroup(vm)) < 0) {
> > + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) {
> > if (ret == -EBUSY && (retries++ < 5)) {
> > usleep(200*1000);
> > goto retry;
> > diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
> > index 1dbe6f9..d69f71b 100644
> > --- a/src/util/vircgroup.c
> > +++ b/src/util/vircgroup.c
> > @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name,
> > }
> >
> >
> > +/*
> > + * Returns 0 on success, -1 on fatal error
> > + */
> > +int virCgroupTerminateMachine(const char *name,
> > + const char *drivername,
> > + bool privileged)
> > +{
> > + return virSystemdTerminateMachine(name, drivername, privileged);
> > +}
> > +
> > +
> > static int
> > virCgroupNewMachineManual(const char *name,
> > const char *drivername,
> > diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
> > index 19e82d1..7718a07 100644
> > --- a/src/util/vircgroup.h
> > +++ b/src/util/vircgroup.h
> > @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name,
> > ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> > ATTRIBUTE_NONNULL(4);
> >
> > +int virCgroupTerminateMachine(const char *name,
> > + const char *drivername,
> > + bool privileged)
> > + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> > +
> > bool virCgroupNewIgnoreError(void);
> >
> > void virCgroupFree(virCgroupPtr *group);
> >
>
> All the above seems reasonable to me. ACK
I'm surprised we see the problem with QEMU, but this matches what we
do for LXC and is recommended by systemd maintainers so fine to for
QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
Cheers,
-- Guido