On Tue, Sep 30, 2014 at 03:44:41PM +0100, Daniel P. Berrange wrote:
On Tue, Sep 30, 2014 at 10:43:47AM -0400, Cole Robinson wrote:
> On 09/30/2014 10:25 AM, Guido Günther wrote:
> > On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
> >> On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
> >>> On 09/25/2014 08:30 AM, Guido Günther wrote:
> >>>> If we don't properly clean up all processes in the
> >>>> machine-<vmname>.scope systemd won't remove the cgroup
and subsequent vm
> >>>> starts fail with
> >>>>
> >>>> 'CreateMachine: File exists'
> >>>>
> >>>> Additional processes can e.g. be added via
> >>>>
> >>>> echo $PID >
/sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
> >>>>
> >>>> but there are other cases like
> >>>>
> >>>>
http://bugs.debian.org/761521
> >>>>
> >>>> Invoke TerminateMachine to be on the safe side since systemd tracks
the
> >>>> cgroup anyway. This is a noop if all processes have terminated
already.
> >>>
> >>> Thanks for the patch, I've definitely seen this a handful of times
on Fedora
> >>> as well.
> >>>
> >>>> ---
> >>>> src/libvirt_private.syms | 1 +
> >>>> src/qemu/qemu_cgroup.c | 11 ++++++++++-
> >>>> src/qemu/qemu_cgroup.h | 2 +-
> >>>> src/qemu/qemu_process.c | 4 ++--
> >>>> src/util/vircgroup.c | 11 +++++++++++
> >>>> src/util/vircgroup.h | 5 +++++
> >>>> 6 files changed, 30 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> >>>> index 51a692b..99ef1db 100644
> >>>> --- a/src/libvirt_private.syms
> >>>> +++ b/src/libvirt_private.syms
> >>>> @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit;
> >>>> virCgroupSetMemSwapHardLimit;
> >>>> virCgroupSetOwner;
> >>>> virCgroupSupportsCpuBW;
> >>>> +virCgroupTerminateMachine;
> >>>>
> >>>>
> >>>> # util/virclosecallbacks.h
> >>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> >>>> index 7c6b2c1..0ab7227 100644
> >>>> --- a/src/qemu/qemu_cgroup.c
> >>>> +++ b/src/qemu/qemu_cgroup.c
> >>>> @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr
vm)
> >>>> }
> >>>>
> >>>> int
> >>>> -qemuRemoveCgroup(virDomainObjPtr vm)
> >>>> +qemuRemoveCgroup(virQEMUDriverPtr driver,
> >>>> + virDomainObjPtr vm)
> >>>> {
> >>>> qemuDomainObjPrivatePtr priv = vm->privateData;
> >>>> + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
> >>>>
> >>>> if (priv->cgroup == NULL)
> >>>> return 0; /* Not supported, so claim success */
> >>>>
> >>>> + if (virCgroupTerminateMachine(vm->def->name,
> >>>> + "qemu",
> >>>> + cfg->privileged) < 0) {
> >>>> + if (!virCgroupNewIgnoreError())
> >>>> + VIR_DEBUG("Failed to terminate cgroup for
%s", vm->def->name);
> >>>> + }
> >>>> +
> >>>> return virCgroupRemove(priv->cgroup);
> >>>> }
> >>>>
> >>>> diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
> >>>> index 8a2c723..4a4f22c 100644
> >>>> --- a/src/qemu/qemu_cgroup.h
> >>>> +++ b/src/qemu/qemu_cgroup.h
> >>>> @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr
vm);
> >>>> int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
> >>>> virDomainObjPtr vm,
> >>>> virBitmapPtr nodemask);
> >>>> -int qemuRemoveCgroup(virDomainObjPtr vm);
> >>>> +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr
vm);
> >>>> int qemuAddToCgroup(virDomainObjPtr vm);
> >>>>
> >>>> #endif /* __QEMU_CGROUP_H__ */
> >>>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> >>>> index 13614e9..e7cce1a 100644
> >>>> --- a/src/qemu/qemu_process.c
> >>>> +++ b/src/qemu/qemu_process.c
> >>>> @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn,
> >>>> /* Ensure no historical cgroup for this VM is lying around
bogus
> >>>> * settings */
> >>>> VIR_DEBUG("Ensuring no historical cgroup is lying
around");
> >>>> - qemuRemoveCgroup(vm);
> >>>> + qemuRemoveCgroup(driver, vm);
> >>>>
> >>>> for (i = 0; i < vm->def->ngraphics; ++i) {
> >>>> virDomainGraphicsDefPtr graphics =
vm->def->graphics[i];
> >>>> @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr
driver,
> >>>> }
> >>>>
> >>>> retry:
> >>>> - if ((ret = qemuRemoveCgroup(vm)) < 0) {
> >>>> + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) {
> >>>> if (ret == -EBUSY && (retries++ < 5)) {
> >>>> usleep(200*1000);
> >>>> goto retry;
> >>>> diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
> >>>> index 1dbe6f9..d69f71b 100644
> >>>> --- a/src/util/vircgroup.c
> >>>> +++ b/src/util/vircgroup.c
> >>>> @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char
*name,
> >>>> }
> >>>>
> >>>>
> >>>> +/*
> >>>> + * Returns 0 on success, -1 on fatal error
> >>>> + */
> >>>> +int virCgroupTerminateMachine(const char *name,
> >>>> + const char *drivername,
> >>>> + bool privileged)
> >>>> +{
> >>>> + return virSystemdTerminateMachine(name, drivername,
privileged);
> >>>> +}
> >>>> +
> >>>> +
> >>>> static int
> >>>> virCgroupNewMachineManual(const char *name,
> >>>> const char *drivername,
> >>>> diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
> >>>> index 19e82d1..7718a07 100644
> >>>> --- a/src/util/vircgroup.h
> >>>> +++ b/src/util/vircgroup.h
> >>>> @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name,
> >>>> ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
> >>>> ATTRIBUTE_NONNULL(4);
> >>>>
> >>>> +int virCgroupTerminateMachine(const char *name,
> >>>> + const char *drivername,
> >>>> + bool privileged)
> >>>> + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
> >>>> +
> >>>> bool virCgroupNewIgnoreError(void);
> >>>>
> >>>> void virCgroupFree(virCgroupPtr *group);
> >>>>
> >>>
> >>> All the above seems reasonable to me. ACK
> >>
> >> I'm surprised we see the problem with QEMU, but this matches what we
> >> do for LXC and is recommended by systemd maintainers so fine to for
> >> QEMU too.
> >
> > Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
>
> I don't know why this patch would cause problems... but then again it's
> cgroup/systemd stuff which makes me a little nervous. If it was my patch I'd
> wait until after the release to be safe.
Yeah, I think I'd rather we waited, and put it in a stable release once
we've had some usage in master.
O.k., I've also added it to Debian's experimental package.
Cheers,
-- Guido