On Tue, Sep 30, 2014 at 03:22:41PM +0100, Daniel P. Berrange wrote:
> On Tue, Sep 30, 2014 at 10:10:51AM -0400, Cole Robinson wrote:
>> On 09/25/2014 08:30 AM, Guido Günther wrote:
>>> If we don't properly clean up all processes in the
>>> machine-<vmname>.scope systemd won't remove the cgroup and
subsequent vm
>>> starts fail with
>>>
>>> 'CreateMachine: File exists'
>>>
>>> Additional processes can e.g. be added via
>>>
>>> echo $PID >
/sys/fs/cgroup/systemd/machine.slice/machine-${VMNAME}.scope/tasks
>>>
>>> but there are other cases like
>>>
>>>
http://bugs.debian.org/761521
>>>
>>> Invoke TerminateMachine to be on the safe side since systemd tracks the
>>> cgroup anyway. This is a noop if all processes have terminated already.
>>
>> Thanks for the patch, I've definitely seen this a handful of times on Fedora
>> as well.
>>
>>> ---
>>> src/libvirt_private.syms | 1 +
>>> src/qemu/qemu_cgroup.c | 11 ++++++++++-
>>> src/qemu/qemu_cgroup.h | 2 +-
>>> src/qemu/qemu_process.c | 4 ++--
>>> src/util/vircgroup.c | 11 +++++++++++
>>> src/util/vircgroup.h | 5 +++++
>>> 6 files changed, 30 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>>> index 51a692b..99ef1db 100644
>>> --- a/src/libvirt_private.syms
>>> +++ b/src/libvirt_private.syms
>>> @@ -1115,6 +1115,7 @@ virCgroupSetMemorySoftLimit;
>>> virCgroupSetMemSwapHardLimit;
>>> virCgroupSetOwner;
>>> virCgroupSupportsCpuBW;
>>> +virCgroupTerminateMachine;
>>>
>>>
>>> # util/virclosecallbacks.h
>>> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
>>> index 7c6b2c1..0ab7227 100644
>>> --- a/src/qemu/qemu_cgroup.c
>>> +++ b/src/qemu/qemu_cgroup.c
>>> @@ -1188,13 +1188,22 @@ qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
>>> }
>>>
>>> int
>>> -qemuRemoveCgroup(virDomainObjPtr vm)
>>> +qemuRemoveCgroup(virQEMUDriverPtr driver,
>>> + virDomainObjPtr vm)
>>> {
>>> qemuDomainObjPrivatePtr priv = vm->privateData;
>>> + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
>>>
>>> if (priv->cgroup == NULL)
>>> return 0; /* Not supported, so claim success */
>>>
>>> + if (virCgroupTerminateMachine(vm->def->name,
>>> + "qemu",
>>> + cfg->privileged) < 0) {
>>> + if (!virCgroupNewIgnoreError())
>>> + VIR_DEBUG("Failed to terminate cgroup for %s",
vm->def->name);
>>> + }
>>> +
>>> return virCgroupRemove(priv->cgroup);
>>> }
>>>
>>> diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h
>>> index 8a2c723..4a4f22c 100644
>>> --- a/src/qemu/qemu_cgroup.h
>>> +++ b/src/qemu/qemu_cgroup.h
>>> @@ -66,7 +66,7 @@ int qemuSetupCgroupForIOThreads(virDomainObjPtr vm);
>>> int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
>>> virDomainObjPtr vm,
>>> virBitmapPtr nodemask);
>>> -int qemuRemoveCgroup(virDomainObjPtr vm);
>>> +int qemuRemoveCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm);
>>> int qemuAddToCgroup(virDomainObjPtr vm);
>>>
>>> #endif /* __QEMU_CGROUP_H__ */
>>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>>> index 13614e9..e7cce1a 100644
>>> --- a/src/qemu/qemu_process.c
>>> +++ b/src/qemu/qemu_process.c
>>> @@ -4131,7 +4131,7 @@ int qemuProcessStart(virConnectPtr conn,
>>> /* Ensure no historical cgroup for this VM is lying around bogus
>>> * settings */
>>> VIR_DEBUG("Ensuring no historical cgroup is lying around");
>>> - qemuRemoveCgroup(vm);
>>> + qemuRemoveCgroup(driver, vm);
>>>
>>> for (i = 0; i < vm->def->ngraphics; ++i) {
>>> virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
>>> @@ -4909,7 +4909,7 @@ void qemuProcessStop(virQEMUDriverPtr driver,
>>> }
>>>
>>> retry:
>>> - if ((ret = qemuRemoveCgroup(vm)) < 0) {
>>> + if ((ret = qemuRemoveCgroup(driver, vm)) < 0) {
>>> if (ret == -EBUSY && (retries++ < 5)) {
>>> usleep(200*1000);
>>> goto retry;
>>> diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
>>> index 1dbe6f9..d69f71b 100644
>>> --- a/src/util/vircgroup.c
>>> +++ b/src/util/vircgroup.c
>>> @@ -1680,6 +1680,17 @@ virCgroupNewMachineSystemd(const char *name,
>>> }
>>>
>>>
>>> +/*
>>> + * Returns 0 on success, -1 on fatal error
>>> + */
>>> +int virCgroupTerminateMachine(const char *name,
>>> + const char *drivername,
>>> + bool privileged)
>>> +{
>>> + return virSystemdTerminateMachine(name, drivername, privileged);
>>> +}
>>> +
>>> +
>>> static int
>>> virCgroupNewMachineManual(const char *name,
>>> const char *drivername,
>>> diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
>>> index 19e82d1..7718a07 100644
>>> --- a/src/util/vircgroup.h
>>> +++ b/src/util/vircgroup.h
>>> @@ -106,6 +106,11 @@ int virCgroupNewMachine(const char *name,
>>> ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
>>> ATTRIBUTE_NONNULL(4);
>>>
>>> +int virCgroupTerminateMachine(const char *name,
>>> + const char *drivername,
>>> + bool privileged)
>>> + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
>>> +
>>> bool virCgroupNewIgnoreError(void);
>>>
>>> void virCgroupFree(virCgroupPtr *group);
>>>
>>
>> All the above seems reasonable to me. ACK
>
> I'm surprised we see the problem with QEMU, but this matches what we
> do for LXC and is recommended by systemd maintainers so fine to for
> QEMU too.
Thanks to the two of you for reviewing. Should this go into 1.2.9 ?
I don't know why this patch would cause problems... but then again it's
cgroup/systemd stuff which makes me a little nervous. If it was my patch I'd
wait until after the release to be safe.
- Cole