[libvirt] [PATCH v1 0/3] unplug timeout QEMU configuration

newer
[libvirt] [PATCH] vircgroupv2: fix...

Daniel Henrique Barboza

19 Aug 2019 19 Aug '19

6:45 a.m.

At this moment, the unplug timeout Libvirt uses is 5 seconds, which is good enough for most cases but can be troublesome when it is not enough. For example, PowerPC guests with lots of vcpus can experience the 'vcpu unplug request timed out' message depending on the guest workload. What happens is that the user doesn't know whether there was a problem in the unplug operation in QEMU, or the process took a bit longer than 5 seconds and Libvirt timed out, with QEMU carrying on the unplug operation regardless. This series implements a new 'unplug_timeout' attribute that lets users set bigger timeouts, if they so choose. The existing 5 seconds timeout is the default value if the attribute isn't set. 5 seconds is also the minimum timeout allowed. Daniel Henrique Barboza (3): adding unplug_timeout QEMU conf qemu_hotplug: use qemu_driver->unplugTimeout in device removal qemu: Remove qemu_hotplugpriv.h src/qemu/Makefile.inc.am | 1 - src/qemu/libvirtd_qemu.aug | 3 +++ src/qemu/qemu.conf | 4 ++++ src/qemu/qemu_conf.c | 26 ++++++++++++++++++++++++ src/qemu/qemu_conf.h | 5 +++++ src/qemu/qemu_driver.c | 2 ++ src/qemu/qemu_hotplug.c | 15 +++++--------- src/qemu/qemu_hotplugpriv.h | 32 ------------------------------ src/qemu/test_libvirtd_qemu.aug.in | 1 + tests/qemuhotplugtest.c | 3 +-- 10 files changed, 47 insertions(+), 45 deletions(-) delete mode 100644 src/qemu/qemu_hotplugpriv.h -- 2.21.0

Show replies by date

Daniel Henrique Barboza

19 Aug 19 Aug

6:45 a.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user. This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/libvirtd_qemu.aug | 3 +++ src/qemu/qemu.conf | 4 ++++ src/qemu/qemu_conf.c | 26 ++++++++++++++++++++++++++ src/qemu/qemu_conf.h | 5 +++++ src/qemu/qemu_driver.c | 2 ++ src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 41 insertions(+) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index 2a99a0c55f..3bf94c9235 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -130,6 +130,8 @@ module Libvirtd_qemu = let capability_filters_entry = str_array_entry "capability_filters" + let unplug_timeout_entry = int_entry "unplug_timeout" + (* Each entry in the config is one of the following ... *) let entry = default_tls_entry | vnc_entry @@ -152,6 +154,7 @@ module Libvirtd_qemu = | nbd_entry | swtpm_entry | capability_filters_entry + | unplug_timeout_entry let comment = [ label "#comment" . del /#[ \t]*/ "# " . store /([^ \t\n][^\n]*)?/ . del /\n/ "\n" ] let empty = [ label "#empty" . eol ] diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index 8cabeccacb..c6d0f0940c 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -847,3 +847,7 @@ # may change across versions. # #capability_filters = [ "capname" ] + +# Timeout, in seconds, for unplug operations. Default and minimal value +# is 5. 
+#unplug_timeout = 5 diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 2953893337..83d4ac8310 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -69,6 +69,8 @@ VIR_LOG_INIT("qemu.qemu_conf"); #define QEMU_MIGRATION_PORT_MIN 49152 #define QEMU_MIGRATION_PORT_MAX 49215 +#define QEMU_UNPLUG_TIMEOUT 5 + static virClassPtr virQEMUDriverConfigClass; static void virQEMUDriverConfigDispose(void *obj); @@ -298,6 +300,8 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) cfg->glusterDebugLevel = 4; cfg->stdioLogD = true; + cfg->unplugTimeout = QEMU_UNPLUG_TIMEOUT; + if (!(cfg->namespaces = virBitmapNew(QEMU_DOMAIN_NS_LAST))) goto error; @@ -1009,6 +1013,24 @@ virQEMUDriverConfigLoadCapsFiltersEntry(virQEMUDriverConfigPtr cfg, } +static int +virQEMUDriverConfigLoadUnplugTimeoutEntry(virQEMUDriverConfigPtr cfg, + virConfPtr conf) +{ + if (virConfGetValueUInt(conf, "unplug_timeout", &cfg->unplugTimeout) < 0) + return -1; + + if (cfg->unplugTimeout < QEMU_UNPLUG_TIMEOUT) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unplug_timeout: value must be greater " + "than or equal to %d"), QEMU_UNPLUG_TIMEOUT); + return -1; + } + + return 0; +} + + int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, const char *filename, bool privileged) @@ -1081,6 +1103,10 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, if (virQEMUDriverConfigLoadCapsFiltersEntry(cfg, conf) < 0) goto cleanup; + if (virQEMUDriverConfigLoadUnplugTimeoutEntry(cfg, conf) < 0) + goto cleanup; + + ret = 0; cleanup: diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 0cbddd7a9c..29824e4e35 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -214,6 +214,8 @@ struct _virQEMUDriverConfig { gid_t swtpm_group; char **capabilityfilters; + + unsigned int unplugTimeout; }; /* Main driver state */ @@ -294,6 +296,9 @@ struct _virQEMUDriver { /* Immutable pointer, self-locking APIs */ virHashAtomicPtr migrationErrors; + + /* Immutable value */ + 
unsigned int unplugTimeout; }; virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 11f97dbc65..5e86d4cfe6 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1078,6 +1078,8 @@ qemuStateInitialize(bool privileged, if (!qemu_driver->workerPool) goto error; + qemu_driver->unplugTimeout = cfg->unplugTimeout * 1000ul; + qemuProcessReconnectAll(qemu_driver); qemuAutostartDomains(qemu_driver); diff --git a/src/qemu/test_libvirtd_qemu.aug.in b/src/qemu/test_libvirtd_qemu.aug.in index 9f81759cb5..a1cd48f1ee 100644 --- a/src/qemu/test_libvirtd_qemu.aug.in +++ b/src/qemu/test_libvirtd_qemu.aug.in @@ -109,3 +109,4 @@ module Test_libvirtd_qemu = { "capability_filters" { "1" = "capname" } } +unplug_timeout = 5 -- 2.21.0

Ján Tomko

6:16 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

On Sun, Aug 18, 2019 at 06:45:29PM -0300, Daniel Henrique Barboza wrote:

...

For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user.

The commit that introduced this error message: commit e3229f6e4461cd1721dc68a32e16ab1718ae716e qemu: hotplug: Add support for VCPU unplug specifically says that we treat this differently than regular device detach: As the new code is using device_del all the implications of using it are present. Contrary to the device deletion code, the vcpu deletion code fails if the unplug request is not executed in time. Technically, we already did execute the unplug request so we lie to the user saying "operation failed". Maybe we can revisit the decision? [cc-ing pkrempa who added this]

...

This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed.

The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable.

...

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/libvirtd_qemu.aug | 3 +++ src/qemu/qemu.conf | 4 ++++ src/qemu/qemu_conf.c | 26 ++++++++++++++++++++++++++ src/qemu/qemu_conf.h | 5 +++++ src/qemu/qemu_driver.c | 2 ++ src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 41 insertions(+)

[...]

...

diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 0cbddd7a9c..29824e4e35 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -214,6 +214,8 @@ struct _virQEMUDriverConfig { gid_t swtpm_group;

char **capabilityfilters; + + unsigned int unplugTimeout; };

/* Main driver state */ @@ -294,6 +296,9 @@ struct _virQEMUDriver {

/* Immutable pointer, self-locking APIs */ virHashAtomicPtr migrationErrors; + + /* Immutable value */ + unsigned int unplugTimeout; };

Why store this value twice? Jano

Daniel Henrique Barboza

7:07 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

Hi, On 8/19/19 6:16 AM, Ján Tomko wrote:

...

On Sun, Aug 18, 2019 at 06:45:29PM -0300, Daniel Henrique Barboza wrote:

...
For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user.

The commit that introduced this error message: commit e3229f6e4461cd1721dc68a32e16ab1718ae716e qemu: hotplug: Add support for VCPU unplug

specifically says that we treat this differently than regular device detach:

As the new code is using device_del all the implications of using it are present. Contrary to the device deletion code, the vcpu deletion code fails if the unplug request is not executed in time.

Technically, we already did execute the unplug request so we lie to the user saying "operation failed".

Maybe we can revisit the decision? [cc-ing pkrempa who added this]

I have thought about making setvcpus asynchronous when it is a device_del operation. This would be more code (perhaps a new command to do that? Or a --asynchronous option?), and we can set user expectations properly by making it clear that this is an unplug request command, and the user will need to check the result personally in the guest. But then, if we are prepared to tell the user "go check yourself" we can simply change the current timeout message to say "vcpu unplug [...] timeout, check unplug status in the guest". This would be clearer, and the user wouldn't automatically assume that timeout == operation failed. Another thing we can do, instead of exposing the option to the user (which has a good potential for disaster - hence why I added a minimum value), is to simply set the timeout to a greater value (10? 15?) and be done with it. If we set the timeout to 15 seconds and change the timeout message to let the user know "we don't know, you'll need to check", like I said above, we have more resilience and will not alarm the user if a timeout still occurs. We'll also avoid exposing the timeout configuration like I'm doing here.

...

...
This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed.

The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable.

...
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/libvirtd_qemu.aug         | 3 +++ src/qemu/qemu.conf                 | 4 ++++ src/qemu/qemu_conf.c               | 26 ++++++++++++++++++++++++++ src/qemu/qemu_conf.h               | 5 +++++ src/qemu/qemu_driver.c             | 2 ++ src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 41 insertions(+)

[...]

...
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 0cbddd7a9c..29824e4e35 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -214,6 +214,8 @@ struct _virQEMUDriverConfig {     gid_t swtpm_group;

    char **capabilityfilters; + +    unsigned int unplugTimeout; };

/* Main driver state */ @@ -294,6 +296,9 @@ struct _virQEMUDriver {

    /* Immutable pointer, self-locking APIs */     virHashAtomicPtr migrationErrors; + +    /* Immutable value */ +    unsigned int unplugTimeout; };

Why store this value twice?

I wanted the value to be available in the driver object, but saw that the parsing of the config file puts stuff in config. However, I just realized that we can get to the cfg via qemu_driver->config (as long as the lock is being held, which I think is the case for all unplug operations). In case we still want this configurable timeout solution, I'll fix this in the next spin. Thanks, DHB

...

Jano

Daniel Henrique Barboza

30 Aug 30 Aug

5:43 a.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

Bump. Does anyone else want to weigh in on this? Jano suggested that exposing the timeout configuration is not the best plan, and I don't have a strong opinion about it. I even mentioned below that I'm ok with the idea of simply increasing the timeout to 15 seconds and changing the error message a bit to let users know that this is not an error per se and the user should check the guest to see the result. If no one else has feelings on this matter, I'll simply re-send the series with the change I just mentioned. I might also keep the unplug settings in the config object to get rid of qemu_hotplugpriv.h (patch 3/3). Thanks, DHB On 8/19/19 7:07 AM, Daniel Henrique Barboza wrote:

...

Hi,

On 8/19/19 6:16 AM, Ján Tomko wrote:

...
On Sun, Aug 18, 2019 at 06:45:29PM -0300, Daniel Henrique Barboza wrote:

...
For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user.

The commit that introduced this error message:    commit e3229f6e4461cd1721dc68a32e16ab1718ae716e        qemu: hotplug: Add support for VCPU unplug

specifically says that we treat this differently than regular device detach:

   As the new code is using device_del all the implications of using it    are present. Contrary to the device deletion code, the vcpu deletion    code fails if the unplug request is not executed in time.

Technically, we already did execute the unplug request so we lie to the user saying "operation failed".

Maybe we can revisit the decision? [cc-ing pkrempa who added this]

I have thought about making setvcpus asynchronous when it is a device_del operation. This would be more code (perhaps a new command to do that? Or a --asynchronous option?), and we can set user expectations properly by making it clear that this is a unplug request command, and the user will need to check the result personally in the guest.

But then, if we are prepared to tell the user "go check yourself" we can simply change the current timeout message to say "vcpu unplug [...] timeout, check unplug status in the guest". This would be clearer, and the user wouldn't automatically assume that timeout == operation failed.

Another thing we can do, instead of exposing the option to the user (which has a good potential for disaster - hence why I added a minimal value), is to simply set the timeout to a greater value (10? 15?) and be done with it. If we set the timeout to 15 seconds and change the timeout message to let the user know "we don't know, you'll need to check", like I said above, we have more resilience and will not alarm the user if a timeout still occurs. We'll also avoid exposing the timeout configuration like I'm doing here.

...
...
This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed.

The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable.

...
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/libvirtd_qemu.aug         | 3 +++ src/qemu/qemu.conf                 | 4 ++++ src/qemu/qemu_conf.c               | 26 ++++++++++++++++++++++++++ src/qemu/qemu_conf.h               | 5 +++++ src/qemu/qemu_driver.c             | 2 ++ src/qemu/test_libvirtd_qemu.aug.in | 1 + 6 files changed, 41 insertions(+)

[...]

...
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 0cbddd7a9c..29824e4e35 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -214,6 +214,8 @@ struct _virQEMUDriverConfig {     gid_t swtpm_group;

    char **capabilityfilters; + +    unsigned int unplugTimeout; };

/* Main driver state */ @@ -294,6 +296,9 @@ struct _virQEMUDriver {

    /* Immutable pointer, self-locking APIs */     virHashAtomicPtr migrationErrors; + +    /* Immutable value */ +    unsigned int unplugTimeout; };

Why store this value twice?

I wanted the value to be available at the driver object, but saw that the parsing of the reading file put stuff in config.

However, just realized that we get to the cfg via qemu_driver->config (as long as the lock is being held, which I think it's the case for all unplug operations).

In case we still want this configurable timeout solution, I'll fix this in the next spin.

Thanks,

DHB

...
Jano

Daniel P. Berrangé

5:24 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

On Mon, Aug 19, 2019 at 11:16:24AM +0200, Ján Tomko wrote:

...

On Sun, Aug 18, 2019 at 06:45:29PM -0300, Daniel Henrique Barboza wrote:

...
For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user.

The commit that introduced this error message: commit e3229f6e4461cd1721dc68a32e16ab1718ae716e qemu: hotplug: Add support for VCPU unplug

specifically says that we treat this differently than regular device detach:

As the new code is using device_del all the implications of using it are present. Contrary to the device deletion code, the vcpu deletion code fails if the unplug request is not executed in time.

Technically, we already did execute the unplug request so we lie to the user saying "operation failed".

Maybe we can revisit the decision? [cc-ing pkrempa who added this]

...
This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed.

The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable.

I'm especially *not* a fan because the commit message says this is a problem on certain architectures. Since we know what those arches are, we should use a larger timeout for those arches out of the box. Requiring the admin to set a config param to fix the architectures is a super unpleasant out-of-the-box experience. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Daniel Henrique Barboza

6:46 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

On 8/30/19 5:24 AM, Daniel P. Berrangé wrote:

...

On Mon, Aug 19, 2019 at 11:16:24AM +0200, Ján Tomko wrote:

...
On Sun, Aug 18, 2019 at 06:45:29PM -0300, Daniel Henrique Barboza wrote:

...
For some architectures and setups, device removal can take longer than the default 5 seconds. This results in commands such as 'virsh setvcpus' to fire timeout messages even if the actual operation happened in the guest, causing confusion for the user.

The commit that introduced this error message: commit e3229f6e4461cd1721dc68a32e16ab1718ae716e qemu: hotplug: Add support for VCPU unplug

specifically says that we treat this differently than regular device detach:

As the new code is using device_del all the implications of using it are present. Contrary to the device deletion code, the vcpu deletion code fails if the unplug request is not executed in time.

Technically, we already did execute the unplug request so we lie to the user saying "operation failed".

Maybe we can revisit the decision? [cc-ing pkrempa who added this]

...
This patch adds a new qemu.conf parameter called 'unplug_timeout' to handle these cases. If left unset, the current default timeout is used. To avoid user 'experimentation' with small timeouts, the current timeout is also the minimal value allowed.

The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable. I'm especially *not* a fan because the commit messages says this is a problem on certain architectures. Since we know what those arches are, we should use a larger timeout for those arches out of the box. Requiring admin to set a config param to fix the architectures is super unpleasant out of the box experiance.

Good point. I'll re-send the series changing the timeout for PowerPC guests only. There's no need to impact all users for a problem that so far only impacts PPC. Thanks, DHB

...

Regards, Daniel

Christophe de Dinechin

8:15 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

Daniel P. Berrangé writes:

...

...
...
The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable.

I'm especially *not* a fan because the commit message says this is a problem on certain architectures. Since we know what those arches are, we should use a larger timeout for those arches out of the box. Requiring the admin to set a config param to fix the architectures is a super unpleasant out-of-the-box experience.

True, but also notice that 5 seconds is already close to the attention span time limit for users [1]. So increasing it to 10s might lead people to believe things are stuck, unless you provide some sort of feedback that this is normal. [1] https://www.nngroup.com/articles/response-times-3-important-limits/

...

Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

-- Cheers, Christophe de Dinechin (IRC c3d)

Daniel Henrique Barboza

10:32 p.m.

New subject: [libvirt] [PATCH v1 1/3] adding unplug_timeout QEMU conf

On 8/30/19 8:15 AM, Christophe de Dinechin wrote:

...

Daniel P. Berrangé writes:

...
...
The reason for this timeout is that we originally promised something that we cannot deliver - a synchronous device detach API, while the operation itself is asynchronous. I'm not a fan of exposing it and making it configurable. I'm especially *not* a fan because the commit messages says this is a problem on certain architectures. Since we know what those arches are, we should use a larger timeout for those arches out of the box. Requiring admin to set a config param to fix the architectures is super unpleasant out of the box experiance. True, but also notice that 5 seconds is also already close to the attention span time limit for users [1]. So increasing it to 10s might bring people to believe things are stuck, unless you provide some sort of feedback that this is normal.

https://www.nngroup.com/articles/response-times-3-important-limits/

Interesting link, thanks. About the user feedback due to long response delay: we're already breaking this with the setvcpus command, at least with PowerPC guests and a lot of vcpus being unplugged. Here's an example in which I am able to complete the command without kicking the timeout error (guest is idle, vcpu unplug is fast in this case): --- guest booted with 1 vcpu - added 39 extra vcpus. Operation takes a second, it's fast --- [danielhb@kop5 libvirt]$ sudo ./run tools/virsh setvcpus vcpus-test 40 --live [danielhb@kop5 libvirt]$ [danielhb@kop5 libvirt]$ --- removing them back ---- [danielhb@kop5 libvirt]$ time sudo ./run tools/virsh setvcpus vcpus-test 1 --live real 0m21.695s user 0m0.119s sys 0m0.000s [danielhb@kop5 libvirt]$ This happens in PowerPC because the timeout is being considered not for the whole operation, but per device. Since I'm unplugging 39 devices and the 5 seconds timeout is refreshed for every operation, in theory the user can wait close to 39*5 seconds with the terminal frozen. Now, if we are to adhere to such UX standards (IMO, we should), I propose the following: - short term: increase PowerPC timeout to 10 seconds per device. Following the UX guideline above, this is the limit we can go without warning the user about the delay; - short term: for PowerPC guests, tune 'setvcpus' message to warn the user that the operation can take some time to complete; These 2 are simples changes and I can get it done for the next release without too much trouble. - mid/long term: I can look into the PowerPC guest implementation, see if there are device_del events being fired up in QMP and implement a better UX with more information about how the process is going. Something like "vcpu 1 out of 30 unplugged", "vcpu 2 out of 30 unplugged", or a progress bar, or whatever makes more sense to give the user a feeling of operation ongoing. 
Note that I'm suggesting PowerPC-only changes due to what Daniel said earlier - we can't impact other users due to something that, at first glance, only PowerPC does differently. I have a hunch that we should do this for all archs, but I can't defend this claim without testing this on x86 at least. These short-term changes are easy to make across the board though, so it's just a matter of removing "if PowerPC" in these changes. What do you think? Thanks, DHB

...

...
Regards, Daniel -- |:https://berrange.com -o-https://www.flickr.com/photos/dberrange :| |:https://libvirt.org -o-https://fstop138.berrange.com :| |:https://entangle-photo.org -o-https://www.instagram.com/dberrange :| -- Cheers, Christophe de Dinechin (IRC c3d)

Daniel Henrique Barboza

19 Aug 19 Aug

6:45 a.m.

New subject: [libvirt] [PATCH v1 2/3] qemu_hotplug: use qemu_driver->unplugTimeout in device removal

qemuDomainResetDeviceRemoval() uses a global variable called 'qemuDomainRemoveDeviceWaitTime', hardcoded to 5000, to use as timeout value for virDomainObjWaitUntil(). This timeout value can now be set by the user, via 'unplug_timeout' in qemu.conf, and it is stored in the QEMUDriver object. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/qemu_hotplug.c | 10 +++++----- tests/qemuhotplugtest.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index d8be63b71c..04a888863c 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -5135,7 +5135,7 @@ qemuDomainResetDeviceRemoval(virDomainObjPtr vm) * - we failed to reliably wait for the event and thus use fallback behavior */ static int -qemuDomainWaitForDeviceRemoval(virDomainObjPtr vm) +qemuDomainWaitForDeviceRemoval(virQEMUDriverPtr driver, virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; unsigned long long until; @@ -5143,7 +5143,7 @@ qemuDomainWaitForDeviceRemoval(virDomainObjPtr vm) if (virTimeMillisNow(&until) < 0) return 1; - until += qemuDomainRemoveDeviceWaitTime; + until += driver->unplugTimeout; while (priv->unplug.alias) { if ((rc = virDomainObjWaitUntil(vm, until)) == 1) @@ -5599,7 +5599,7 @@ qemuDomainDetachDeviceChr(virQEMUDriverPtr driver, } else if (async) { ret = 0; } else { - if ((ret = qemuDomainWaitForDeviceRemoval(vm)) == 1) + if ((ret = qemuDomainWaitForDeviceRemoval(driver, vm)) == 1) ret = qemuDomainRemoveChrDevice(driver, vm, tmpChr, true); } @@ -5899,7 +5899,7 @@ qemuDomainDetachDeviceLive(virDomainObjPtr vm, if (async) { ret = 0; } else { - if ((ret = qemuDomainWaitForDeviceRemoval(vm)) == 1) + if ((ret = qemuDomainWaitForDeviceRemoval(driver, vm)) == 1) ret = qemuDomainRemoveDevice(driver, vm, &detach); } @@ -6005,7 +6005,7 @@ qemuDomainHotplugDelVcpu(virQEMUDriverPtr driver, goto cleanup; } - if ((rc = qemuDomainWaitForDeviceRemoval(vm)) <= 0) { + if ((rc = 
qemuDomainWaitForDeviceRemoval(driver, vm)) <= 0) { if (rc == 0) virReportError(VIR_ERR_OPERATION_FAILED, "%s", _("vcpu unplug request timed out")); diff --git a/tests/qemuhotplugtest.c b/tests/qemuhotplugtest.c index 6ad67c8902..5e9fc48975 100644 --- a/tests/qemuhotplugtest.c +++ b/tests/qemuhotplugtest.c @@ -622,7 +622,7 @@ mymain(void) return EXIT_FAILURE; /* wait only 100ms for DEVICE_DELETED event */ - qemuDomainRemoveDeviceWaitTime = 100; + driver.unplugTimeout = 100; #define DO_TEST(file, ACTION, dev, fial, kep, ...) \ do { \ -- 2.21.0

Daniel Henrique Barboza

6:45 a.m.

New subject: [libvirt] [PATCH v1 3/3] qemu: Remove qemu_hotplugpriv.h

This is a header file created to share a global variable called 'qemuDomainRemoveDeviceWaitTime', declared in qemu_hotplug.c, to other files that would want to change the timeout value (currently, only tests/qemuhotplugtest.c). Previous patches deprecated the variable, using qemu_driver->unplugTimeout to set the timeout instead, including qemuhotplugtest.c. This means that the header file is now unused, and can be safely discarded. Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com> --- src/qemu/Makefile.inc.am | 1 - src/qemu/qemu_hotplug.c | 5 ----- src/qemu/qemu_hotplugpriv.h | 32 -------------------------------- tests/qemuhotplugtest.c | 1 - 4 files changed, 39 deletions(-) delete mode 100644 src/qemu/qemu_hotplugpriv.h diff --git a/src/qemu/Makefile.inc.am b/src/qemu/Makefile.inc.am index 30a9751cfd..2b5584302f 100644 --- a/src/qemu/Makefile.inc.am +++ b/src/qemu/Makefile.inc.am @@ -27,7 +27,6 @@ QEMU_DRIVER_SOURCES = \ qemu/qemu_hostdev.h \ qemu/qemu_hotplug.c \ qemu/qemu_hotplug.h \ - qemu/qemu_hotplugpriv.h \ qemu/qemu_conf.c \ qemu/qemu_conf.h \ qemu/qemu_process.c \ diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 04a888863c..454044d04e 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -23,8 +23,6 @@ #include <config.h> #include "qemu_hotplug.h" -#define LIBVIRT_QEMU_HOTPLUGPRIV_H_ALLOW -#include "qemu_hotplugpriv.h" #include "qemu_alias.h" #include "qemu_capabilities.h" #include "qemu_domain.h" @@ -63,9 +61,6 @@ VIR_LOG_INIT("qemu.qemu_hotplug"); #define CHANGE_MEDIA_TIMEOUT 5000 -/* Wait up to 5 seconds for device removal to finish. 
*/ -unsigned long long qemuDomainRemoveDeviceWaitTime = 1000ull * 5; - static void qemuDomainResetDeviceRemoval(virDomainObjPtr vm); diff --git a/src/qemu/qemu_hotplugpriv.h b/src/qemu/qemu_hotplugpriv.h deleted file mode 100644 index a5c443ba85..0000000000 --- a/src/qemu/qemu_hotplugpriv.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * qemu_hotplugpriv.h: private declarations for QEMU device hotplug management - * - * Copyright (C) 2013 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see - * <http://www.gnu.org/licenses/>. - * - */ - -#ifndef LIBVIRT_QEMU_HOTPLUGPRIV_H_ALLOW -# error "qemu_hotplugpriv.h may only be included by qemu_hotplug.c or test suites" -#endif /* LIBVIRT_QEMU_HOTPLUGPRIV_H_ALLOW */ - -#pragma once - -/* - * This header file should never be used outside unit tests. - */ - -extern unsigned long long qemuDomainRemoveDeviceWaitTime; diff --git a/tests/qemuhotplugtest.c b/tests/qemuhotplugtest.c index 5e9fc48975..796459d73d 100644 --- a/tests/qemuhotplugtest.c +++ b/tests/qemuhotplugtest.c @@ -23,7 +23,6 @@ #include "qemu/qemu_conf.h" #include "qemu/qemu_hotplug.h" #define LIBVIRT_QEMU_HOTPLUGPRIV_H_ALLOW -#include "qemu/qemu_hotplugpriv.h" #include "qemumonitortestutils.h" #include "testutils.h" #include "testutilsqemu.h" -- 2.21.0

2323

Age (days ago)

2335

Last active (days ago)

List overview

Download

10 comments

4 participants

participants (4)

Christophe de Dinechin
Daniel Henrique Barboza
Daniel P. Berrangé
Ján Tomko

[libvirt] [PATCH v1 0/3] unplug timeout QEMU configuration

tags

participants (4)