On Fri, Jan 13, 2012 at 04:48:30PM -0500, Dave Allan wrote:
On Fri, Jan 13, 2012 at 02:51:23PM -0600, Adam Litke wrote:
> Qemu has changed the semantics of the "block_job_cancel" API. Originally,
the
> operation was synchronous (ie. upon command completion, the operation was
> guaranteed to be completely stopped). With the new semantics, a
> "block_job_cancel" merely requests that the operation be cancelled and an
event
> is triggered once the cancellation request has been honored.
>
> To adopt the new semantics while preserving compatibility I propose the
> following updates to the virDomainBlockJob API:
>
> A new block job event type VIR_DOMAIN_BLOCK_JOB_CANCELLED will be recognized by
> libvirt. Regardless of the flags used with virDomainBlockJobAbort, this event
> will be raised whenever it is received from qemu. This event indicates that a
> block job has been successfully cancelled.
>
> A new extension flag VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC will be added to the
> virDomainBlockJobAbort API. When enabled, this function will operate
> asynchronously (ie, it can return before the job has actually been cancelled).
> When the API is used in this mode, it is the responsibility of the caller to
> wait for a VIR_DOMAIN_BLOCK_JOB_CANCELLED event or poll via the
> virDomainGetBlockJobInfo API to check the cancellation status.
>
> Without the VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC flag, libvirt will internally poll
> using qemu's "query-block-jobs" API and will not return until the
operation has
> been completed. API users are advised that this operation is unbounded and
> further interaction with the domain during this period may block.
>
> This patch implements the new event type, the API flag, and the polling. The
> main outstanding issue is whether we should bound the amount of time we will
> wait for cancellation and return an error.
>
> Comments on this proposal?
Hi Adam,
Thanks for the patch. That approach seems reasonable. Re: bounding
the amount of time we wait for cancellation, if the time isn't
bounded, what happens if the cancellation never happens? IMO the most
important thing is to make sure that the caller has a way to return to
a normally functioning state even if the virDomainBlockJobAbort call
is unable to cancel the job.
Yes, agreed. But this brings up the familiar problem with timeouts: selecting
the right amount of time to wait. I don't have a good answer here, but it's not
really all that bad if we bail out too early. In that case we can just return
VIR_ERR_OPERATION_TIMEOUT to the caller and they can retry the operation again.
Unfortunately, these semantics were not present in the initial API. Is adding a
new error condition (timeout) to an existing API allowed?
I'll defer to the other folks on the code, since I am at this point
likely to do more harm than good. :)
Dave
> Signed-off-by: Adam Litke <agl(a)us.ibm.com>
> Cc: Stefan Hajnoczi <stefanha(a)gmail.com>
> Cc: Eric Blake <eblake(a)redhat.com>
> ---
> include/libvirt/libvirt.h.in | 10 ++++++++++
> src/libvirt.c | 9 ++++++++-
> src/qemu/qemu_driver.c | 24 +++++++++++++++++-------
> src/qemu/qemu_monitor.c | 24 ++++++++++++++++++++++++
> src/qemu/qemu_monitor.h | 2 ++
> src/qemu/qemu_monitor_json.c | 36 +++++++++++++++++++++++++++++-------
> 6 files changed, 90 insertions(+), 15 deletions(-)
>
> diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in
> index e436f3c..fc7f028 100644
> --- a/include/libvirt/libvirt.h.in
> +++ b/include/libvirt/libvirt.h.in
> @@ -1776,6 +1776,15 @@ typedef enum {
> VIR_DOMAIN_BLOCK_JOB_TYPE_PULL = 1,
> } virDomainBlockJobType;
>
> +/**
> + * virDomainBlockJobAbortFlags:
> + *
> + * VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC: Request only, do not wait for completion
> + */
> +typedef enum {
> + VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC = 1,
> +} virDomainBlockJobAbortFlags;
> +
> /* An iterator for monitoring block job operations */
> typedef unsigned long long virDomainBlockJobCursor;
>
> @@ -3293,6 +3302,7 @@ typedef void
(*virConnectDomainEventGraphicsCallback)(virConnectPtr conn,
> typedef enum {
> VIR_DOMAIN_BLOCK_JOB_COMPLETED = 0,
> VIR_DOMAIN_BLOCK_JOB_FAILED = 1,
> + VIR_DOMAIN_BLOCK_JOB_CANCELLED = 2,
> } virConnectDomainEventBlockJobStatus;
>
> /**
> diff --git a/src/libvirt.c b/src/libvirt.c
> index a540424..0e886f5 100644
> --- a/src/libvirt.c
> +++ b/src/libvirt.c
> @@ -17299,7 +17299,7 @@ error:
> * virDomainBlockJobAbort:
> * @dom: pointer to domain object
> * @disk: path to the block device, or device shorthand
> - * @flags: extra flags; not used yet, so callers should always pass 0
> + * @flags: bitwise or of virDomainBlockJobAbortFlags
> *
> * Cancel the active block job on the given disk.
> *
> @@ -17310,6 +17310,13 @@ error:
> * can be found by calling virDomainGetXMLDesc() and inspecting
> * elements within //domain/devices/disk.
> *
> + * By default, this function performs a synchronous operation and the caller
> + * may assume that the operation has completed when 0 is returned. However,
> + * BlockJob operations may take a long time to complete, and during this time
> + * further domain interactions may be unresponsive. To avoid this problem, pass
> + * VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC in the flags argument to enable asynchronous
> + * behavior. When the job has been cancelled, a BlockJob event will be emitted.
> + *
> * Returns -1 in case of failure, 0 when successful.
> */
> int virDomainBlockJobAbort(virDomainPtr dom, const char *disk,
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 712f1fc..d971a5f 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -11379,7 +11379,7 @@ cleanup:
> static int
> qemuDomainBlockJobImpl(virDomainPtr dom, const char *path,
> unsigned long bandwidth, virDomainBlockJobInfoPtr info,
> - int mode)
> + int flags, int mode)
> {
> struct qemud_driver *driver = dom->conn->privateData;
> virDomainObjPtr vm = NULL;
> @@ -11420,6 +11420,15 @@ qemuDomainBlockJobImpl(virDomainPtr dom, const char *path,
> qemuDomainObjEnterMonitorWithDriver(driver, vm);
> priv = vm->privateData;
> ret = qemuMonitorBlockJob(priv->mon, device, bandwidth, info, mode);
> + /*
> + * Qemu provides asynchronous block job cancellation but without the
> + * VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC flag libvirt guarantees a synchronous
> + * operation. Provide this behavior by waiting here (with the monitor
> + * locked) so we don't get confused by newly scheduled block jobs.
> + */
> + if (ret == 0 && mode == BLOCK_JOB_ABORT &&
> + !(flags & VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC))
> + ret = qemuMonitorBlockJobCancelWait(priv->mon, device);
> qemuDomainObjExitMonitorWithDriver(driver, vm);
>
> endjob:
> @@ -11439,8 +11448,7 @@ cleanup:
> static int
> qemuDomainBlockJobAbort(virDomainPtr dom, const char *path, unsigned int flags)
> {
> - virCheckFlags(0, -1);
> - return qemuDomainBlockJobImpl(dom, path, 0, NULL, BLOCK_JOB_ABORT);
> + return qemuDomainBlockJobImpl(dom, path, 0, NULL, flags, BLOCK_JOB_ABORT);
> }
>
> static int
> @@ -11448,7 +11456,7 @@ qemuDomainGetBlockJobInfo(virDomainPtr dom, const char
*path,
> virDomainBlockJobInfoPtr info, unsigned int flags)
> {
> virCheckFlags(0, -1);
> - return qemuDomainBlockJobImpl(dom, path, 0, info, BLOCK_JOB_INFO);
> + return qemuDomainBlockJobImpl(dom, path, 0, info, flags, BLOCK_JOB_INFO);
> }
>
> static int
> @@ -11456,7 +11464,8 @@ qemuDomainBlockJobSetSpeed(virDomainPtr dom, const char
*path,
> unsigned long bandwidth, unsigned int flags)
> {
> virCheckFlags(0, -1);
> - return qemuDomainBlockJobImpl(dom, path, bandwidth, NULL, BLOCK_JOB_SPEED);
> + return qemuDomainBlockJobImpl(dom, path, bandwidth, NULL, flags,
> + BLOCK_JOB_SPEED);
> }
>
> static int
> @@ -11466,9 +11475,10 @@ qemuDomainBlockPull(virDomainPtr dom, const char *path,
unsigned long bandwidth,
> int ret;
>
> virCheckFlags(0, -1);
> - ret = qemuDomainBlockJobImpl(dom, path, bandwidth, NULL, BLOCK_JOB_PULL);
> + ret = qemuDomainBlockJobImpl(dom, path, bandwidth, NULL, flags,
> + BLOCK_JOB_PULL);
> if (ret == 0 && bandwidth != 0)
> - ret = qemuDomainBlockJobImpl(dom, path, bandwidth, NULL,
> + ret = qemuDomainBlockJobImpl(dom, path, bandwidth, NULL, flags,
> BLOCK_JOB_SPEED);
> return ret;
> }
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index ad7e2a5..5c22486 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -2585,6 +2585,30 @@ int qemuMonitorScreendump(qemuMonitorPtr mon,
> return ret;
> }
>
> +/* Poll the monitor to wait for the block job on a given disk to end.
> + * We don't need to worry about another block job starting since we have the
> + * driver locked. */
> +int
> +qemuMonitorBlockJobCancelWait(qemuMonitorPtr mon, const char *device)
> +{
> + VIR_DEBUG("mon=%p, device=%p", mon, device);
> + /* XXX: Should we provide some sort of escape hatch for this wait? */
> + while (1) {
> + /* Poll every 100ms */
> + int ret;
> + struct timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000ull };
> + virDomainBlockJobInfo info;
> +
> + ret = qemuMonitorBlockJob(mon, device, 0, &info, BLOCK_JOB_INFO);
> + if (ret < 0)
> + return -1;
> + else if (ret == 0)
> + return 0;
> + else
> + nanosleep(&ts, NULL);
> + }
> +}
> +
> int qemuMonitorBlockJob(qemuMonitorPtr mon,
> const char *device,
> unsigned long bandwidth,
> diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
> index 15acf8b..afc081e 100644
> --- a/src/qemu/qemu_monitor.h
> +++ b/src/qemu/qemu_monitor.h
> @@ -510,6 +510,8 @@ typedef enum {
> BLOCK_JOB_PULL = 3,
> } BLOCK_JOB_CMD;
>
> +int qemuMonitorBlockJobCancelWait(qemuMonitorPtr mon, const char *device);
> +
> int qemuMonitorBlockJob(qemuMonitorPtr mon,
> const char *device,
> unsigned long bandwidth,
> diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
> index 7eb2a92..2ca8eeb 100644
> --- a/src/qemu/qemu_monitor_json.c
> +++ b/src/qemu/qemu_monitor_json.c
> @@ -57,7 +57,8 @@ static void qemuMonitorJSONHandleIOError(qemuMonitorPtr mon,
virJSONValuePtr dat
> static void qemuMonitorJSONHandleVNCConnect(qemuMonitorPtr mon, virJSONValuePtr
data);
> static void qemuMonitorJSONHandleVNCInitialize(qemuMonitorPtr mon, virJSONValuePtr
data);
> static void qemuMonitorJSONHandleVNCDisconnect(qemuMonitorPtr mon, virJSONValuePtr
data);
> -static void qemuMonitorJSONHandleBlockJob(qemuMonitorPtr mon, virJSONValuePtr
data);
> +static void qemuMonitorJSONHandleBlockJobCompleted(qemuMonitorPtr mon,
virJSONValuePtr data);
> +static void qemuMonitorJSONHandleBlockJobCancelled(qemuMonitorPtr mon,
virJSONValuePtr data);
>
> struct {
> const char *type;
> @@ -73,7 +74,8 @@ struct {
> { "VNC_CONNECTED", qemuMonitorJSONHandleVNCConnect, },
> { "VNC_INITIALIZED", qemuMonitorJSONHandleVNCInitialize, },
> { "VNC_DISCONNECTED", qemuMonitorJSONHandleVNCDisconnect, },
> - { "BLOCK_JOB_COMPLETED", qemuMonitorJSONHandleBlockJob, },
> + { "BLOCK_JOB_COMPLETED", qemuMonitorJSONHandleBlockJobCompleted, },
> + { "BLOCK_JOB_CANCELLED", qemuMonitorJSONHandleBlockJobCancelled, },
> };
>
>
> @@ -685,13 +687,14 @@ static void qemuMonitorJSONHandleVNCDisconnect(qemuMonitorPtr
mon, virJSONValueP
> qemuMonitorJSONHandleVNC(mon, data, VIR_DOMAIN_EVENT_GRAPHICS_DISCONNECT);
> }
>
> -static void qemuMonitorJSONHandleBlockJob(qemuMonitorPtr mon, virJSONValuePtr
data)
> +static void qemuMonitorJSONHandleBlockJobImpl(qemuMonitorPtr mon,
> + virJSONValuePtr data,
> + int event)
> {
> const char *device;
> const char *type_str;
> int type = VIR_DOMAIN_BLOCK_JOB_TYPE_UNKNOWN;
> unsigned long long offset, len;
> - int status = VIR_DOMAIN_BLOCK_JOB_FAILED;
>
> if ((device = virJSONValueObjectGetString(data, "device")) == NULL)
{
> VIR_WARN("missing device in block job event");
> @@ -716,13 +719,32 @@ static void qemuMonitorJSONHandleBlockJob(qemuMonitorPtr mon,
virJSONValuePtr da
> if (STREQ(type_str, "stream"))
> type = VIR_DOMAIN_BLOCK_JOB_TYPE_PULL;
>
> - if (offset != 0 && offset == len)
> - status = VIR_DOMAIN_BLOCK_JOB_COMPLETED;
> + switch (event) {
> + case VIR_DOMAIN_BLOCK_JOB_COMPLETED:
> + /* Make sure the whole device has been processed */
> + if (offset != len)
> + event = VIR_DOMAIN_BLOCK_JOB_FAILED;
> + break;
> + case VIR_DOMAIN_BLOCK_JOB_FAILED:
> + case VIR_DOMAIN_BLOCK_JOB_CANCELLED:
> + break;
> + }
>
> out:
> - qemuMonitorEmitBlockJob(mon, device, type, status);
> + qemuMonitorEmitBlockJob(mon, device, type, event);
> +}
> +
> +static void qemuMonitorJSONHandleBlockJobCompleted(qemuMonitorPtr mon,
> + virJSONValuePtr data)
> +{
> + qemuMonitorJSONHandleBlockJobImpl(mon, data, VIR_DOMAIN_BLOCK_JOB_COMPLETED);
> }
>
> +static void qemuMonitorJSONHandleBlockJobCancelled(qemuMonitorPtr mon,
> + virJSONValuePtr data)
> +{
> + qemuMonitorJSONHandleBlockJobImpl(mon, data, VIR_DOMAIN_BLOCK_JOB_CANCELLED);
> +}
>
> int
> qemuMonitorJSONHumanCommandWithFd(qemuMonitorPtr mon,
> --
> 1.7.5.rc1
>
--
Adam Litke <agl(a)us.ibm.com>
IBM Linux Technology Center