qemu 2.3 added a new QMP command block-set-write-threshold,
which allows callers to get an interrupt when a file hits a
write threshold, rather than the current approach of repeatedly
polling for file allocation. This patch prepares the API that lets
callers register to receive the event, and adds a way to query
the threshold via virDomainListGetStats().
The event is one-shot in qemu - a management application must
register a new threshold each time it triggers. However, the
virConnectDomainEventRegisterAny() call does not allow
parameterization, so callers must use a pair of APIs - one
to register the callback (one-time call) that will be used each
time a threshold triggers for any guest disk, and another to
repeatedly set the desired threshold (must be called each time
a threshold should be changed).
Note that the threshold can be registered either as a byte
offset or as a parts-per-million proportion (a value between
0 and 1000000, scaled to the disk size). However, the value is
always reported as a byte offset, even when registered as a
proportion. I also considered having the setup parameter be
a double, to allow a finer resolution rather than fixed-point
proportion; but that much resolution is probably not necessary
(for a 100G disk, the resulting 100k granularity is pretty
much in the noise).
To make the patch series more digestible, this patch
intentionally omits remote support, by using a couple of
placeholders at a point where the compiler forces the addition
of a case label within a switch statement.
* include/libvirt/libvirt-domain.h
(virDomainBlockSetWriteThreshold): New API.
(virConnectDomainEventWriteThresholdCallback): New event.
* src/libvirt_public.syms (LIBVIRT_1.3.0): Export it.
* src/libvirt-domain.c (virDomainBlockSetWriteThreshold): New API.
(virConnectGetAllDomainStats): New stat.
* src/driver-hypervisor.h (virDrvDomainBlockSetWriteThreshold):
New hypervisor entry point.
* tools/virsh-domain.c (vshEventWriteThresholdPrint): Print new
event.
* tools/virsh.pod (domstats): Document new stat.
* daemon/remote.c (domainEventCallbacks): Add stub.
* src/conf/domain_event.c (virDomainEventDispatchDefaultFunc):
Likewise.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
daemon/remote.c | 2 +
include/libvirt/libvirt-domain.h | 53 ++++++++++++++++++++++
src/conf/domain_event.c | 4 +-
src/driver-hypervisor.h | 7 +++
src/libvirt-domain.c | 98 ++++++++++++++++++++++++++++++++++++++++
src/libvirt_public.syms | 1 +
tools/virsh-domain.c | 24 ++++++++++
tools/virsh.pod | 1 +
8 files changed, 189 insertions(+), 1 deletion(-)
diff --git a/daemon/remote.c b/daemon/remote.c
index e9e2dca..283ece2 100644
--- a/daemon/remote.c
+++ b/daemon/remote.c
@@ -1102,6 +1102,8 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] =
{
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventTunable),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventAgentLifecycle),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceAdded),
+ /* TODO: Implement RPC support for this */
+ VIR_DOMAIN_EVENT_CALLBACK(NULL),
};
verify(ARRAY_CARDINALITY(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 7564c20..ca2f929 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -1306,6 +1306,18 @@ int virDomainBlockStatsFlags (virDomainPtr dom,
virTypedParameterPtr params,
int *nparams,
unsigned int flags);
+
+typedef enum {
+ /* threshold is a parts-per-million proportion of the image size
+ * rather than byte limit */
+ VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION = (1 << 0),
+} virDomainBlockSetWriteThresholdFlags;
+
+int virDomainBlockSetWriteThreshold(virDomainPtr dom,
+ const char *disk,
+ unsigned long long threshold,
+ unsigned int flags);
+
int virDomainInterfaceStats (virDomainPtr dom,
const char *path,
virDomainInterfaceStatsPtr stats,
@@ -3255,6 +3267,46 @@ typedef void (*virConnectDomainEventDeviceAddedCallback)(virConnectPtr conn,
void *opaque);
/**
+ * virConnectDomainEventWriteThresholdCallback:
+ * @conn: connection object
+ * @dom: domain on which the event occurred
+ * @devAlias: device alias
+ * @path: a local path name of the host resource, or NULL if not available
+ * @threshold: threshold that was exceeded, in bytes
+ * @length: length beyond @threshold that was involved in the triggering
+ * write, or 0 if not known
+ * @opaque: application specified data
+ *
+ * The callback signature to use when registering for an event of type
+ * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD with virConnectDomainEventRegisterAny()
+ *
+ * This callback occurs when a block device detects a write event that
+ * exceeds a non-zero threshold set by
+ * virDomainBlockSetWriteThreshold(). When this event occurs, the
+ * threshold is reset to 0, and a new limit must be installed to see
+ * the event again on the same device. The intent of this event is to
+ * allow time for the underlying storage to be resized dynamically
+ * prior to the point where the guest would be paused due to running
+ * out of space, without having to poll for allocation values.
+ *
+ * The contents of @devAlias will be "vda" when the threshold is
+ * triggered on the active layer of guest disk vda. Some hypervisors
+ * also support threshold reporting on backing images, such as during
+ * a block commit; when that happens, @devAlias will be "vda[1]" for
+ * the backingStore at index 1 within the chain of host resources for
+ * guest disk vda. For convenience, if the host resource has a local
+ * file name, that will be listed in @path (note that @path will be
+ * NULL for network resources).
+ */
+typedef void (*virConnectDomainEventWriteThresholdCallback)(virConnectPtr conn,
+ virDomainPtr dom,
+ const char *devAlias,
+ const char *path,
+ unsigned long long threshold,
+ unsigned long long length,
+ void *opaque);
+
+/**
* VIR_DOMAIN_TUNABLE_CPU_VCPUPIN:
*
* Macro represents formatted pinning for one vcpu specified by id which is
@@ -3537,6 +3589,7 @@ typedef enum {
VIR_DOMAIN_EVENT_ID_TUNABLE = 17, /* virConnectDomainEventTunableCallback */
VIR_DOMAIN_EVENT_ID_AGENT_LIFECYCLE = 18,/* virConnectDomainEventAgentLifecycleCallback */
VIR_DOMAIN_EVENT_ID_DEVICE_ADDED = 19, /* virConnectDomainEventDeviceAddedCallback */
+ VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD = 20, /* virConnectDomainEventWriteThresholdCallback */
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_EVENT_ID_LAST
diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c
index 20d66e1..c43799f 100644
--- a/src/conf/domain_event.c
+++ b/src/conf/domain_event.c
@@ -1,7 +1,7 @@
/*
* domain_event.c: domain event queue processing helpers
*
- * Copyright (C) 2010-2014 Red Hat, Inc.
+ * Copyright (C) 2010-2015 Red Hat, Inc.
* Copyright (C) 2008 VirtualIron
* Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
*
@@ -1614,6 +1614,8 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
goto cleanup;
}
+ case VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD:
+ /* TODO: Implement RPC support for this */
case VIR_DOMAIN_EVENT_ID_LAST:
break;
}
diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h
index 3275343..b5b51f1 100644
--- a/src/driver-hypervisor.h
+++ b/src/driver-hypervisor.h
@@ -484,6 +484,12 @@ typedef int
unsigned int flags);
typedef int
+(*virDrvDomainBlockSetWriteThreshold)(virDomainPtr domain,
+ const char *disk,
+ unsigned long long threshold,
+ unsigned int flags);
+
+typedef int
(*virDrvDomainInterfaceStats)(virDomainPtr domain,
const char *path,
virDomainInterfaceStatsPtr stats);
@@ -1324,6 +1330,7 @@ struct _virHypervisorDriver {
virDrvDomainBlockResize domainBlockResize;
virDrvDomainBlockStats domainBlockStats;
virDrvDomainBlockStatsFlags domainBlockStatsFlags;
+ virDrvDomainBlockSetWriteThreshold domainBlockSetWriteThreshold;
virDrvDomainInterfaceStats domainInterfaceStats;
virDrvDomainSetInterfaceParameters domainSetInterfaceParameters;
virDrvDomainGetInterfaceParameters domainGetInterfaceParameters;
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 4d7b88a..4ac9325 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -5743,6 +5743,102 @@ virDomainBlockStatsFlags(virDomainPtr dom,
/**
+ * virDomainBlockSetWriteThreshold:
+ * @dom: pointer to domain object
+ * @disk: path to the block device, or device shorthand
+ * @threshold: limit at which a write threshold event can trigger
+ * @flags: bitwise-OR of virDomainBlockSetWriteThresholdFlags
+ *
+ * This function is used to set a one-shot write threshold. It should
+ * be used in tandem with virConnectDomainEventRegisterAny()
+ * installing a handler for VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD. If
+ * the hypervisor detects that a write request (whether guest data, or
+ * host metadata) would exceed the host byte offset specified in
+ * @threshold, then an event is raised, and the threshold is reset to
+ * 0 at that time. The event registration is only needed once, but
+ * this function must be called each time a new threshold is desired;
+ * the event will only fire if a non-zero threshold is
+ * exceeded.
+ *
+ * By default, @threshold is specified in bytes, and must not exceed
+ * the size of the block device. However, when @flags includes
+ * VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION, @threshold is
+ * instead a value between 0 and 1,000,000, as a parts-per-million
+ * proportion to the current size of the disk, and the driver will
+ * compute the corresponding byte value. For example, 500000
+ * represents a threshold when half the disk has been allocated. A
+ * driver may round the requested threshold to a granularity that can
+ * actually be supported.
+ *
+ * Setting a threshold allows event-based resizing of host resources
+ * that back a guest disk without having to poll the current disk
+ * allocation, while still having enough time to complete the resize
+ * before the guest would end up halting due to insufficient space.
+ * Calling this function to set the threshold back to zero will stop
+ * further firing of the event. virConnectGetAllDomainStats() can be
+ * used to track the current threshold value, always in the form
+ * normalized to bytes.
+ *
+ * The @disk parameter is either the device target shorthand (the
+ * <target dev='...'/> sub-element, such as "vda"), or an unambiguous
+ * source name of the block device (the <source file='...'/>
+ * sub-element, such as "/path/to/image"). Valid names can be found
+ * by calling virDomainGetXMLDesc() and inspecting elements within
+ * //domain/devices/disk. Some drivers might also accept strings such
+ * as "vda[1]" for setting the threshold of a backing image, useful
+ * when doing a block commit into the backing image. Hypervisors may
+ * restrict threshold reporting to certain types of host resources,
+ * such as a qcow2 format on top of a block device (as allocation
+ * tracking differs according to the type of host resource).
+ *
+ * Domains may have more than one block device. To set thresholds for
+ * each you should make multiple calls to this function. If write
+ * thresholds are not supported, an application will have to instead
+ * poll virDomainGetBlockInfo() or similar to track allocation.
+ *
+ * Returns -1 in case of error, 0 in case of success.
+ */
+int
+virDomainBlockSetWriteThreshold(virDomainPtr dom,
+ const char *disk,
+ unsigned long long threshold,
+ unsigned int flags)
+{
+ virConnectPtr conn;
+
+ VIR_DOMAIN_DEBUG(dom, "disk=%s, threshold=%llu, flags=%x",
+ disk, threshold, flags);
+
+ virResetLastError();
+
+ virCheckDomainReturn(dom, -1);
+ virCheckNonNullArgGoto(disk, error);
+ if (flags & VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PROPORTION &&
+ threshold > 1000000) {
+ virReportError(VIR_ERR_INVALID_ARG,
+ _("threshold in %s is larger than 100%%"),
+ __FUNCTION__);
+ goto error;
+ }
+ conn = dom->conn;
+
+ if (conn->driver->domainBlockSetWriteThreshold) {
+ int ret;
+ ret = conn->driver->domainBlockSetWriteThreshold(dom, disk, threshold,
+ flags);
+ if (ret < 0)
+ goto error;
+ return ret;
+ }
+ virReportUnsupportedError();
+
+ error:
+ virDispatchError(dom->conn);
+ return -1;
+}
+
+
+/**
* virDomainInterfaceStats:
* @dom: pointer to the domain object
* @path: path to the interface
@@ -11176,6 +11272,8 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
* unsigned long long.
* "block.<num>.errors" - Xen only: the 'oo_req' value as
* unsigned long long.
+ * "block.<num>.write-threshold" - byte at which a write threshold event
+ * will fire, as unsigned long long.
* "block.<num>.allocation" - offset of the highest written sector
* as unsigned long long.
* "block.<num>.capacity" - logical size in bytes of the block device
backing
diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms
index 59d8c12..14b2373 100644
--- a/src/libvirt_public.syms
+++ b/src/libvirt_public.syms
@@ -717,6 +717,7 @@ LIBVIRT_1.2.16 {
LIBVIRT_1.3.0 {
global:
+ virDomainBlockSetWriteThreshold;
virTypedParamsAddStringList;
} LIBVIRT_1.2.16;
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index baf4fa3..141be3a 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -12082,6 +12082,28 @@ vshEventDeviceAddedPrint(virConnectPtr conn ATTRIBUTE_UNUSED,
}
static void
+vshEventWriteThresholdPrint(virConnectPtr conn ATTRIBUTE_UNUSED,
+ virDomainPtr dom,
+ const char *alias,
+ const char *path,
+ unsigned long long threshold,
+ unsigned long long length,
+ void *opaque)
+{
+ vshDomEventData *data = opaque;
+
+ if (!data->loop && *data->count)
+ return;
+ vshPrint(data->ctl,
+ _("event 'write-threshold' for domain %s disk %s (%s): "
+ "threshold %llu exceeded by %llu bytes\n"),
+ virDomainGetName(dom), alias, NULLSTR(path), threshold, length);
+ (*data->count)++;
+ if (!data->loop)
+ vshEventDone(data->ctl);
+}
+
+static void
vshEventTunablePrint(virConnectPtr conn ATTRIBUTE_UNUSED,
virDomainPtr dom,
virTypedParameterPtr params,
@@ -12188,6 +12210,8 @@ static vshEventCallback vshEventCallbacks[] = {
VIR_DOMAIN_EVENT_CALLBACK(vshEventAgentLifecyclePrint), },
{ "device-added",
VIR_DOMAIN_EVENT_CALLBACK(vshEventDeviceAddedPrint), },
+ { "write-threshold",
+ VIR_DOMAIN_EVENT_CALLBACK(vshEventWriteThresholdPrint), },
};
verify(VIR_DOMAIN_EVENT_ID_LAST == ARRAY_CARDINALITY(vshEventCallbacks));
diff --git a/tools/virsh.pod b/tools/virsh.pod
index bcfa165..600ea42 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -919,6 +919,7 @@ local file or block device,
"block.<num>.fl.reqs" - total flush requests,
"block.<num>.fl.times" - total time (ns) spent on cache flushing,
"block.<num>.errors" - Xen only: the 'oo_req' value,
+"block.<num>.write-threshold" - write threshold event trigger, in bytes,
"block.<num>.allocation" - offset of highest written sector in bytes,
"block.<num>.capacity" - logical size of source file in bytes,
"block.<num>.physical" - physical size of source file in bytes
--
2.4.3