On Sat, May 23, 2015 at 21:45:56 -0600, Eric Blake wrote:
qemu 2.3 added a new QMP command block-set-write-threshold,
which allows callers to get an interrupt when a file hits a
write threshold, rather than the current approach of repeatedly
polling for file allocation. This patch prepares the API for
callers to register to receive the event, as well as a way
to query the threshold.
The event is one-shot in qemu - a guest must re-register a new
threshold after each time it triggers. However, the
virConnectDomainEventRegisterAny() call does not allow
parameterization, so callers must use a pair of APIs - one
to register the callback (one-time call), and another to
repeatedly set the desired threshold (repeated each time a
threshold changes).
Note that the threshold is registered as a double, but reported
as an unsigned long long. This is intentional, as it allows
the use of a flag for registering a threshold via a percentage,
but once registered, the threshold is normalized according to
the current size of the disk.
To make the patch series more digestible, this patch
intentionally omits remote support, by using a couple of
placeholders at a point where the compiler forces the addition
of a case within a switch statement.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
Posting now to get feedback on the proposed API, before the actual
implementation is complete.
daemon/remote.c | 2 +
include/libvirt/libvirt-domain.h | 48 +++++++++++++++++++
src/conf/domain_event.c | 4 +-
src/driver-hypervisor.h | 7 +++
src/libvirt-domain.c | 101 +++++++++++++++++++++++++++++++++++++++
src/libvirt_public.syms | 1 +
tools/virsh-domain.c | 23 +++++++++
tools/virsh.pod | 1 +
8 files changed, 186 insertions(+), 1 deletion(-)
diff --git a/daemon/remote.c b/daemon/remote.c
index e259a76..51d7de5 100644
--- a/daemon/remote.c
+++ b/daemon/remote.c
@@ -1102,6 +1102,8 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[]
= {
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventTunable),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventAgentLifecycle),
VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceAdded),
+ /* TODO: Implement RPC support for this */
+ VIR_DOMAIN_EVENT_CALLBACK(NULL),
};
verify(ARRAY_CARDINALITY(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index d851225..0c4fd62 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -515,6 +515,15 @@ typedef virDomainBlockStatsStruct *virDomainBlockStatsPtr;
# define VIR_DOMAIN_BLOCK_STATS_ERRS "errs"
/**
+ * VIR_DOMAIN_BLOCK_STATS_WRITE_THRESHOLD:
+ *
+ * Macro represents the typed parameter, as an llong, that reports
Signed? The rest of the block stats fields is signed for a reason, is
there a reason why it's here?
+ * the threshold in bytes at which the block device will trigger a
+ * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD, or 0 if no threshold is registered.
In case it remains signed, the negative values should be somehow
described.
+ */
+# define VIR_DOMAIN_BLOCK_STATS_WRITE_THRESHOLD "write-threshold"
+
+/**
* virDomainInterfaceStats:
*
* Network interface stats for virDomainInterfaceStats.
@@ -1297,6 +1306,16 @@ int virDomainBlockStatsFlags (virDomainPtr
dom,
virTypedParameterPtr params,
int *nparams,
unsigned int flags);
+
+typedef enum {
+ /* threshold is a percentage rather than byte limit */
+ VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE = (1 << 0),
+} virDomainBlockSetWriteThresholdFlags;
+int virDomainBlockSetWriteThreshold(virDomainPtr dom,
+ const char *disk,
+ double threshold,
Double? Your last proposal used unsigned long long. Using double with
byte sizes seems a bit odd to me. The file size won't exceed unsigned
long long for a while so I don't see a reason to use double here.
+ unsigned int flags);
+
int virDomainInterfaceStats (virDomainPtr dom,
const char *path,
virDomainInterfaceStatsPtr stats,
@@ -3246,6 +3265,34 @@ typedef void
(*virConnectDomainEventDeviceAddedCallback)(virConnectPtr conn,
void *opaque);
/**
+ * virConnectDomainEventWriteThresholdCallback:
+ * @conn: connection object
+ * @dom: domain on which the event occurred
+ * @devAlias: device alias
+ * @threshold: threshold that was exceeded, in bytes
+ * @length: length beyond @threshold that was involved in the triggering write
+ * @opaque: application specified data
+ *
+ * The callback signature to use when registering for an event of type
+ * VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD with virConnectDomainEventRegisterAny()
+ *
+ * This callback occurs when a block device detects a write event that
+ * exceeds a non-zero threshold set by
+ * virDomainBlockSetWriteThreshold(). When this event occurs, the
+ * threshold is reset to 0, and a new limit must be installed to see
+ * the event again on the same device. The intent of this event is to
+ * allow time for the underlying storage to be resized dynamically
+ * prior to the point where the guest would be paused due to running
+ * out of space, without having to poll for allocation values.
@devAlias is not decribed enough. Will it contain the square brackets to
denote the backing chain member?
Shouldn't we make @length optional so that hypervisors that might not
support that in the future will not have to work this around?
+ */
+typedef void (*virConnectDomainEventWriteThresholdCallback)(virConnectPtr conn,
+ virDomainPtr dom,
+ const char *devAlias,
+ unsigned long long
threshold,
+ unsigned long long length,
+ void *opaque);
+
+/**
* VIR_DOMAIN_TUNABLE_CPU_VCPUPIN:
*
* Macro represents formatted pinning for one vcpu specified by id which is
@@ -3528,6 +3575,7 @@ typedef enum {
VIR_DOMAIN_EVENT_ID_TUNABLE = 17, /* virConnectDomainEventTunableCallback */
VIR_DOMAIN_EVENT_ID_AGENT_LIFECYCLE = 18,/*
virConnectDomainEventAgentLifecycleCallback */
VIR_DOMAIN_EVENT_ID_DEVICE_ADDED = 19, /* virConnectDomainEventDeviceAddedCallback
*/
+ VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD = 20, /* virConnectDomainEventWriteThreshold */
# ifdef VIR_ENUM_SENTINELS
VIR_DOMAIN_EVENT_ID_LAST
diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c
index 20d66e1..c43799f 100644
--- a/src/conf/domain_event.c
+++ b/src/conf/domain_event.c
@@ -1,7 +1,7 @@
/*
* domain_event.c: domain event queue processing helpers
*
- * Copyright (C) 2010-2014 Red Hat, Inc.
+ * Copyright (C) 2010-2015 Red Hat, Inc.
* Copyright (C) 2008 VirtualIron
* Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
*
@@ -1614,6 +1614,8 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
goto cleanup;
}
+ case VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD:
+ /* TODO: Implement RPC support for this */
case VIR_DOMAIN_EVENT_ID_LAST:
break;
}
diff --git a/src/driver-hypervisor.h b/src/driver-hypervisor.h
index 3275343..81b80d3 100644
--- a/src/driver-hypervisor.h
+++ b/src/driver-hypervisor.h
@@ -484,6 +484,12 @@ typedef int
unsigned int flags);
typedef int
+(*virDrvDomainBlockSetWriteThreshold)(virDomainPtr domain,
+ const char *disk,
+ double threshold,
+ unsigned int flags);
+
+typedef int
(*virDrvDomainInterfaceStats)(virDomainPtr domain,
const char *path,
virDomainInterfaceStatsPtr stats);
@@ -1324,6 +1330,7 @@ struct _virHypervisorDriver {
virDrvDomainBlockResize domainBlockResize;
virDrvDomainBlockStats domainBlockStats;
virDrvDomainBlockStatsFlags domainBlockStatsFlags;
+ virDrvDomainBlockSetWriteThreshold domainBlockSetWriteThreshold;
virDrvDomainInterfaceStats domainInterfaceStats;
virDrvDomainSetInterfaceParameters domainSetInterfaceParameters;
virDrvDomainGetInterfaceParameters domainGetInterfaceParameters;
diff --git a/src/libvirt-domain.c b/src/libvirt-domain.c
index 2edba1a..a211de0 100644
--- a/src/libvirt-domain.c
+++ b/src/libvirt-domain.c
@@ -5743,6 +5743,105 @@ virDomainBlockStatsFlags(virDomainPtr dom,
/**
+ * virDomainBlockSetWriteThreshold:
+ * @dom: pointer to domain object
+ * @disk: path to the block device, or device shorthand
+ * @threshold: limit at which a write threshold event can trigger
+ * @flags: bitwise-OR of virDomainBlockSetWriteThresholdFlags
+ *
+ * This function is used to set a one-shot write threshold. It should
+ * be used in tandem with virConnectDomainEventRegisterAny()
+ * installing a handler for VIR_DOMAIN_EVENT_ID_WRITE_THRESHOLD. If
+ * the hypervisor detects that a write request (whether guest data, or
+ * host metadata) would exceed the host byte offset specified in
+ * @threshold, then an event is raised, and the threshold is reset to
+ * 0 at that time. The event registration is only needed once, but
+ * this function must be called each time a new threshold is desired;
+ * the event will only fire if a non-zero threshold is
+ * exceeded.
+ *
+ * By default, @threshold is specified in bytes, and must not exceed
+ * the size of the block device. However, when @flags includes
+ * VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE, @threshold is
+ * instead a value between 0.0 and 100.0, and the driver will compute
This sentence looks like it implies a single digit after the decimal. At
that point you could simply use promile rather than percents and use a
integer variable for @threshold.
+ * a byte value based on the current size of the disk. A driver
may
+ * round the requested threshold to a granularity that can actually
+ * be supported.
+ *
+ * Setting a threshold allows event-based resizing of host resources
+ * that back a guest disk without having to poll the current disk
+ * allocation, while still having enough time to complete the resize
+ * before the guest would end up halting due to insufficient space.
+ * Calling this function to set the threshold back to zero will stop
+ * further firing of the event. virDomainBlockStatsFlags() and
+ * virConnectGetAllDomainStats() can be used to track the current
+ * threshold value, always in the form normalized to bytes.
+ *
+ * The @disk parameter is either the device target shorthand (the
+ * <target dev='...'/> sub-element, such as "vda"), or (since
0.9.8)
+ * an unambiguous source name of the block device (the <source
+ * file='...'/> sub-element, such as "/path/to/image"). Valid
names
+ * can be found by calling virDomainGetXMLDesc() and inspecting
+ * elements within //domain/devices/disk. Some drivers might also
+ * accept strings such as "vda[1]" for setting the threshold of a
+ * backing image, useful when doing a block commit into the backing
+ * image.
+ *
+ * Domains may have more than one block device. To set thresholds for
+ * each you should make multiple calls to this function. If write
+ * thresholds are not supported, an application will have to instead
+ * poll virDomainGetBlockInfo() or similar to track allocation.
Is it worth noting that the events will be ignored during the time
libvirtd was down? Since it doesn't make much sense to trigger them on
daemon start since there probably won't be any client at that point.
Perhaps it's also worth noting that polling the domain stats
+ *
+ * Returns -1 in case of error, 0 in case of success.
+ */
+int
+virDomainBlockSetWriteThreshold(virDomainPtr dom,
+ const char *disk,
+ double threshold,
+ unsigned int flags)
+{
+ virConnectPtr conn;
+
+ VIR_DOMAIN_DEBUG(dom, "disk=%s, threshold=%g, flags=%x",
+ disk, threshold, flags);
+
+ virResetLastError();
+
+ virCheckDomainReturn(dom, -1);
+ virCheckNonNullArgGoto(disk, error);
+ virCheckNonNegativeArgGoto(threshold, error);
+ if (flags & VIR_DOMAIN_BLOCK_SET_WRITE_THRESHOLD_PERCENTAGE) {
+ if (threshold > 100.0)
+ virReportError(VIR_ERR_INVALID_ARG,
+ _("threshold in %s must be a percentage"),
+ __FUNCTION__);
+ goto error;
+ } else {
+ if (threshold > LLONG_MAX)
+ virReportError(VIR_ERR_INVALID_ARG,
+ _("threshold in %s must fit in unsigned long
long"),
+ __FUNCTION__);
+ goto error;
+ }
+ conn = dom->conn;
+
+ if (conn->driver->domainBlockSetWriteThreshold) {
+ int ret;
+ ret = conn->driver->domainBlockSetWriteThreshold(dom, disk, threshold,
+ flags);
+ if (ret < 0)
+ goto error;
+ return ret;
+ }
+ virReportUnsupportedError();
+
+ error:
+ virDispatchError(dom->conn);
+ return -1;
+}
+
+
+/**
* virDomainInterfaceStats:
* @dom: pointer to the domain object
* @path: path to the interface
@@ -11201,6 +11300,8 @@ virConnectGetDomainCapabilities(virConnectPtr conn,
* unsigned long long.
* "block.<num>.errors" - Xen only: the 'oo_req' value as
* unsigned long long.
+ * "block.<num>.write-threshold" - byte at which a write threshold
event
+ * will fire, as unsigned long long.
Here it's as unsigned when compared to the block stats API.
* "block.<num>.allocation" - offset of the highest
written sector
* as unsigned long long.
* "block.<num>.capacity" - logical size in bytes of the block device
backing
diff --git a/src/libvirt_public.syms b/src/libvirt_public.syms
index 716dd2f..5131fcf 100644
--- a/src/libvirt_public.syms
+++ b/src/libvirt_public.syms
@@ -712,6 +712,7 @@ LIBVIRT_1.2.15 {
LIBVIRT_1.2.16 {
global:
+ virDomainBlockSetWriteThreshold;
You will probably have to bump this, since the feature freeze will be
tomorrow.
virDomainSetUserPassword;
} LIBVIRT_1.2.15;
Peter