[libvirt] [PATCH 00/10 v4] Integration of lock managers in QEMU

An update to http://www.redhat.com/archives/libvir-list/2011-May/msg00622.html New in this series: - Split the QEMU lock manager integration into 3 patches for easier review - Actually implemented the lease hotplug/unplug code fully - Fixes from previous review of patches 1+2 - Fix QEMU startup to not rely on an uninitialized variable

Allow the parent process to perform a bi-directional handshake with the child process during fork/exec. The child process will fork and do its initial setup. Immediately prior to the exec(), it will stop & wait for a handshake from the parent process. The parent process will spawn the child and wait until the child reaches the handshake point. It will do whatever extra setup work is required, before signalling the child to continue. The implementation of this is done using two pairs of blocking pipes. The first pair is used to block the parent, until the child writes a single byte. Then the second pair pair is used to block the child, until the parent confirms with another single byte. * src/util/command.c, src/util/command.h, src/libvirt_private.syms: Add APIs to perform a handshake --- src/libvirt_private.syms | 3 + src/util/command.c | 182 +++++++++++++++++++++++++++++++++++++++++++++- src/util/command.h | 22 ++++++ 3 files changed, 206 insertions(+), 1 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2f475b2..1b13c5c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -109,11 +109,14 @@ virCommandClearCaps; virCommandDaemonize; virCommandExec; virCommandFree; +virCommandHandshakeNotify; +virCommandHandshakeWait; virCommandNew; virCommandNewArgList; virCommandNewArgs; virCommandNonblockingFDs; virCommandPreserveFD; +virCommandRequireHandshake; virCommandRun; virCommandRunAsync; virCommandSetErrorBuffer; diff --git a/src/util/command.c b/src/util/command.c index ebb90cb..2991daa 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -36,6 +36,8 @@ #include "files.h" #include "buf.h" +#include <stdlib.h> + #define VIR_FROM_THIS VIR_FROM_NONE #define virCommandError(code, ...) 
\ @@ -77,6 +79,10 @@ struct _virCommand { int *outfdptr; int *errfdptr; + bool handshake; + int handshakeWait[2]; + int handshakeNotify[2]; + virExecHook hook; void *opaque; @@ -108,6 +114,11 @@ virCommandNewArgs(const char *const*args) if (VIR_ALLOC(cmd) < 0) return NULL; + cmd->handshakeWait[0] = -1; + cmd->handshakeWait[1] = -1; + cmd->handshakeNotify[0] = -1; + cmd->handshakeNotify[1] = -1; + FD_ZERO(&cmd->preserve); FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; @@ -1174,12 +1185,61 @@ virCommandHook(void *data) virCommandPtr cmd = data; int res = 0; - if (cmd->hook) + if (cmd->hook) { + VIR_DEBUG("Run hook %p %p", cmd->hook, cmd->opaque); res = cmd->hook(cmd->opaque); + VIR_DEBUG("Done hook %d", res); + } if (res == 0 && cmd->pwd) { VIR_DEBUG("Running child in %s", cmd->pwd); res = chdir(cmd->pwd); + if (res < 0) { + virReportSystemError(errno, + _("Unable to change to %s"), cmd->pwd); + } + } + if (cmd->handshake) { + char c = res < 0 ? '0' : '1'; + int rv; + VIR_DEBUG("Notifying parent for handshake start on %d", cmd->handshakeWait[1]); + if (safewrite(cmd->handshakeWait[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify parent process")); + return -1; + } + + /* On failure we pass the error message back to parent, + * so they don't have to dig through stderr logs + */ + if (res < 0) { + virErrorPtr err = virGetLastError(); + const char *msg = err ? 
err->message : + _("Unknown failure during hook execution"); + size_t len = strlen(msg) + 1; + if (safewrite(cmd->handshakeWait[1], msg, len) != len) { + virReportSystemError(errno, "%s", _("Unable to send error to parent process")); + return -1; + } + return -1; + } + + VIR_DEBUG("Waiting on parent for handshake complete on %d", cmd->handshakeNotify[0]); + if ((rv = saferead(cmd->handshakeNotify[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait on parent process")); + else + virReportSystemError(EIO, "%s", _("libvirtd quit during handshake")); + return -1; + } + if (c != '1') { + virReportSystemError(EINVAL, _("Unexpected confirm code '%c' from parent process"), c); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); } + + VIR_DEBUG("Hook is done %d", res); + return res; } @@ -1409,6 +1469,119 @@ virCommandAbort(virCommandPtr cmd ATTRIBUTE_UNUSED) } #endif + +void virCommandRequireHandshake(virCommandPtr cmd) +{ + if (!cmd || cmd->has_error) + return; + + if (cmd->handshake) { + cmd->has_error = -1; + VIR_DEBUG("Cannot require handshake twice"); + return; + } + + if (pipe(cmd->handshakeWait) < 0) { + cmd->has_error = errno; + return; + } + if (pipe(cmd->handshakeNotify) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + cmd->has_error = errno; + return; + } + + VIR_DEBUG("Transfer handshake wait=%d notify=%d", + cmd->handshakeWait[1], cmd->handshakeNotify[0]); + virCommandTransferFD(cmd, cmd->handshakeWait[1]); + virCommandTransferFD(cmd, cmd->handshakeNotify[0]); + cmd->handshake = true; +} + +int virCommandHandshakeWait(virCommandPtr cmd) +{ + char c; + int rv; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeWait[0] == -1) 
{ + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Wait for handshake on %d", cmd->handshakeWait[0]); + if ((rv = saferead(cmd->handshakeWait[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait for child process")); + else + virReportSystemError(EIO, "%s", _("Child process quit during startup handshake")); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if (c != '1') { + char *msg; + ssize_t len; + if (VIR_ALLOC_N(msg, 1024) < 0) { + virReportOOMError(); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if ((len = saferead(cmd->handshakeWait[0], msg, 1024)) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FREE(msg); + virReportSystemError(errno, "%s", _("No error message from child failure")); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + msg[len-1] = '\0'; + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", msg); + VIR_FREE(msg); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return 0; +} + +int virCommandHandshakeNotify(virCommandPtr cmd) +{ + char c = '1'; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeNotify[1] == -1) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Notify handshake on %d", cmd->handshakeWait[0]); + if (safewrite(cmd->handshakeNotify[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify child process")); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return 0; +} + + /* * Release all resources */ @@ -1440,6 +1613,13 @@ virCommandFree(virCommandPtr cmd) VIR_FREE(cmd->pwd); + if (cmd->handshake) { + 
VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + } + VIR_FREE(cmd->pidfile); if (cmd->reap) diff --git a/src/util/command.h b/src/util/command.h index aa5136b..95b6a5e 100644 --- a/src/util/command.h +++ b/src/util/command.h @@ -292,6 +292,28 @@ int virCommandWait(virCommandPtr cmd, int *exitstatus) ATTRIBUTE_RETURN_CHECK; /* + * Request that the child perform a handshake with + * the parent when the hook function has completed + * execution. The child will not exec() until the + * parent has notified + */ +void virCommandRequireHandshake(virCommandPtr cmd); + +/* + * Wait for the child to complete execution of its + * hook function + */ +int virCommandHandshakeWait(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* + * Notify the child that it is OK to exec() the + * real binary now + */ +int virCommandHandshakeNotify(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* * Abort an async command if it is running, without issuing * any errors or affecting errno. Designed for error paths * where some but not all paths to the cleanup code might -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:16AM -0400, Daniel P. Berrange wrote:
Allow the parent process to perform a bi-directional handshake with the child process during fork/exec. The child process will fork and do its initial setup. Immediately prior to the exec(), it will stop & wait for a handshake from the parent process. The parent process will spawn the child and wait until the child reaches the handshake point. It will do whatever extra setup work is required, before signalling the child to continue.
The implementation of this is done using two pairs of blocking pipes. The first pair is used to block the parent, until the child writes a single byte. Then the second pair is used to block the child, until the parent confirms with another single byte.
* src/util/command.c, src/util/command.h, src/libvirt_private.syms: Add APIs to perform a handshake --- src/libvirt_private.syms | 3 + src/util/command.c | 182 +++++++++++++++++++++++++++++++++++++++++++++- src/util/command.h | 22 ++++++ 3 files changed, 206 insertions(+), 1 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2f475b2..1b13c5c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -109,11 +109,14 @@ virCommandClearCaps; virCommandDaemonize; virCommandExec; virCommandFree; +virCommandHandshakeNotify; +virCommandHandshakeWait; virCommandNew; virCommandNewArgList; virCommandNewArgs; virCommandNonblockingFDs; virCommandPreserveFD; +virCommandRequireHandshake; virCommandRun; virCommandRunAsync; virCommandSetErrorBuffer; diff --git a/src/util/command.c b/src/util/command.c index ebb90cb..2991daa 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -36,6 +36,8 @@ #include "files.h" #include "buf.h"
+#include <stdlib.h> + #define VIR_FROM_THIS VIR_FROM_NONE
#define virCommandError(code, ...) \ @@ -77,6 +79,10 @@ struct _virCommand { int *outfdptr; int *errfdptr;
+ bool handshake; + int handshakeWait[2]; + int handshakeNotify[2]; + virExecHook hook; void *opaque;
@@ -108,6 +114,11 @@ virCommandNewArgs(const char *const*args) if (VIR_ALLOC(cmd) < 0) return NULL;
+ cmd->handshakeWait[0] = -1; + cmd->handshakeWait[1] = -1; + cmd->handshakeNotify[0] = -1; + cmd->handshakeNotify[1] = -1; + FD_ZERO(&cmd->preserve); FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; @@ -1174,12 +1185,61 @@ virCommandHook(void *data) virCommandPtr cmd = data; int res = 0;
- if (cmd->hook) + if (cmd->hook) { + VIR_DEBUG("Run hook %p %p", cmd->hook, cmd->opaque); res = cmd->hook(cmd->opaque); + VIR_DEBUG("Done hook %d", res); + } if (res == 0 && cmd->pwd) { VIR_DEBUG("Running child in %s", cmd->pwd); res = chdir(cmd->pwd); + if (res < 0) { + virReportSystemError(errno, + _("Unable to change to %s"), cmd->pwd); + } + } + if (cmd->handshake) { + char c = res < 0 ? '0' : '1'; + int rv; + VIR_DEBUG("Notifying parent for handshake start on %d", cmd->handshakeWait[1]); + if (safewrite(cmd->handshakeWait[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify parent process")); + return -1; + } + + /* On failure we pass the error message back to parent, + * so they don't have to dig through stderr logs + */ + if (res < 0) { + virErrorPtr err = virGetLastError(); + const char *msg = err ? err->message : + _("Unknown failure during hook execution"); + size_t len = strlen(msg) + 1; + if (safewrite(cmd->handshakeWait[1], msg, len) != len) { + virReportSystemError(errno, "%s", _("Unable to send error to parent process")); + return -1; + } + return -1; + } + + VIR_DEBUG("Waiting on parent for handshake complete on %d", cmd->handshakeNotify[0]); + if ((rv = saferead(cmd->handshakeNotify[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait on parent process")); + else + virReportSystemError(EIO, "%s", _("libvirtd quit during handshake")); + return -1; + } + if (c != '1') { + virReportSystemError(EINVAL, _("Unexpected confirm code '%c' from parent process"), c); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); } + + VIR_DEBUG("Hook is done %d", res); + return res; }
@@ -1409,6 +1469,119 @@ virCommandAbort(virCommandPtr cmd ATTRIBUTE_UNUSED) } #endif
+ +void virCommandRequireHandshake(virCommandPtr cmd) +{ + if (!cmd || cmd->has_error) + return; + + if (cmd->handshake) { + cmd->has_error = -1; + VIR_DEBUG("Cannot require handshake twice"); + return; + } + + if (pipe(cmd->handshakeWait) < 0) { + cmd->has_error = errno; + return; + } + if (pipe(cmd->handshakeNotify) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + cmd->has_error = errno; + return; + } + + VIR_DEBUG("Transfer handshake wait=%d notify=%d", + cmd->handshakeWait[1], cmd->handshakeNotify[0]); + virCommandTransferFD(cmd, cmd->handshakeWait[1]); + virCommandTransferFD(cmd, cmd->handshakeNotify[0]); + cmd->handshake = true; +} + +int virCommandHandshakeWait(virCommandPtr cmd) +{ + char c; + int rv; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeWait[0] == -1) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Wait for handshake on %d", cmd->handshakeWait[0]); + if ((rv = saferead(cmd->handshakeWait[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait for child process")); + else + virReportSystemError(EIO, "%s", _("Child process quit during startup handshake")); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if (c != '1') { + char *msg; + ssize_t len; + if (VIR_ALLOC_N(msg, 1024) < 0) { + virReportOOMError(); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if ((len = saferead(cmd->handshakeWait[0], msg, 1024)) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FREE(msg); + virReportSystemError(errno, "%s", _("No error message from child failure")); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + msg[len-1] = '\0'; + 
virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", msg); + VIR_FREE(msg); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return 0; +} + +int virCommandHandshakeNotify(virCommandPtr cmd) +{ + char c = '1'; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeNotify[1] == -1) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Notify handshake on %d", cmd->handshakeWait[0]); + if (safewrite(cmd->handshakeNotify[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify child process")); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return 0; +} + + /* * Release all resources */ @@ -1440,6 +1613,13 @@ virCommandFree(virCommandPtr cmd)
VIR_FREE(cmd->pwd);
+ if (cmd->handshake) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + } + VIR_FREE(cmd->pidfile);
if (cmd->reap) diff --git a/src/util/command.h b/src/util/command.h index aa5136b..95b6a5e 100644 --- a/src/util/command.h +++ b/src/util/command.h @@ -292,6 +292,28 @@ int virCommandWait(virCommandPtr cmd, int *exitstatus) ATTRIBUTE_RETURN_CHECK;
/* + * Request that the child perform a handshake with + * the parent when the hook function has completed + * execution. The child will not exec() until the + * parent has notified + */ +void virCommandRequireHandshake(virCommandPtr cmd); + +/* + * Wait for the child to complete execution of its + * hook function + */ +int virCommandHandshakeWait(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* + * Notify the child that it is OK to exec() the + * real binary now + */ +int virCommandHandshakeNotify(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* * Abort an async command if it is running, without issuing * any errors or affecting errno. Designed for error paths * where some but not all paths to the cleanup code might
Looks fine to me. I was wondering whether passing a changing value, like the LSB of the child pid, would be of any interest; probably not, since we're always operating from fork(), so there should not be any risk. ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

A lock manager may operate in various modes. The direct mode of operation is to obtain locks based on the resources associated with devices in the XML. The indirect mode is where the app creating the domain provides explicit leases for each resource that needs to be locked. This XML extension allows for listing resources in the XML <devices> ... <lease> <lockspace>somearea</lockspace> <key>thequickbrownfoxjumpsoverthelazydog</key> <target path='/some/lease/path' offset='23432'/> </lease> ... </devices> The 'lockspace' is a unique identifier for the lockspace which the lease is associated The 'key' is a unique identifier for the resource associated with the lease. The 'target' is the file on disk where the leases are held. * docs/schemas/domain.rng: Add lease schema * src/conf/domain_conf.c, src/conf/domain_conf.h: parsing and formatting for leases * tests/qemuxml2argvdata/qemuxml2argv-lease.args, tests/qemuxml2argvdata/qemuxml2argv-lease.xml, tests/qemuxml2xmltest.c: Test XML handling for leases --- docs/formatdomain.html.in | 39 +++++++ docs/schemas/domain.rng | 24 ++++ src/conf/domain_conf.c | 134 ++++++++++++++++++++++++ src/conf/domain_conf.h | 14 +++ tests/qemuxml2argvdata/qemuxml2argv-lease.args | 4 + tests/qemuxml2argvdata/qemuxml2argv-lease.xml | 36 +++++++ tests/qemuxml2xmltest.c | 1 + 7 files changed, 252 insertions(+), 0 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index facdaf2..d59779d 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1071,6 +1071,45 @@ sub-element. </p> + <h4><a name="elementsLease">Device leases</a></h4> + + <p> + When using a lock manager, it may be desirable to record device leases + against a VM. The lock manager will ensure the VM won't start unless + the leases can be acquired. + </p> + +<pre> + ... + <devices> + ... 
+ <lease> + <lockspace>somearea</lockspace> + <key>somekey</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + ... + </devices> + ...</pre> + + <dl> + <dt>lockspace</dt> + <dd>This is an arbitrary string, identifying the lockspace + within which the key is held. Lock managers may impose + extra restrictions on the format, or length of the lockspace + name.</dd> + <dt>key</dt> + <dd>This is an arbitrary string, uniquely identifying the + lease to be acquired. Lock managers may impose extra + restrictions on the format, or length of the key. + </dd> + <dt>target</dt> + <dd>This is the fully qualified path of the file associated + with the lockspace. The offset specifies where the lease + is stored within the file. If the lock manager does not + require a offset, just pass 0. + </dd> + </dl> <h4><a name="elementsUSB">USB and PCI devices</a></h4> diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index b252547..43c811f 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -586,6 +586,29 @@ <ref name="address"/> </optional> </define> + + <define name="lease"> + <element name="lease"> + <interleave> + <element name="lockspace"> + <text/> + </element> + <element name="key"> + <text/> + </element> + <element name="target"> + <attribute name="path"> + <text/> + </attribute> + <optional> + <attribute name="offset"> + <ref name="unsignedInt"/> + </attribute> + </optional> + </element> + </interleave> + </element> + </define> <!-- A disk description can be either of type file or block The name of the attribute on the source element depends on the type @@ -1940,6 +1963,7 @@ <choice> <ref name="disk"/> <ref name="controller"/> + <ref name="lease"/> <ref name="filesystem"/> <ref name="interface"/> <ref name="input"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 420d104..b6f7740 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -102,6 +102,7 @@ VIR_ENUM_IMPL(virDomainLifecycleCrash, 
VIR_DOMAIN_LIFECYCLE_CRASH_LAST, VIR_ENUM_IMPL(virDomainDevice, VIR_DOMAIN_DEVICE_LAST, "disk", + "lease", "filesystem", "interface", "input", @@ -638,6 +639,18 @@ void virDomainInputDefFree(virDomainInputDefPtr def) VIR_FREE(def); } +static void virDomainLeaseDefFree(virDomainLeaseDefPtr def) +{ + if (!def) + return; + + VIR_FREE(def->lockspace); + VIR_FREE(def->key); + VIR_FREE(def->path); + + VIR_FREE(def); +} + void virDomainDiskDefFree(virDomainDiskDefPtr def) { unsigned int i; @@ -900,6 +913,9 @@ void virDomainDeviceDefFree(virDomainDeviceDefPtr def) case VIR_DOMAIN_DEVICE_DISK: virDomainDiskDefFree(def->data.disk); break; + case VIR_DOMAIN_DEVICE_LEASE: + virDomainLeaseDefFree(def->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: virDomainNetDefFree(def->data.net); break; @@ -974,6 +990,10 @@ void virDomainDefFree(virDomainDefPtr def) if (!def) return; + for (i = 0 ; i < def->nleases ; i++) + virDomainLeaseDefFree(def->leases[i]); + VIR_FREE(def->leases); + for (i = 0 ; i < def->ngraphics ; i++) virDomainGraphicsDefFree(def->graphics[i]); VIR_FREE(def->graphics); @@ -1880,6 +1900,79 @@ virDomainDiskDefAssignAddress(virCapsPtr caps, virDomainDiskDefPtr def) return 0; } +/* Parse the XML definition for a lease + */ +static virDomainLeaseDefPtr +virDomainLeaseDefParseXML(xmlNodePtr node) +{ + virDomainLeaseDefPtr def; + xmlNodePtr cur; + char *lockspace = NULL; + char *key = NULL; + char *path = NULL; + char *offset = NULL; + + if (VIR_ALLOC(def) < 0) { + virReportOOMError(); + return NULL; + } + + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((key == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "key"))) { + key = (char *)xmlNodeGetContent(cur); + } else if ((lockspace == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "lockspace"))) { + lockspace = (char *)xmlNodeGetContent(cur); + } else if ((path == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + path = virXMLPropString(cur, "path"); + offset = 
virXMLPropString(cur, "offset"); + } + } + cur = cur->next; + } + + if (!key) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'key' element for lease")); + goto error; + } + if (!path) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'target' element for lease")); + goto error; + } + + if (offset && + virStrToLong_ull(offset, NULL, 10, &def->offset) < 0) { + virDomainReportError(VIR_ERR_XML_ERROR, + _("Malformed lease target offset %s"), offset); + goto error; + } + + def->key = key; + def->lockspace = lockspace; + def->path = path; + path = key = lockspace = NULL; + +cleanup: + VIR_FREE(lockspace); + VIR_FREE(key); + VIR_FREE(path); + VIR_FREE(offset); + + return def; + + error: + virDomainLeaseDefFree(def); + def = NULL; + goto cleanup; +} + + /* Parse the XML definition for a disk * @param node XML nodeset to parse for disk definition */ @@ -4966,6 +5059,10 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps, if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, NULL, flags))) goto error; + } else if (xmlStrEqual(node->name, BAD_CAST "lease")) { + dev->type = VIR_DOMAIN_DEVICE_LEASE; + if (!(dev->data.lease = virDomainLeaseDefParseXML(node))) + goto error; } else if (xmlStrEqual(node->name, BAD_CAST "filesystem")) { dev->type = VIR_DOMAIN_DEVICE_FS; if (!(dev->data.fs = virDomainFSDefParseXML(node, flags))) @@ -5838,6 +5935,23 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, } VIR_FREE(nodes); + /* analysis of the resource leases */ + if ((n = virXPathNodeSet("./devices/lease", ctxt, &nodes)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot extract device leases")); + goto error; + } + if (n && VIR_ALLOC_N(def->leases, n) < 0) + goto no_memory; + for (i = 0 ; i < n ; i++) { + virDomainLeaseDefPtr lease = virDomainLeaseDefParseXML(nodes[i]); + if (!lease) + goto error; + + def->leases[def->nleases++] = lease; + } + VIR_FREE(nodes); + /* analysis of the filesystems */ if ((n = 
virXPathNodeSet("./devices/filesystem", ctxt, &nodes)) < 0) { goto error; @@ -7012,6 +7126,22 @@ virDomainLifecycleDefFormat(virBufferPtr buf, static int +virDomainLeaseDefFormat(virBufferPtr buf, + virDomainLeaseDefPtr def) +{ + virBufferAddLit(buf, " <lease>\n"); + virBufferEscapeString(buf, " <lockspace>%s</lockspace>\n", def->lockspace); + virBufferEscapeString(buf, " <key>%s</key>\n", def->key); + virBufferEscapeString(buf, " <target path='%s'", def->path); + if (def->offset) + virBufferAsprintf(buf, " offset='%llu'", def->offset); + virBufferAddLit(buf, "/>\n"); + virBufferAddLit(buf, " </lease>\n"); + + return 0; +} + +static int virDomainDiskDefFormat(virBufferPtr buf, virDomainDiskDefPtr def, int flags) @@ -8415,6 +8545,10 @@ char *virDomainDefFormat(virDomainDefPtr def, if (virDomainControllerDefFormat(&buf, def->controllers[n], flags) < 0) goto cleanup; + for (n = 0 ; n < def->nleases ; n++) + if (virDomainLeaseDefFormat(&buf, def->leases[n]) < 0) + goto cleanup; + for (n = 0 ; n < def->nfss ; n++) if (virDomainFSDefFormat(&buf, def->fss[n], flags) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 8560076..b0771aa 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -123,6 +123,15 @@ struct _virDomainDeviceInfo { } addr; }; +typedef struct _virDomainLeaseDef virDomainLeaseDef; +typedef virDomainLeaseDef *virDomainLeaseDefPtr; +struct _virDomainLeaseDef { + char *lockspace; + char *key; + char *path; + unsigned long long offset; +}; + /* Two types of disk backends */ enum virDomainDiskType { @@ -816,6 +825,7 @@ enum virDomainSmbiosMode { /* Flags for the 'type' field in next struct */ enum virDomainDeviceType { VIR_DOMAIN_DEVICE_DISK, + VIR_DOMAIN_DEVICE_LEASE, VIR_DOMAIN_DEVICE_FS, VIR_DOMAIN_DEVICE_NET, VIR_DOMAIN_DEVICE_INPUT, @@ -836,6 +846,7 @@ struct _virDomainDeviceDef { union { virDomainDiskDefPtr disk; virDomainControllerDefPtr controller; + virDomainLeaseDefPtr lease; virDomainFSDefPtr fs; 
virDomainNetDefPtr net; virDomainInputDefPtr input; @@ -1171,6 +1182,9 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels; + int nleases; + virDomainLeaseDefPtr *leases; + /* Only 1 */ virDomainChrDefPtr console; virSecurityLabelDef seclabel; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.args b/tests/qemuxml2argvdata/qemuxml2argv-lease.args new file mode 100644 index 0000000..63f9bef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1,4 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S \ +-M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none \ +-serial none -parallel none -usb diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.xml b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml new file mode 100644 index 0000000..7efe1ef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml @@ -0,0 +1,36 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory>219200</memory> + <currentMemory>219200</currentMemory> + <vcpu>1</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' unit='0'/> + </disk> + <disk type='file' device='cdrom'> + <source file='/root/boot.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <controller type='ide' index='0'/> + <lease> + <lockspace>somearea</lockspace> + <key>thequickbrownfoxjumpedoverthelazydog</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + 
<memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 5bfbcab..e74c337 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -179,6 +179,7 @@ mymain(void) DO_TEST("cputune"); DO_TEST("smp"); + DO_TEST("lease"); /* These tests generate different XML */ DO_TEST_DIFFERENT("balloon-device-auto"); -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:17AM -0400, Daniel P. Berrange wrote:
A lock manager may operate in various modes. The direct mode of operation is to obtain locks based on the resources associated with devices in the XML. The indirect mode is where the app creating the domain provides explicit leases for each resource that needs to be locked. This XML extension allows for listing resources in the XML
<devices> ... <lease> <lockspace>somearea</lockspace> <key>thequickbrownfoxjumpsoverthelazydog</key> <target path='/some/lease/path' offset='23432'/> </lease> ... </devices>
The 'lockspace' is a unique identifier for the lockspace with which the lease is associated.
The 'key' is a unique identifier for the resource associated with the lease.
The 'target' is the file on disk where the leases are held.
* docs/schemas/domain.rng: Add lease schema * src/conf/domain_conf.c, src/conf/domain_conf.h: parsing and formatting for leases * tests/qemuxml2argvdata/qemuxml2argv-lease.args, tests/qemuxml2argvdata/qemuxml2argv-lease.xml, tests/qemuxml2xmltest.c: Test XML handling for leases --- docs/formatdomain.html.in | 39 +++++++ docs/schemas/domain.rng | 24 ++++ src/conf/domain_conf.c | 134 ++++++++++++++++++++++++ src/conf/domain_conf.h | 14 +++ tests/qemuxml2argvdata/qemuxml2argv-lease.args | 4 + tests/qemuxml2argvdata/qemuxml2argv-lease.xml | 36 +++++++ tests/qemuxml2xmltest.c | 1 + 7 files changed, 252 insertions(+), 0 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index facdaf2..d59779d 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1071,6 +1071,45 @@ sub-element. </p>
+ <h4><a name="elementsLease">Device leases</a></h4> + + <p> + When using a lock manager, it may be desirable to record device leases + against a VM. The lock manager will ensure the VM won't start unless + the leases can be acquired. + </p> + +<pre> + ... + <devices> + ... + <lease> + <lockspace>somearea</lockspace> + <key>somekey</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + ... + </devices> + ...</pre> + + <dl> + <dt>lockspace</dt> + <dd>This is an arbitrary string, identifying the lockspace + within which the key is held. Lock managers may impose + extra restrictions on the format, or length of the lockspace + name.</dd> + <dt>key</dt> + <dd>This is an arbitrary string, uniquely identifying the + lease to be acquired. Lock managers may impose extra + restrictions on the format, or length of the key. + </dd> + <dt>target</dt> + <dd>This is the fully qualified path of the file associated + with the lockspace. The offset specifies where the lease + is stored within the file. If the lock manager does not + require a offset, just pass 0. + </dd> + </dl>
<h4><a name="elementsUSB">USB and PCI devices</a></h4>
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index b252547..43c811f 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -586,6 +586,29 @@ <ref name="address"/> </optional> </define> + + <define name="lease"> + <element name="lease"> + <interleave> + <element name="lockspace"> + <text/> + </element> + <element name="key"> + <text/> + </element> + <element name="target"> + <attribute name="path"> + <text/>
This should use <ref name="absFilePath"/> instead of <text/> for added checking
+ </attribute> + <optional> + <attribute name="offset"> + <ref name="unsignedInt"/> + </attribute> + </optional> + </element> + </interleave> + </element> + </define> <!-- A disk description can be either of type file or block The name of the attribute on the source element depends on the type @@ -1940,6 +1963,7 @@ <choice> <ref name="disk"/> <ref name="controller"/> + <ref name="lease"/> <ref name="filesystem"/> <ref name="interface"/> <ref name="input"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 420d104..b6f7740 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -102,6 +102,7 @@ VIR_ENUM_IMPL(virDomainLifecycleCrash, VIR_DOMAIN_LIFECYCLE_CRASH_LAST,
VIR_ENUM_IMPL(virDomainDevice, VIR_DOMAIN_DEVICE_LAST, "disk", + "lease", "filesystem", "interface", "input", @@ -638,6 +639,18 @@ void virDomainInputDefFree(virDomainInputDefPtr def) VIR_FREE(def); }
+static void virDomainLeaseDefFree(virDomainLeaseDefPtr def) +{ + if (!def) + return; + + VIR_FREE(def->lockspace); + VIR_FREE(def->key); + VIR_FREE(def->path); + + VIR_FREE(def); +} + void virDomainDiskDefFree(virDomainDiskDefPtr def) { unsigned int i; @@ -900,6 +913,9 @@ void virDomainDeviceDefFree(virDomainDeviceDefPtr def) case VIR_DOMAIN_DEVICE_DISK: virDomainDiskDefFree(def->data.disk); break; + case VIR_DOMAIN_DEVICE_LEASE: + virDomainLeaseDefFree(def->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: virDomainNetDefFree(def->data.net); break; @@ -974,6 +990,10 @@ void virDomainDefFree(virDomainDefPtr def) if (!def) return;
+ for (i = 0 ; i < def->nleases ; i++) + virDomainLeaseDefFree(def->leases[i]); + VIR_FREE(def->leases); + for (i = 0 ; i < def->ngraphics ; i++) virDomainGraphicsDefFree(def->graphics[i]); VIR_FREE(def->graphics); @@ -1880,6 +1900,79 @@ virDomainDiskDefAssignAddress(virCapsPtr caps, virDomainDiskDefPtr def) return 0; }
+/* Parse the XML definition for a lease + */ +static virDomainLeaseDefPtr +virDomainLeaseDefParseXML(xmlNodePtr node) +{ + virDomainLeaseDefPtr def; + xmlNodePtr cur; + char *lockspace = NULL; + char *key = NULL; + char *path = NULL; + char *offset = NULL; + + if (VIR_ALLOC(def) < 0) { + virReportOOMError(); + return NULL; + } + + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((key == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "key"))) { + key = (char *)xmlNodeGetContent(cur); + } else if ((lockspace == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "lockspace"))) { + lockspace = (char *)xmlNodeGetContent(cur); + } else if ((path == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + path = virXMLPropString(cur, "path"); + offset = virXMLPropString(cur, "offset"); + } + } + cur = cur->next; + } + + if (!key) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'key' element for lease")); + goto error; + } + if (!path) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'target' element for lease")); + goto error; + } + + if (offset && + virStrToLong_ull(offset, NULL, 10, &def->offset) < 0) { + virDomainReportError(VIR_ERR_XML_ERROR, + _("Malformed lease target offset %s"), offset); + goto error; + } + + def->key = key; + def->lockspace = lockspace; + def->path = path; + path = key = lockspace = NULL; + +cleanup: + VIR_FREE(lockspace); + VIR_FREE(key); + VIR_FREE(path); + VIR_FREE(offset); + + return def; + + error: + virDomainLeaseDefFree(def); + def = NULL; + goto cleanup; +} + + /* Parse the XML definition for a disk * @param node XML nodeset to parse for disk definition */ @@ -4966,6 +5059,10 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps, if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, NULL, flags))) goto error; + } else if (xmlStrEqual(node->name, BAD_CAST "lease")) { + dev->type = VIR_DOMAIN_DEVICE_LEASE; + if (!(dev->data.lease = 
virDomainLeaseDefParseXML(node))) + goto error; } else if (xmlStrEqual(node->name, BAD_CAST "filesystem")) { dev->type = VIR_DOMAIN_DEVICE_FS; if (!(dev->data.fs = virDomainFSDefParseXML(node, flags))) @@ -5838,6 +5935,23 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, } VIR_FREE(nodes);
+ /* analysis of the resource leases */ + if ((n = virXPathNodeSet("./devices/lease", ctxt, &nodes)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot extract device leases")); + goto error; + } + if (n && VIR_ALLOC_N(def->leases, n) < 0) + goto no_memory; + for (i = 0 ; i < n ; i++) { + virDomainLeaseDefPtr lease = virDomainLeaseDefParseXML(nodes[i]); + if (!lease) + goto error; + + def->leases[def->nleases++] = lease; + } + VIR_FREE(nodes); + /* analysis of the filesystems */ if ((n = virXPathNodeSet("./devices/filesystem", ctxt, &nodes)) < 0) { goto error; @@ -7012,6 +7126,22 @@ virDomainLifecycleDefFormat(virBufferPtr buf,
static int +virDomainLeaseDefFormat(virBufferPtr buf, + virDomainLeaseDefPtr def) +{ + virBufferAddLit(buf, " <lease>\n"); + virBufferEscapeString(buf, " <lockspace>%s</lockspace>\n", def->lockspace); + virBufferEscapeString(buf, " <key>%s</key>\n", def->key); + virBufferEscapeString(buf, " <target path='%s'", def->path); + if (def->offset) + virBufferAsprintf(buf, " offset='%llu'", def->offset); + virBufferAddLit(buf, "/>\n"); + virBufferAddLit(buf, " </lease>\n"); + + return 0; +} + +static int virDomainDiskDefFormat(virBufferPtr buf, virDomainDiskDefPtr def, int flags) @@ -8415,6 +8545,10 @@ char *virDomainDefFormat(virDomainDefPtr def, if (virDomainControllerDefFormat(&buf, def->controllers[n], flags) < 0) goto cleanup;
+ for (n = 0 ; n < def->nleases ; n++) + if (virDomainLeaseDefFormat(&buf, def->leases[n]) < 0) + goto cleanup; + for (n = 0 ; n < def->nfss ; n++) if (virDomainFSDefFormat(&buf, def->fss[n], flags) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 8560076..b0771aa 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -123,6 +123,15 @@ struct _virDomainDeviceInfo { } addr; };
+typedef struct _virDomainLeaseDef virDomainLeaseDef; +typedef virDomainLeaseDef *virDomainLeaseDefPtr; +struct _virDomainLeaseDef { + char *lockspace; + char *key; + char *path; + unsigned long long offset; +}; +
/* Two types of disk backends */ enum virDomainDiskType { @@ -816,6 +825,7 @@ enum virDomainSmbiosMode { /* Flags for the 'type' field in next struct */ enum virDomainDeviceType { VIR_DOMAIN_DEVICE_DISK, + VIR_DOMAIN_DEVICE_LEASE, VIR_DOMAIN_DEVICE_FS, VIR_DOMAIN_DEVICE_NET, VIR_DOMAIN_DEVICE_INPUT, @@ -836,6 +846,7 @@ struct _virDomainDeviceDef { union { virDomainDiskDefPtr disk; virDomainControllerDefPtr controller; + virDomainLeaseDefPtr lease; virDomainFSDefPtr fs; virDomainNetDefPtr net; virDomainInputDefPtr input; @@ -1171,6 +1182,9 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels;
+ int nleases; + virDomainLeaseDefPtr *leases; + /* Only 1 */ virDomainChrDefPtr console; virSecurityLabelDef seclabel; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.args b/tests/qemuxml2argvdata/qemuxml2argv-lease.args new file mode 100644 index 0000000..63f9bef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1,4 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S \ +-M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none \ +-serial none -parallel none -usb
Hum, is it really useful to add the test case while the driver code isn't applied yet? Minor, though; I assume the test gets expanded with the driver addition later on.
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.xml b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml new file mode 100644 index 0000000..7efe1ef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml @@ -0,0 +1,36 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory>219200</memory> + <currentMemory>219200</currentMemory> + <vcpu>1</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' unit='0'/> + </disk> + <disk type='file' device='cdrom'> + <source file='/root/boot.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <controller type='ide' index='0'/> + <lease> + <lockspace>somearea</lockspace> + <key>thequickbrownfoxjumpedoverthelazydog</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 5bfbcab..e74c337 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -179,6 +179,7 @@ mymain(void) DO_TEST("cputune");
DO_TEST("smp"); + DO_TEST("lease");
/* These tests generate different XML */ DO_TEST_DIFFERENT("balloon-device-auto");
ACK, with the small improvement to the RNG Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Fri, May 27, 2011 at 04:31:36PM +0800, Daniel Veillard wrote:
On Thu, May 19, 2011 at 07:24:17AM -0400, Daniel P. Berrange wrote:
--- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1,4 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S \ +-M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none \ +-serial none -parallel none -usb
Hum, is it really useful to add the test case while the driver code isn't applied yet? Minor, though; I assume the test gets expanded with the driver addition later on.
Too much habit of XML data turning into qemu command line; that was wrong, please ignore that comment :-) Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Define the basic framework for lock manager plugins. The basic plugin API for 3rd parties to implement is defined in src/locking/lock_driver.h. This allows dlopen()able modules for alternative locking schemes, however, we do not install the header. This requires lock plugins to be in-tree allowing changing of the lock manager plugin API in future. The libvirt code for loading & calling into plugins is in src/locking/lock_manager.{c,h} * include/libvirt/virterror.h, src/util/virterror.c: Add VIR_FROM_LOCKING * src/locking/lock_driver.h: API for lock driver plugins to implement * src/locking/lock_manager.c, src/locking/lock_manager.h: Internal API for managing locking * src/Makefile.am: Add locking code --- include/libvirt/virterror.h | 1 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/libvirt_private.syms | 14 ++ src/locking/README | 158 +++++++++++++++++++ src/locking/lock_driver.h | 293 +++++++++++++++++++++++++++++++++++ src/locking/lock_manager.c | 357 +++++++++++++++++++++++++++++++++++++++++++ src/locking/lock_manager.h | 65 ++++++++ src/util/virterror.c | 3 + 9 files changed, 894 insertions(+), 1 deletions(-) create mode 100644 src/locking/README create mode 100644 src/locking/lock_driver.h create mode 100644 src/locking/lock_manager.c create mode 100644 src/locking/lock_manager.h diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0708e02..efa4796 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -81,6 +81,7 @@ typedef enum { VIR_FROM_VMWARE = 39, /* Error from VMware driver */ VIR_FROM_EVENT = 40, /* Error from event loop impl */ VIR_FROM_LIBXL = 41, /* Error from libxenlight driver */ + VIR_FROM_LOCKING = 42, /* Error from lock manager */ } virErrorDomain; diff --git a/po/POTFILES.in b/po/POTFILES.in index dd44da2..9c3d287 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_manager.c 
src/lxc/lxc_container.c src/lxc/lxc_conf.c src/lxc/lxc_controller.c diff --git a/src/Makefile.am b/src/Makefile.am index 58eb2a7..a27838b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -93,7 +93,8 @@ DRIVER_SOURCES = \ datatypes.c datatypes.h \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ - libvirt.c libvirt_internal.h + libvirt.c libvirt_internal.h \ + locking/lock_manager.c locking/lock_manager.h # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 1b13c5c..1784c0d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -588,6 +588,20 @@ virRegisterSecretDriver; virRegisterStorageDriver; +# locking.h +virLockManagerAcquire; +virLockManagerAddResource; +virLockManagerFree; +virLockManagerInquire; +virLockManagerNew; +virLockManagerPluginNew; +virLockManagerPluginRef; +virLockManagerPluginUnref; +virLockManagerPluginUsesState; +virLockManagerPluginGetName; +virLockManagerRelease; + + # logging.h virLogDefineFilter; virLogDefineOutput; diff --git a/src/locking/README b/src/locking/README new file mode 100644 index 0000000..4fa4f89 --- /dev/null +++ b/src/locking/README @@ -0,0 +1,158 @@ + +At libvirtd startup: + + plugin = virLockManagerPluginLoad("sync-manager"); + + +At libvirtd shtudown: + + virLockManagerPluginUnload(plugin) + + +At guest startup: + + manager = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_DOMAIN, + 0); + + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + if (!virLockManagerAcquireObject(manager)) + abort.. + + run QEMU + + +At guest shutdown: + + ...send QEMU 'quit' monitor command, and/or kill(qemupid)... + + if (!virLockManagerShutdown(manager)) + kill(supervisorpid); /* XXX or leave it running ??? 
*/ + + virLockManagerFree(manager); + + + +At libvirtd restart with running guests: + + foreach still running guest + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_ATTACH); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + + +With disk hotplug: + + if (virLockManagerAcquireResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...abort hotplug attempt ... + + ...hotplug the device... + + + +With disk unhotplug: + + ...hotunplug the device... + + if (virLockManagerReleaseResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...log warning ... + + + +During migration: + + 1. On source host + + if (!virLockManagerPrepareMigrate(manager, hosturi)) + ..don't start migration.. + + 2. On dest host + + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_MIGRATE); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + char **supervisorargv; + int supervisorargc; + + supervisor = virLockManagerGetSupervisorPath(manager); + virLockManagerGetSupervisorArgs(&argv, &argc); + + cmd = qemuBuildCommandLine(supervisor, supervisorargv, supervisorargv); + + supervisorpid = virCommandExec(cmd); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + 3. Initiate migration in QEMU on source and wait for completion + + 4a. 
On failure + + 4a1 On target + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + 4a2 On source + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + + 4b. On success + + + 4b1 On target + + virLockManagerCompleteMigrateIn(manager, 0); + + 4b2 On source + + virLockManagerCompleteMigrateIn(manager, 0); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + +Notes: + + - If a lock manager impl does just VM level leases, it can + ignore all the resource paths at startup. + + - If a lock manager impl does not support migrate + it can return an error from all migrate calls + + - If a lock manager impl does not support hotplug + it can return an error from all resource acquire/release calls diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h new file mode 100644 index 0000000..40a55f6 --- /dev/null +++ b/src/locking/lock_driver.h @@ -0,0 +1,293 @@ +/* + * lock_driver.h: Defines the lock driver plugin API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_PLUGINS_LOCK_DRIVER_H__ +# define __VIR_PLUGINS_LOCK_DRIVER_H__ + +# include "internal.h" + +typedef struct _virLockManager virLockManager; +typedef virLockManager *virLockManagerPtr; + +typedef struct _virLockDriver virLockDriver; +typedef virLockDriver *virLockDriverPtr; + +typedef struct _virLockManagerParam virLockManagerParam; +typedef virLockManagerParam *virLockManagerParamPtr; + +typedef enum { + /* State passing is used to re-acquire existing leases */ + VIR_LOCK_MANAGER_USES_STATE = (1 << 0) +} virLockManagerFlags; + +typedef enum { + /* The managed object is a virtual guest domain */ + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0, +} virLockManagerObjectType; + +typedef enum { + /* The resource to be locked is a virtual disk */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0, + /* A lease against an arbitrary resource */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1, +} virLockManagerResourceType; + +typedef enum { + /* The resource is assigned in readonly mode */ + VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), + /* The resource is assigned in shared, writable mode */ + VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), +} virLockManagerResourceFlags; + +typedef enum { + /* Don't acquire the resources, just register the object PID */ + VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0) +} virLockManagerAcquireFlags; + +enum { + VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + VIR_LOCK_MANAGER_PARAM_TYPE_INT, + VIR_LOCK_MANAGER_PARAM_TYPE_LONG, + VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE, + VIR_LOCK_MANAGER_PARAM_TYPE_UUID, +}; + +struct _virLockManagerParam { + int type; + const char *key; + union { + int i; + long long l; + unsigned int ui; + unsigned long long ul; + double d; 
+ char *str; + unsigned char uuid[16]; + } value; +}; + + +/* + * Changes in major version denote incompatible ABI changes + * Changes in minor version denote new compatible API entry points + * Changes in micro version denote new compatible flags + */ +# define VIR_LOCK_MANAGER_VERSION_MAJOR 1 +# define VIR_LOCK_MANAGER_VERSION_MINOR 0 +# define VIR_LOCK_MANAGER_VERSION_MICRO 0 + +# define VIR_LOCK_MANAGER_VERSION \ + ((VIR_LOCK_MANAGER_VERSION_MAJOR * 1000 * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MINOR * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MICRO)) + + + +/** + * virLockDriverInit: + * @version: the libvirt requested plugin ABI version + * @flags: the libvirt requested plugin optional extras + * + * Allow the plugin to validate the libvirt requested + * plugin version / flags. This allows the plugin impl + * to block its use in versions of libvirtd which are + * too old to support key features. + * + * NB: A plugin may be loaded multiple times, for different + * libvirt drivers (eg QEMU, LXC, UML) + * + * Returns -1 if the requested version/flags were inadequate + */ +typedef int (*virLockDriverInit)(unsigned int version, + unsigned int flags); + +/** + * virLockDriverDeinit: + * + * Called to release any resources prior to the plugin + * being unloaded from memory. Returns -1 to prevent + * plugin from being unloaded from memory. + */ +typedef int (*virLockDriverDeinit)(void); + +/** + * virLockManagerNew: + * @man: the lock manager context + * @type: the type of process to be supervised + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: optional flags, currently unused + * + * Initialize a new context to supervise a process, usually + * a virtual machine. The lock driver implementation can use + * the <code>privateData</code> field of <code>man</code> + * to store a pointer to any driver specific state. 
+ * + * A process of VIR_LOCK_MANAGER_START_DOMAIN will be + * given the following parameters + * + * - id: the domain unique id (unsigned int) + * - uuid: the domain uuid (uuid) + * - name: the domain name (string) + * - pid: process ID to own/owning the lock (unsigned int) + * + * Returns 0 if successful initialized a new context, -1 on error + */ +typedef int (*virLockDriverNew)(virLockManagerPtr man, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverFree: + * @manager: the lock manager context + * + * Release any resources associated with the lock manager + * context private data + */ +typedef void (*virLockDriverFree)(virLockManagerPtr man); + +/** + * virLockDriverAddResource: + * @manager: the lock manager context + * @type: the resource type virLockManagerResourceType + * @name: the resource name + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: the resource access flags + * + * Assign a resource to a managed object. This will + * only be called prior to the object is being locked + * when it is inactive. eg, to set the initial boot + * time disk assignments on a VM + * The format of @name varies according to + * the resource @type. A VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK + * will have the fully qualified file path, while a resource + * of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE will have the + * unique name of the lease + * + * A resource of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE + * will receive at least the following extra parameters + * + * - 'path': a fully qualified path to the lockspace + * - 'lockspace': globally string identifying the lockspace name + * - 'offset': byte offset within the lease (unsigned long long) + * + * If no flags are given, the resource is assumed to be + * used in exclusive, read-write mode. Access can be + * relaxed to readonly, or shared read-write. 
+ * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAddResource)(virLockManagerPtr man, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverAcquire: + * @manager: the lock manager context + * @state: the current lock state + * @flags: optional flags, currently unused + * + * Start managing resources for the object. This + * must be called from the PID that represents the + * object to be managed. If the lock is lost at any + * time, the PID will be killed off by the lock manager. + * The optional state contains information about the + * locks previously held for the object. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAcquire)(virLockManagerPtr man, + const char *state, + unsigned int flags); + +/** + * virLockDriverRelease: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags + * + * Inform the lock manager that the supervised process has + * been, or can be stopped. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverRelease)(virLockManagerPtr man, + char **state, + unsigned int flags); + +/** + * virLockDriverInquire: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags, currently unused + * + * Retrieve the current lock state. The returned + * lock state may be NULL if none is required. The + * caller is responsible for freeing the lock + * state string when it is no longer required + * + * Returns 0 on success, or -1 on failure. 
+ */ +typedef int (*virLockDriverInquire)(virLockManagerPtr man, + char **state, + unsigned int flags); + + +struct _virLockManager { + virLockDriverPtr driver; + void *privateData; +}; + +/** + * The plugin must export a static instance of this + * driver table, with the name 'virLockDriverImpl' + */ +struct _virLockDriver { + /** + * @version: the newest implemented plugin ABI version + * @flags: optional flags, currently unused + */ + unsigned int version; + unsigned int flags; + + virLockDriverInit drvInit; + virLockDriverDeinit drvDeinit; + + virLockDriverNew drvNew; + virLockDriverFree drvFree; + + virLockDriverAddResource drvAddResource; + + virLockDriverAcquire drvAcquire; + virLockDriverRelease drvRelease; + virLockDriverInquire drvInquire; +}; + + +#endif /* __VIR_PLUGINS_LOCK_DRIVER_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c new file mode 100644 index 0000000..cb96091 --- /dev/null +++ b/src/locking/lock_manager.c @@ -0,0 +1,357 @@ +/* + * lock_manager.c: Implements the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_manager.h" +#include "virterror_internal.h" +#include "logging.h" +#include "util.h" +#include "memory.h" +#include "uuid.h" + +#include <dlfcn.h> +#include <stdlib.h> +#include <unistd.h> + +#include "configmake.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +#define CHECK_PLUGIN(field, errret) \ + if (!plugin->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +#define CHECK_MANAGER(field, errret) \ + if (!lock->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +struct _virLockManagerPlugin { + char *name; + virLockDriverPtr driver; + void *handle; + int refs; +}; + +#define DEFAULT_LOCK_MANAGER_PLUGIN_DIR LIBDIR "/libvirt/lock-driver" + +static void virLockManagerLogParams(size_t nparams, + virLockManagerParamPtr params) +{ + int i; + char uuidstr[VIR_UUID_STRING_BUFLEN]; + for (i = 0 ; i < nparams ; i++) { + switch (params[i].type) { + case VIR_LOCK_MANAGER_PARAM_TYPE_INT: + VIR_DEBUG(" key=%s type=int value=%d", params[i].key, params[i].value.i); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UINT: + VIR_DEBUG(" key=%s type=uint value=%u", params[i].key, params[i].value.ui); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_LONG: + VIR_DEBUG(" key=%s type=long value=%lld", params[i].key, params[i].value.l); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_ULONG: + VIR_DEBUG(" key=%s type=ulong value=%llu", params[i].key, params[i].value.ul); + break; + case 
VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE: + VIR_DEBUG(" key=%s type=double value=%lf", params[i].key, params[i].value.d); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_STRING: + VIR_DEBUG(" key=%s type=string value=%s", params[i].key, params[i].value.str); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UUID: + virUUIDFormat(params[i].value.uuid, uuidstr); + VIR_DEBUG(" key=%s type=uuid value=%s", params[i].key, uuidstr); + break; + } + } +} + + +/** + * virLockManagerPluginNew: + * @name: the name of the plugin + * @flag: optional plugin flags + * + * Attempt to load the plugin $(libdir)/libvirt/lock-driver/@name.so + * The plugin driver entry point will be resolved & invoked to obtain + * the lock manager driver. + * + * Even if the loading of the plugin succeeded, this may still + * return NULL if the plugin impl decided that we (libvirtd) + * are too old to support a feature it requires + * + * Returns a plugin object, or NULL if loading failed. + */ +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags) +{ + void *handle = NULL; + virLockDriverPtr driver; + virLockManagerPluginPtr plugin; + const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); + char *modfile = NULL; + + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + + VIR_DEBUG("Module load %s from %s", name, moddir); + + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } + + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } + + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } + + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } + + if 
(driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("plugin ABI is not compatible")); + goto cleanup; + } + + if (VIR_ALLOC(plugin) < 0) { + virReportOOMError(); + goto cleanup; + } + + plugin->driver = driver; + plugin->handle = handle; + plugin->refs = 1; + if (!(plugin->name = strdup(name))) { + virReportOOMError(); + goto cleanup; + } + + VIR_FREE(modfile); + return plugin; + +cleanup: + VIR_FREE(modfile); + if (handle) + dlclose(handle); + return NULL; +} + + +/** + * virLockManagerPluginRef: + * @plugin: the plugin implementation to ref + * + * Acquires an additional reference on the plugin. + */ +void virLockManagerPluginRef(virLockManagerPluginPtr plugin) +{ + plugin->refs++; +} + + +/** + * virLockManagerPluginUnref: + * @plugin: the plugin implementation to unref + * + * Releases a reference on the plugin. When the last reference + * is released, it will attempt to unload the plugin from memory. + * The plugin may refuse to allow unloading if this would + * result in an unsafe scenario. 
+ * + */ +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin) +{ + if (!plugin) + return; + + plugin->refs--; + + if (plugin->refs > 0) + return; + + if (plugin->driver->drvDeinit() >= 0) { + if (plugin->handle) + dlclose(plugin->handle); + } else { + VIR_WARN("Unable to unload lock maanger plugin from memory"); + return; + } + + VIR_FREE(plugin->name); + VIR_FREE(plugin); +} + + +const char *virLockManagerPluginGetName(virLockManagerPluginPtr plugin) +{ + VIR_DEBUG("plugin=%p", plugin); + + return plugin->name; +} + + +bool virLockManagerPluginUsesState(virLockManagerPluginPtr plugin) +{ + VIR_DEBUG("plugin=%p", plugin); + + return plugin->driver->flags & VIR_LOCK_MANAGER_USES_STATE; +} + + +/** + * virLockManagerNew: + * @plugin: the plugin implementation to use + * @type: the type of process to be supervised + * @flags: optional flags, currently unused + * + * Create a new context to supervise a process, usually + * a virtual machine. + * + * Returns a new lock manager context + */ +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerPtr lock; + VIR_DEBUG("plugin=%p type=%u nparams=%zu params=%p flags=%u", + plugin, type, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_PLUGIN(drvNew, NULL); + + if (VIR_ALLOC(lock) < 0) { + virReportOOMError(); + return NULL; + } + + lock->driver = plugin->driver; + + if (plugin->driver->drvNew(lock, type, nparams, params, flags) < 0) { + VIR_FREE(lock); + return NULL; + } + + return lock; +} + + +int virLockManagerAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + VIR_DEBUG("lock=%p type=%u name=%s nparams=%zu params=%p flags=%u", + lock, type, name, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_MANAGER(drvAddResource, -1); + 
+ return lock->driver->drvAddResource(lock, + type, name, + nparams, params, + flags); +} + +int virLockManagerAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state='%s' flags=%u", lock, NULLSTR(state), flags); + + CHECK_MANAGER(drvAcquire, -1); + + return lock->driver->drvAcquire(lock, state, flags); +} + + +int virLockManagerRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvRelease, -1); + + return lock->driver->drvRelease(lock, state, flags); +} + + +int virLockManagerInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvInquire, -1); + + return lock->driver->drvInquire(lock, state, flags); +} + + +int virLockManagerFree(virLockManagerPtr lock) +{ + VIR_DEBUG("lock=%p", lock); + + if (!lock) + return 0; + + CHECK_MANAGER(drvFree, -1); + + lock->driver->drvFree(lock); + + VIR_FREE(lock); + + return 0; +} diff --git a/src/locking/lock_manager.h b/src/locking/lock_manager.h new file mode 100644 index 0000000..13ad372 --- /dev/null +++ b/src/locking/lock_manager.h @@ -0,0 +1,65 @@ +/* + * lock_manager.h: Defines the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_MANAGER_H__ +# define __VIR_LOCK_MANAGER_H__ + +# include "internal.h" +# include "lock_driver.h" + +typedef struct _virLockManagerPlugin virLockManagerPlugin; +typedef virLockManagerPlugin *virLockManagerPluginPtr; + +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags); +void virLockManagerPluginRef(virLockManagerPluginPtr plugin); +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin); + +const char *virLockManagerPluginGetName(virLockManagerPluginPtr plugin); +bool virLockManagerPluginUsesState(virLockManagerPluginPtr plugin); + + +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int virLockManagerAddResource(virLockManagerPtr manager, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int virLockManagerAcquire(virLockManagerPtr manager, + const char *state, + unsigned int flags); +int virLockManagerRelease(virLockManagerPtr manager, + char **state, + unsigned int flags); +int virLockManagerInquire(virLockManagerPtr manager, + char **state, + unsigned int flags); + +int virLockManagerFree(virLockManagerPtr manager); + +#endif /* __VIR_LOCK_MANAGER_H__ */ diff --git a/src/util/virterror.c b/src/util/virterror.c index 2d7309a..95b718e 100644 --- a/src/util/virterror.c +++ b/src/util/virterror.c @@ -206,6 +206,9 @@ static const char *virErrorDomainName(virErrorDomain domain) { case VIR_FROM_EVENT: dom = "Events "; break; + case VIR_FROM_LOCKING: + dom = "Locking "; + break; } return(dom); } -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:18AM -0400, Daniel P. Berrange wrote:
Define the basic framework for lock manager plugins. The basic plugin API for 3rd parties to implement is defined in
src/locking/lock_driver.h
This allows dlopen()able modules for alternative locking schemes; however, we do not install the header. This requires lock plugins to be in-tree, which allows the lock manager plugin API to be changed in future.
The libvirt code for loading & calling into plugins is in
src/locking/lock_manager.{c,h}
* include/libvirt/virterror.h, src/util/virterror.c: Add VIR_FROM_LOCKING * src/locking/lock_driver.h: API for lock driver plugins to implement * src/locking/lock_manager.c, src/locking/lock_manager.h: Internal API for managing locking * src/Makefile.am: Add locking code --- include/libvirt/virterror.h | 1 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/libvirt_private.syms | 14 ++ src/locking/README | 158 +++++++++++++++++++ src/locking/lock_driver.h | 293 +++++++++++++++++++++++++++++++++++ src/locking/lock_manager.c | 357 +++++++++++++++++++++++++++++++++++++++++++ src/locking/lock_manager.h | 65 ++++++++ src/util/virterror.c | 3 + 9 files changed, 894 insertions(+), 1 deletions(-) create mode 100644 src/locking/README create mode 100644 src/locking/lock_driver.h create mode 100644 src/locking/lock_manager.c create mode 100644 src/locking/lock_manager.h
diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0708e02..efa4796 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -81,6 +81,7 @@ typedef enum { VIR_FROM_VMWARE = 39, /* Error from VMware driver */ VIR_FROM_EVENT = 40, /* Error from event loop impl */ VIR_FROM_LIBXL = 41, /* Error from libxenlight driver */ + VIR_FROM_LOCKING = 42, /* Error from lock manager */ } virErrorDomain;
diff --git a/po/POTFILES.in b/po/POTFILES.in index dd44da2..9c3d287 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c src/lxc/lxc_controller.c diff --git a/src/Makefile.am b/src/Makefile.am index 58eb2a7..a27838b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -93,7 +93,8 @@ DRIVER_SOURCES = \ datatypes.c datatypes.h \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ - libvirt.c libvirt_internal.h + libvirt.c libvirt_internal.h \ + locking/lock_manager.c locking/lock_manager.h
# XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 1b13c5c..1784c0d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -588,6 +588,20 @@ virRegisterSecretDriver; virRegisterStorageDriver;
+# locking.h +virLockManagerAcquire; +virLockManagerAddResource; +virLockManagerFree; +virLockManagerInquire; +virLockManagerNew; +virLockManagerPluginNew; +virLockManagerPluginRef; +virLockManagerPluginUnref; +virLockManagerPluginUsesState; +virLockManagerPluginGetName; +virLockManagerRelease; + + # logging.h virLogDefineFilter; virLogDefineOutput; diff --git a/src/locking/README b/src/locking/README new file mode 100644 index 0000000..4fa4f89 --- /dev/null +++ b/src/locking/README @@ -0,0 +1,158 @@ + +At libvirtd startup: + + plugin = virLockManagerPluginLoad("sync-manager"); + + +At libvirtd shtudown: + + virLockManagerPluginUnload(plugin) + + +At guest startup: + + manager = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_DOMAIN, + 0); + + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + if (!virLockManagerAcquireObject(manager)) + abort.. + + run QEMU + + +At guest shutdown: + + ...send QEMU 'quit' monitor command, and/or kill(qemupid)... + + if (!virLockManagerShutdown(manager)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + virLockManagerFree(manager); + + + +At libvirtd restart with running guests: + + foreach still running guest + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_ATTACH); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + + +With disk hotplug: + + if (virLockManagerAcquireResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...abort hotplug attempt ... + + ...hotplug the device... 
+ + + +With disk unhotplug: + + ...hotunplug the device... + + if (virLockManagerReleaseResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...log warning ... + + + +During migration: + + 1. On source host + + if (!virLockManagerPrepareMigrate(manager, hosturi)) + ..don't start migration.. + + 2. On dest host + + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_MIGRATE); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + char **supervisorargv; + int supervisorargc; + + supervisor = virLockManagerGetSupervisorPath(manager); + virLockManagerGetSupervisorArgs(&argv, &argc); + + cmd = qemuBuildCommandLine(supervisor, supervisorargv, supervisorargv); + + supervisorpid = virCommandExec(cmd); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + 3. Initiate migration in QEMU on source and wait for completion + + 4a. On failure + + 4a1 On target + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + 4a2 On source + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + + 4b. On succcess + + + 4b1 On target + + virLockManagerCompleteMigrateIn(manager, 0); + + 42 On source + + virLockManagerCompleteMigrateIn(manager, 0); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + +Notes: + + - If a lock manager impl does just VM level leases, it can + ignore all the resource paths at startup. 
+ + - If a lock manager impl does not support migrate + it can return an error from all migrate calls + + - If a lock manger impl does not support hotplug + it can return an error from all resource acquire/release calls diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h new file mode 100644 index 0000000..40a55f6 --- /dev/null +++ b/src/locking/lock_driver.h @@ -0,0 +1,293 @@ +/* + * lock_driver.h: Defines the lock driver plugin API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *
Please add: * Author: Daniel P. Berrange <berrange@redhat.com> (well, I assume you wrote it :-)
+ */ + +#ifndef __VIR_PLUGINS_LOCK_DRIVER_H__ +# define __VIR_PLUGINS_LOCK_DRIVER_H__ + +# include "internal.h" + +typedef struct _virLockManager virLockManager; +typedef virLockManager *virLockManagerPtr; + +typedef struct _virLockDriver virLockDriver; +typedef virLockDriver *virLockDriverPtr; + +typedef struct _virLockManagerParam virLockManagerParam; +typedef virLockManagerParam *virLockManagerParamPtr; + +typedef enum { + /* State passing is used to re-acquire existing leases */ + VIR_LOCK_MANAGER_USES_STATE = (1 << 0) +} virLockManagerFlags; + +typedef enum { + /* The managed object is a virtual guest domain */ + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0, +} virLockManagerObjectType; + +typedef enum { + /* The resource to be locked is a virtual disk */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0, + /* A lease against an arbitrary resource */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1, +} virLockManagerResourceType; + +typedef enum { + /* The resource is assigned in readonly mode */ + VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), + /* The resource is assigned in shared, writable mode */ + VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), +} virLockManagerResourceFlags; + +typedef enum { + /* Don't acquire the resources, just register the object PID */ + VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0) +} virLockManagerAcquireFlags; + +enum { + VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + VIR_LOCK_MANAGER_PARAM_TYPE_INT, + VIR_LOCK_MANAGER_PARAM_TYPE_LONG, + VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE, + VIR_LOCK_MANAGER_PARAM_TYPE_UUID, +}; + +struct _virLockManagerParam { + int type; + const char *key; + union { + int i; + long long l; + unsigned int ui; + unsigned long long ul; + double d; + char *str; + unsigned char uuid[16]; + } value; +}; + + +/* + * Changes in major version denote incompatible ABI changes + * Changes in minor version denote new compatible API entry points + * Changes in micro version 
denote new compatible flags + */ +# define VIR_LOCK_MANAGER_VERSION_MAJOR 1 +# define VIR_LOCK_MANAGER_VERSION_MINOR 0 +# define VIR_LOCK_MANAGER_VERSION_MICRO 0 + +# define VIR_LOCK_MANAGER_VERSION \ + ((VIR_LOCK_MANAGER_VERSION_MAJOR * 1000 * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MINOR * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MICRO)) + + + +/** + * virLockDriverInit: + * @version: the libvirt requested plugin ABI version + * @flags: the libvirt requested plugin optional extras + * + * Allow the plugin to validate the libvirt requested + * plugin version / flags. This allows the plugin impl + * to block its use in versions of libvirtd which are + * too old to support key features. + * + * NB: A plugin may be loaded multiple times, for different + * libvirt drivers (eg QEMU, LXC, UML) + * + * Returns -1 if the requested version/flags were inadequate + */ +typedef int (*virLockDriverInit)(unsigned int version, + unsigned int flags); + +/** + * virLockDriverDeinit: + * + * Called to release any resources prior to the plugin + * being unloaded from memory. Returns -1 to prevent + * plugin from being unloaded from memory. + */ +typedef int (*virLockDriverDeinit)(void); + +/** + * virLockManagerNew: + * @man: the lock manager context + * @type: the type of process to be supervised + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: optional flags, currently unused + * + * Initialize a new context to supervise a process, usually + * a virtual machine. The lock driver implementation can use + * the <code>privateData</code> field of <code>man</code> + * to store a pointer to any driver specific state. 
+ * + * A process of VIR_LOCK_MANAGER_START_DOMAIN will be + * given the following parameters + * + * - id: the domain unique id (unsigned int) + * - uuid: the domain uuid (uuid) + * - name: the domain name (string) + * - pid: process ID to own/owning the lock (unsigned int) + * + * Returns 0 if successful initialized a new context, -1 on error + */ +typedef int (*virLockDriverNew)(virLockManagerPtr man, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverFree: + * @manager: the lock manager context + * + * Release any resources associated with the lock manager + * context private data + */ +typedef void (*virLockDriverFree)(virLockManagerPtr man); + +/** + * virLockDriverAddResource: + * @manager: the lock manager context + * @type: the resource type virLockManagerResourceType + * @name: the resource name + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: the resource access flags + * + * Assign a resource to a managed object. This will + * only be called prior to the object is being locked + * when it is inactive. eg, to set the initial boot + * time disk assignments on a VM + * The format of @name varies according to + * the resource @type. A VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK + * will have the fully qualified file path, while a resource + * of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE will have the + * unique name of the lease + * + * A resource of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE + * will receive at least the following extra parameters + * + * - 'path': a fully qualified path to the lockspace + * - 'lockspace': globally string identifying the lockspace name + * - 'offset': byte offset within the lease (unsigned long long) + * + * If no flags are given, the resource is assumed to be + * used in exclusive, read-write mode. Access can be + * relaxed to readonly, or shared read-write. 
+ * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAddResource)(virLockManagerPtr man, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverAcquire: + * @manager: the lock manager context + * @state: the current lock state + * @flags: optional flags, currently unused + * + * Start managing resources for the object. This + * must be called from the PID that represents the + * object to be managed. If the lock is lost at any + * time, the PID will be killed off by the lock manager. + * The optional state contains information about the + * locks previously held for the object. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAcquire)(virLockManagerPtr man, + const char *state, + unsigned int flags); + +/** + * virLockDriverRelease: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags + * + * Inform the lock manager that the supervised process has + * been, or can be stopped. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverRelease)(virLockManagerPtr man, + char **state, + unsigned int flags); + +/** + * virLockDriverInquire: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags, currently unused + * + * Retrieve the current lock state. The returned + * lock state may be NULL if none is required. The + * caller is responsible for freeing the lock + * state string when it is no longer required + * + * Returns 0 on success, or -1 on failure. 
+ */ +typedef int (*virLockDriverInquire)(virLockManagerPtr man, + char **state, + unsigned int flags); + + +struct _virLockManager { + virLockDriverPtr driver; + void *privateData; +}; + +/** + * The plugin must export a static instance of this + * driver table, with the name 'virLockDriverImpl' + */ +struct _virLockDriver { + /** + * @version: the newest implemented plugin ABI version + * @flags: optional flags, currently unused + */ + unsigned int version; + unsigned int flags; + + virLockDriverInit drvInit; + virLockDriverDeinit drvDeinit; + + virLockDriverNew drvNew; + virLockDriverFree drvFree; + + virLockDriverAddResource drvAddResource; + + virLockDriverAcquire drvAcquire; + virLockDriverRelease drvRelease; + virLockDriverInquire drvInquire; +}; + + +#endif /* __VIR_PLUGINS_LOCK_DRIVER_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c new file mode 100644 index 0000000..cb96091 --- /dev/null +++ b/src/locking/lock_manager.c @@ -0,0 +1,357 @@ +/* + * lock_manager.c: Implements the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *
Same thing :-)
+ */ + +#include <config.h> + +#include "lock_manager.h" +#include "virterror_internal.h" +#include "logging.h" +#include "util.h" +#include "memory.h" +#include "uuid.h" + +#include <dlfcn.h> +#include <stdlib.h> +#include <unistd.h> + +#include "configmake.h" V> + +#define VIR_FROM_THIS VIR_FROM_LOCKING [...] +/** + * virLockManagerPluginRef: + * @plugin: the plugin implementation to ref + * + * Acquires an additional reference on the plugin. + */ +void virLockManagerPluginRef(virLockManagerPluginPtr plugin) +{ + plugin->refs++; +} + + +/** + * virLockManagerPluginUnref: + * @plugin: the plugin implementation to unref + * + * Releases a reference on the plugin. When the last reference + * is released, it will attempt to unload the plugin from memory. + * The plugin may refuse to allow unloading if this would + * result in an unsafe scenario. + * + */ +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin) +{ + if (!plugin) + return; + + plugin->refs--;
Shouldn't we protect those ref/unrefs with a global lock? The chances of hitting the race there sound small, but this looks racy. Could be done as an improvement.
+ if (plugin->refs > 0) + return;
besides minor comments, ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

To allow hypervisor drivers to assume that a lock driver impl will be guaranteed to exist, provide a 'nop' impl that is compiled into the library * src/Makefile.am: Add nop driver * src/locking/lock_driver_nop.c, src/locking/lock_driver_nop.h: Nop lock driver implementation * src/locking/lock_manager.c: Enable direct access of 'nop' driver, instead of dlopen()ing it. --- src/Makefile.am | 4 +- src/locking/lock_driver_nop.c | 115 +++++++++++++++++++++++++++++++++++++++++ src/locking/lock_driver_nop.h | 30 +++++++++++ src/locking/lock_manager.c | 53 ++++++++++--------- 4 files changed, 177 insertions(+), 25 deletions(-) create mode 100644 src/locking/lock_driver_nop.c create mode 100644 src/locking/lock_driver_nop.h diff --git a/src/Makefile.am b/src/Makefile.am index a27838b..96e2edf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -94,7 +94,9 @@ DRIVER_SOURCES = \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ libvirt.c libvirt_internal.h \ - locking/lock_manager.c locking/lock_manager.h + locking/lock_manager.c locking/lock_manager.h \ + locking/lock_driver.h \ + locking/lock_driver_nop.h locking/lock_driver_nop.c # XML configuration format handling sources diff --git a/src/locking/lock_driver_nop.c b/src/locking/lock_driver_nop.c new file mode 100644 index 0000000..5ebbd8d --- /dev/null +++ b/src/locking/lock_driver_nop.c @@ -0,0 +1,115 @@ +/* + * lock_driver_nop.c: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_driver_nop.h" +#include "memory.h" +#include "logging.h" +#include "uuid.h" + + +static int virLockManagerNopInit(unsigned int version, + unsigned int flags) +{ + VIR_DEBUG("version=%u flags=%u", version, flags); + + return 0; +} + +static int virLockManagerNopDeinit(void) +{ + VIR_DEBUG(" "); + + return 0; +} + + +static int virLockManagerNopNew(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int virLockManagerNopAddResource(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + const char *name ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + + +static int virLockManagerNopAcquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + const char *state ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + +static int virLockManagerNopRelease(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + *state = NULL; + + return 0; +} + +static int virLockManagerNopInquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + *state = NULL; + + return 0; +} + +static void virLockManagerNopFree(virLockManagerPtr lock ATTRIBUTE_UNUSED) +{ +} + +virLockDriver virLockDriverNop = +{ + .version = VIR_LOCK_MANAGER_VERSION, + .flags = 0, + + .drvInit = virLockManagerNopInit, + .drvDeinit = virLockManagerNopDeinit, + + .drvNew 
= virLockManagerNopNew, + .drvFree = virLockManagerNopFree, + + .drvAddResource = virLockManagerNopAddResource, + + .drvAcquire = virLockManagerNopAcquire, + .drvRelease = virLockManagerNopRelease, + + .drvInquire = virLockManagerNopInquire, +}; diff --git a/src/locking/lock_driver_nop.h b/src/locking/lock_driver_nop.h new file mode 100644 index 0000000..4be5377 --- /dev/null +++ b/src/locking/lock_driver_nop.h @@ -0,0 +1,30 @@ +/* + * lock_driver_nop.h: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_DRIVER_NOP_H__ +# define __VIR_LOCK_DRIVER_NOP_H__ + +# include "lock_driver.h" + +extern virLockDriver virLockDriverNop; + + +#endif /* __VIR_LOCK_DRIVER_NOP_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c index cb96091..6197fd4 100644 --- a/src/locking/lock_manager.c +++ b/src/locking/lock_manager.c @@ -22,6 +22,7 @@ #include <config.h> #include "lock_manager.h" +#include "lock_driver_nop.h" #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -123,35 +124,39 @@ virLockManagerPluginPtr virLockManagerPluginNew(const char *name, const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); char *modfile = NULL; - if (moddir == NULL) - moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + if (STREQ(name, "nop")) { + driver = &virLockDriverNop; + } else { + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; - VIR_DEBUG("Module load %s from %s", name, moddir); + VIR_DEBUG("Module load %s from %s", name, moddir); - if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { - virReportOOMError(); - return NULL; - } + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } - if (access(modfile, R_OK) < 0) { - virReportSystemError(errno, - _("Plugin %s not accessible"), - modfile); - goto cleanup; - } + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } - handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); - if (!handle) { - virLockError(VIR_ERR_SYSTEM_ERROR, - _("Failed to load plugin %s: %s"), - modfile, dlerror()); - goto cleanup; - } + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to 
load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } - if (!(driver = dlsym(handle, "virLockDriverImpl"))) { - virLockError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing plugin initialization symbol 'virLockDriverImpl'")); - goto cleanup; + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } } if (driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:19AM -0400, Daniel P. Berrange wrote:
To allow hypervisor drivers to assume that a lock driver impl will be guaranteed to exist, provide a 'nop' impl that is compiled into the library
It also has the good property of being usable as a template for new ones...
* src/Makefile.am: Add nop driver * src/locking/lock_driver_nop.c, src/locking/lock_driver_nop.h: Nop lock driver implementation * src/locking/lock_manager.c: Enable direct access of 'nop' driver, instead of dlopen()ing it. --- src/Makefile.am | 4 +- src/locking/lock_driver_nop.c | 115 +++++++++++++++++++++++++++++++++++++++++ src/locking/lock_driver_nop.h | 30 +++++++++++ src/locking/lock_manager.c | 53 ++++++++++--------- 4 files changed, 177 insertions(+), 25 deletions(-) create mode 100644 src/locking/lock_driver_nop.c create mode 100644 src/locking/lock_driver_nop.h
diff --git a/src/Makefile.am b/src/Makefile.am index a27838b..96e2edf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -94,7 +94,9 @@ DRIVER_SOURCES = \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ libvirt.c libvirt_internal.h \ - locking/lock_manager.c locking/lock_manager.h + locking/lock_manager.c locking/lock_manager.h \ + locking/lock_driver.h \ + locking/lock_driver_nop.h locking/lock_driver_nop.c
# XML configuration format handling sources diff --git a/src/locking/lock_driver_nop.c b/src/locking/lock_driver_nop.c new file mode 100644 index 0000000..5ebbd8d --- /dev/null +++ b/src/locking/lock_driver_nop.c @@ -0,0 +1,115 @@ +/* + * lock_driver_nop.c: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *
* Author: Daniel P. Berrange <berrange@redhat.com>
+ */ + +#include <config.h> + +#include "lock_driver_nop.h" +#include "memory.h" +#include "logging.h" +#include "uuid.h" + + +static int virLockManagerNopInit(unsigned int version, + unsigned int flags) +{ + VIR_DEBUG("version=%u flags=%u", version, flags); + + return 0; +} + +static int virLockManagerNopDeinit(void) +{ + VIR_DEBUG(" "); + + return 0; +} + + +static int virLockManagerNopNew(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int virLockManagerNopAddResource(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + const char *name ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + + +static int virLockManagerNopAcquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + const char *state ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + +static int virLockManagerNopRelease(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + *state = NULL; + + return 0; +} + +static int virLockManagerNopInquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + *state = NULL; + + return 0; +} + +static void virLockManagerNopFree(virLockManagerPtr lock ATTRIBUTE_UNUSED) +{ +} + +virLockDriver virLockDriverNop = +{ + .version = VIR_LOCK_MANAGER_VERSION, + .flags = 0, + + .drvInit = virLockManagerNopInit, + .drvDeinit = virLockManagerNopDeinit, + + .drvNew = virLockManagerNopNew, + .drvFree = virLockManagerNopFree, + + .drvAddResource = virLockManagerNopAddResource, + + .drvAcquire = virLockManagerNopAcquire, + .drvRelease = virLockManagerNopRelease, + + .drvInquire = virLockManagerNopInquire, +}; diff --git a/src/locking/lock_driver_nop.h 
b/src/locking/lock_driver_nop.h new file mode 100644 index 0000000..4be5377 --- /dev/null +++ b/src/locking/lock_driver_nop.h @@ -0,0 +1,30 @@ +/* + * lock_driver_nop.h: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_DRIVER_NOP_H__ +# define __VIR_LOCK_DRIVER_NOP_H__ + +# include "lock_driver.h" + +extern virLockDriver virLockDriverNop; + + +#endif /* __VIR_LOCK_DRIVER_NOP_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c index cb96091..6197fd4 100644 --- a/src/locking/lock_manager.c +++ b/src/locking/lock_manager.c @@ -22,6 +22,7 @@ #include <config.h>
#include "lock_manager.h" +#include "lock_driver_nop.h" #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -123,35 +124,39 @@ virLockManagerPluginPtr virLockManagerPluginNew(const char *name, const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); char *modfile = NULL;
- if (moddir == NULL) - moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + if (STREQ(name, "nop")) { + driver = &virLockDriverNop; + } else { + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR;
- VIR_DEBUG("Module load %s from %s", name, moddir); + VIR_DEBUG("Module load %s from %s", name, moddir);
- if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { - virReportOOMError(); - return NULL; - } + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + }
- if (access(modfile, R_OK) < 0) { - virReportSystemError(errno, - _("Plugin %s not accessible"), - modfile); - goto cleanup; - } + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + }
- handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); - if (!handle) { - virLockError(VIR_ERR_SYSTEM_ERROR, - _("Failed to load plugin %s: %s"), - modfile, dlerror()); - goto cleanup; - } + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + }
- if (!(driver = dlsym(handle, "virLockDriverImpl"))) { - virLockError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing plugin initialization symbol 'virLockDriverImpl'")); - goto cleanup; + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } }
if (driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) {
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

To facilitate use of the locking plugins from hypervisor drivers, introduce a higher level API for locking virDomainObjPtr instances. In includes APIs targetted to VM startup, and hotplug/unplug * src/Makefile.am: Add domain lock API * src/locking/domain_lock.c, src/locking/domain_lock.h: High level API for domain locking --- src/Makefile.am | 3 +- src/libvirt_private.syms | 11 ++ src/locking/README | 7 + src/locking/domain_lock.c | 284 +++++++++++++++++++++++++++++++++++++++++++++ src/locking/domain_lock.h | 56 +++++++++ 5 files changed, 360 insertions(+), 1 deletions(-) create mode 100644 src/locking/domain_lock.c create mode 100644 src/locking/domain_lock.h diff --git a/src/Makefile.am b/src/Makefile.am index 96e2edf..1e5a72e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ libvirt.c libvirt_internal.h \ locking/lock_manager.c locking/lock_manager.h \ locking/lock_driver.h \ - locking/lock_driver_nop.h locking/lock_driver_nop.c + locking/lock_driver_nop.h locking/lock_driver_nop.c \ + locking/domain_lock.h locking/domain_lock.c # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 1784c0d..a2a6de9 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -407,6 +407,17 @@ virDomainEventWatchdogNewFromDom; virDomainEventWatchdogNewFromObj; +# domain_lock.h +virDomainLockProcessStart; +virDomainLockProcessInquire; +virDomainLockProcessPause; +virDomainLockProcessResume; +virDomainLockDiskAttach; +virDomainLockDiskDetach; +virDomainLockLeaseAttach; +virDomainLockLeaseDetach; + + # domain_nwfilter.h virDomainConfNWFilterInstantiate; virDomainConfNWFilterRegister; diff --git a/src/locking/README b/src/locking/README index 4fa4f89..da2a8f8 100644 --- a/src/locking/README +++ b/src/locking/README @@ -1,3 +1,10 @@ + Using the Lock Manager APIs + =========================== + +This file describes how to use the lock manager APIs. 
+All the guest lifecycle sequences here have higher +level wrappers provided by the 'domain_lock.h' API, +which simplify thue usage At libvirtd startup: diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c new file mode 100644 index 0000000..85352e2 --- /dev/null +++ b/src/locking/domain_lock.c @@ -0,0 +1,284 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <intprops.h> + +#include "domain_lock.h" +#include "memory.h" +#include "uuid.h" +#include "virterror_internal.h" +#include "logging.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + + +static int virDomainLockManagerAddLease(virLockManagerPtr lock, + virDomainLeaseDefPtr lease) +{ + unsigned int leaseFlags = 0; + virLockManagerParam lparams[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "path", + .value = { .str = lease->path }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + .key = "offset", + .value = { .ul = lease->offset }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "lockspace", + .value = { .str = lease->lockspace }, + }, + }; + size_t nparams = ARRAY_CARDINALITY(lparams); + if (!lease->lockspace) + nparams--; + + 
VIR_DEBUG("Add lease %s", lease->path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE, + lease->key, + nparams, + lparams, + leaseFlags) < 0) { + VIR_DEBUG("Failed to add lease %s", lease->path); + return -1; + } + return 0; +} + + +static int virDomainLockManagerAddDisk(virLockManagerPtr lock, + virDomainDiskDefPtr disk) +{ + unsigned int diskFlags = 0; + if (!disk->src) + return 0; + + if (!(disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK || + disk->type == VIR_DOMAIN_DISK_TYPE_FILE || + disk->type == VIR_DOMAIN_DISK_TYPE_DIR)) + return 0; + + if (disk->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (disk->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + + VIR_DEBUG("Add disk %s", disk->src); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk->src, + 0, + NULL, + diskFlags) < 0) { + VIR_DEBUG("Failed add disk %s", disk->src); + return -1; + } + return 0; +} + +static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool withResources) +{ + virLockManagerPtr lock; + int i; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + VIR_DEBUG("plugin=%p dom=%p withResources=%d", + plugin, dom, withResources); + + memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + + if (!(lock = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))) + goto error; + + if (withResources) { + VIR_DEBUG("Adding leases"); + for (i = 0 ; i < dom->def->nleases ; i++) + if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + goto 
error; + + VIR_DEBUG("Adding disks"); + for (i = 0 ; i < dom->def->ndisks ; i++) + if (virDomainLockManagerAddDisk(lock, dom->def->disks[i]) < 0) + goto error; + } + + return lock; + +error: + virLockManagerFree(lock); + return NULL; +} + + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret; + if (paused) + ret = virLockManagerAcquire(lock, NULL, VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY); + else + ret = virLockManagerAcquire(lock, NULL, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerRelease(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerAcquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerInquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + 
virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h new file mode 100644 index 0000000..40fadd4 --- /dev/null +++ b/src/locking/domain_lock.h @@ -0,0 +1,56 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_DOMAIN_LOCK_H__ +# define __VIR_DOMAIN_LOCK_H__ + +# include "internal.h" +# include "domain_conf.h" +# include "lock_manager.h" + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused); +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state); +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); + +#endif /* __VIR_DOMAIN_LOCK_H__ */ -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:20AM -0400, Daniel P. Berrange wrote:
To facilitate use of the locking plugins from hypervisor drivers, introduce a higher level API for locking virDomainObjPtr instances. It includes APIs targeted at VM startup and hotplug/unplug.
* src/Makefile.am: Add domain lock API * src/locking/domain_lock.c, src/locking/domain_lock.h: High level API for domain locking --- src/Makefile.am | 3 +- src/libvirt_private.syms | 11 ++ src/locking/README | 7 + src/locking/domain_lock.c | 284 +++++++++++++++++++++++++++++++++++++++++++++ src/locking/domain_lock.h | 56 +++++++++ 5 files changed, 360 insertions(+), 1 deletions(-) create mode 100644 src/locking/domain_lock.c create mode 100644 src/locking/domain_lock.h
diff --git a/src/Makefile.am b/src/Makefile.am index 96e2edf..1e5a72e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ libvirt.c libvirt_internal.h \ locking/lock_manager.c locking/lock_manager.h \ locking/lock_driver.h \ - locking/lock_driver_nop.h locking/lock_driver_nop.c + locking/lock_driver_nop.h locking/lock_driver_nop.c \ + locking/domain_lock.h locking/domain_lock.c
# XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 1784c0d..a2a6de9 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -407,6 +407,17 @@ virDomainEventWatchdogNewFromDom; virDomainEventWatchdogNewFromObj;
+# domain_lock.h +virDomainLockProcessStart; +virDomainLockProcessInquire; +virDomainLockProcessPause; +virDomainLockProcessResume; +virDomainLockDiskAttach; +virDomainLockDiskDetach; +virDomainLockLeaseAttach; +virDomainLockLeaseDetach; + + # domain_nwfilter.h virDomainConfNWFilterInstantiate; virDomainConfNWFilterRegister; diff --git a/src/locking/README b/src/locking/README index 4fa4f89..da2a8f8 100644 --- a/src/locking/README +++ b/src/locking/README @@ -1,3 +1,10 @@ + Using the Lock Manager APIs + =========================== + +This file describes how to use the lock manager APIs. +All the guest lifecycle sequences here have higher +level wrappers provided by the 'domain_lock.h' API, +which simplify their usage
At libvirtd startup:
diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c new file mode 100644 index 0000000..85352e2 --- /dev/null +++ b/src/locking/domain_lock.c @@ -0,0 +1,284 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <intprops.h> + +#include "domain_lock.h" +#include "memory.h" +#include "uuid.h" +#include "virterror_internal.h" +#include "logging.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + + +static int virDomainLockManagerAddLease(virLockManagerPtr lock, + virDomainLeaseDefPtr lease) +{ + unsigned int leaseFlags = 0; + virLockManagerParam lparams[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "path", + .value = { .str = lease->path }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + .key = "offset", + .value = { .ul = lease->offset }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "lockspace", + .value = { .str = lease->lockspace }, + }, + }; + size_t nparams = ARRAY_CARDINALITY(lparams); + if (!lease->lockspace) + nparams--; + + VIR_DEBUG("Add lease %s", lease->path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE, + lease->key, + nparams, + lparams, 
+ leaseFlags) < 0) { + VIR_DEBUG("Failed to add lease %s", lease->path); + return -1; + } + return 0; +} + + +static int virDomainLockManagerAddDisk(virLockManagerPtr lock, + virDomainDiskDefPtr disk) +{ + unsigned int diskFlags = 0; + if (!disk->src) + return 0; + + if (!(disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK || + disk->type == VIR_DOMAIN_DISK_TYPE_FILE || + disk->type == VIR_DOMAIN_DISK_TYPE_DIR)) + return 0; + + if (disk->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (disk->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + + VIR_DEBUG("Add disk %s", disk->src); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk->src, + 0, + NULL, + diskFlags) < 0) { + VIR_DEBUG("Failed add disk %s", disk->src); + return -1; + } + return 0; +} + +static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool withResources) +{ + virLockManagerPtr lock; + int i; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + VIR_DEBUG("plugin=%p dom=%p withResources=%d", + plugin, dom, withResources); + + memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + + if (!(lock = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))) + goto error; + + if (withResources) { + VIR_DEBUG("Adding leases"); + for (i = 0 ; i < dom->def->nleases ; i++) + if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + goto error; + + VIR_DEBUG("Adding disks"); + for (i = 0 ; i < dom->def->ndisks ; i++) + if (virDomainLockManagerAddDisk(lock, dom->def->disks[i]) < 0) + goto 
error; + } + + return lock; + +error: + virLockManagerFree(lock); + return NULL; +} + + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret; + if (paused) + ret = virLockManagerAcquire(lock, NULL, VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY); + else + ret = virLockManagerAcquire(lock, NULL, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerRelease(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerAcquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerInquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if 
(virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h new file mode 100644 index 0000000..40fadd4 --- /dev/null +++ b/src/locking/domain_lock.h @@ -0,0 +1,56 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_DOMAIN_LOCK_H__ +# define __VIR_DOMAIN_LOCK_H__ + +# include "internal.h" +# include "domain_conf.h" +# include "lock_manager.h" + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused); +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state); +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); + +#endif /* __VIR_DOMAIN_LOCK_H__ */ -- 1.7.4.4
ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

--- docs/internals/locking.html.in | 257 ++++++++++++++++++++++++++++++++++++++++ docs/sitemap.html.in | 4 + 2 files changed, 261 insertions(+), 0 deletions(-) create mode 100644 docs/internals/locking.html.in diff --git a/docs/internals/locking.html.in b/docs/internals/locking.html.in new file mode 100644 index 0000000..3790ef0 --- /dev/null +++ b/docs/internals/locking.html.in @@ -0,0 +1,257 @@ +<html> + <body> + <h1>Resource Lock Manager</h1> + + <ul id="toc"></ul> + + <p> + This page describes the design of the resource lock manager + that is used for locking disk images, to ensure exclusive + access to content. + </p> + + <h2><a name="goals">Goals</a></h2> + + <p> + The high level goal is to prevent the same disk image being + used by more than one QEMU instance at a time (unless the + disk is marked as sharable, or readonly). The scenarios + to be prevented are thus: + </p> + + <ol> + <li> + Two different guests running configured to point at the + same disk image. + </li> + <li> + One guest being started more than once on two different + machines due to admin mistake + </li> + <li> + One guest being started more than once on a single machine + due to libvirt driver bug on a single machine. + </li> + </ol> + + <h2><a name="requirement">Requirements</a></h2> + + <p> + The high level goal leads to a set of requirements + for the lock manager design + </p> + + <ol> + <li> + A lock must be held on a disk whenever a QEMU process + has the disk open + </li> + <li> + The lock scheme must allow QEMU to be configured with + readonly, shared write, or exclusive writable disks + </li> + <li> + A lock handover must be performed during the migration + process where 2 QEMU processes will have the same disk + open concurrently. + </li> + <li> + The lock manager must be able to identify and kill the + process accessing the resource if the lock is revoked. + </li> + <li> + Locks can be acquired for arbitrary VM related resources, + as determined by the management application. 
+ </li> + </ol> + + <h2><a name="design">Design</a></h2> + + <p> + Within a lock manager the following series of operations + will need to be supported. + </p> + + <ul> + <li> + <strong>Register object</strong> + Register the identity of an object against which + locks will be acquired + </li> + <li> + <strong>Add resource</strong> + Associate a resource with an object for future + lock acquisition / release + </li> + <li> + <strong>Acquire locks</strong> + Acquire the locks for all resources associated + with the object + </li> + <li> + <strong>Release locks</strong> + Release the locks for all resources associated + with the object + </li> + <li> + <strong>Inquire locks</strong> + Get a representation of the state of the locks + for all resources associated with the object + </li> + </ul> + + <h2><a name="impl">Plugin Implementations</a></h2> + + <p> + Lock manager implementations are provided as LGPLv2+ + licensed, dlopen()able library modules. The plugins + will be loadable from the following location: + </p> + + <pre> +/usr/{lib,lib64}/libvirt/lock_manager/$NAME.so +</pre> + + <p> + The lock manager plugin must export a single ELF + symbol named <code>virLockDriverImpl</code>, which is + a static instance of the <code>virLockDriver</code> + struct. The struct is defined in the header file + </p> + + <pre> + #include <libvirt/plugins/lock_manager.h> + </pre> + + <p> + All callbacks in the struct must be initialized + to non-NULL pointers. The semantics of each + callback are defined in the API docs embedded + in the previously mentioned header file + </p> + + <h2><a name="qemuIntegrate">QEMU Driver integration</a></h2> + + <p> + With the QEMU driver, the lock plugin will be set + in the <code>/etc/libvirt/qemu.conf</code> configuration + file by specifying the lock manager name. 
+ </p> + + <pre> + lockManager="sanlock" + </pre> + + <p> + By default the lock manager will be a 'no op' implementation + for backwards compatibility + </p> + + <h2><a name="usagePatterns">Lock usage patterns</a></h2> + + <p> + The following psuedo code illustrates the common + patterns of operations invoked on the lock + manager plugin callbacks. + </p> + + <h3><a name="usageLockAcquire">Lock acquisition</a></h3> + + <p> + Initial lock acquisition will be performed from the + process that is to own the lock. This is typically + the QEMU child process, in between the fork+exec + pairing. When adding further resources on the fly, + to an existing object holding locks, this will be + done from the libvirtd process. + </p> + + <pre> + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + if (virLockManagerAcquire(lock, NULL, 0) < 0); + ...abort... 
+ </pre> + + <h3><a name="usageLockAttach">Lock release</a></h3> + + <p> + The locks are all implicitly released when the process + that acquired them exits, however, a process may + voluntarily give up the lock by running + </p> + + <pre> + char *state = NULL; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + virLockManagerRelease(mgr, & state, 0); + </pre> + + <p> + The returned state string can be passed to the + <code>virLockManagerAcquire</code> method to + later re-acquire the exact same locks. This + state transfer is commonly used when performing + live migration of virtual machines. By validating + the state the lock manager can ensure no other + VM has re-acquire the same locks on a different + host. The state can also be obtained without + releasing the locks, by calling the + <code>virLockManagerInquire</code> method. + </p> + + </body> +</html> diff --git a/docs/sitemap.html.in b/docs/sitemap.html.in index ad8dc7b..db2963e 100644 --- a/docs/sitemap.html.in +++ b/docs/sitemap.html.in @@ -284,6 +284,10 @@ <a href="internals/command.html">Spawning commands</a> <span>Spawning commands from libvirt driver code</span> </li> + <li> + <a href="internals/locking.html">Lock managers</a> + <span>Use lock managers to protect disk content</span> + </li> </ul> </li> <li> -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:21AM -0400, Daniel P. Berrange wrote:
--- docs/internals/locking.html.in | 257 ++++++++++++++++++++++++++++++++++++++++ docs/sitemap.html.in | 4 + 2 files changed, 261 insertions(+), 0 deletions(-) create mode 100644 docs/internals/locking.html.in
diff --git a/docs/internals/locking.html.in b/docs/internals/locking.html.in new file mode 100644 index 0000000..3790ef0 --- /dev/null +++ b/docs/internals/locking.html.in @@ -0,0 +1,257 @@ +<html> + <body> + <h1>Resource Lock Manager</h1> + + <ul id="toc"></ul> + + <p> + This page describes the design of the resource lock manager + that is used for locking disk images, to ensure exclusive + access to content. + </p> + + <h2><a name="goals">Goals</a></h2> + + <p> + The high level goal is to prevent the same disk image being + used by more than one QEMU instance at a time (unless the + disk is marked as sharable, or readonly). The scenarios + to be prevented are thus: + </p> + + <ol> + <li> + Two different guests running configured to point at the + same disk image. + </li> + <li> + One guest being started more than once on two different + machines due to admin mistake + </li> + <li> + One guest being started more than once on a single machine + due to libvirt driver bug on a single machine. + </li> + </ol> + + <h2><a name="requirement">Requirements</a></h2> + + <p> + The high level goal leads to a set of requirements + for the lock manager design + </p> + + <ol> + <li> + A lock must be held on a disk whenever a QEMU process + has the disk open + </li> + <li> + The lock scheme must allow QEMU to be configured with + readonly, shared write, or exclusive writable disks + </li> + <li> + A lock handover must be performed during the migration + process where 2 QEMU processes will have the same disk + open concurrently. + </li> + <li> + The lock manager must be able to identify and kill the + process accessing the resource if the lock is revoked. + </li> + <li> + Locks can be acquired for arbitrary VM related resources, + as determined by the management application. + </li> + </ol> + + <h2><a name="design">Design</a></h2> + + <p> + Within a lock manager the following series of operations + will need to be supported. 
+ </p> + + <ul> + <li> + <strong>Register object</strong> + Register the identity of an object against which + locks will be acquired + </li> + <li> + <strong>Add resource</strong> + Associate a resource with an object for future + lock acquisition / release + </li> + <li> + <strong>Acquire locks</strong> + Acquire the locks for all resources associated + with the object + </li> + <li> + <strong>Release locks</strong> + Release the locks for all resources associated + with the object + </li> + <li> + <strong>Inquire locks</strong> + Get a representation of the state of the locks + for all resources associated with the object + </li> + </ul> + + <h2><a name="impl">Plugin Implementations</a></h2> + + <p> + Lock manager implementations are provided as LGPLv2+ + licensed, dlopen()able library modules. The plugins + will be loadable from the following location:
Well, unless they compile with a different prefix, right? Maybe reword this as "will usually be loadable ...".
+ </p> + + <pre> +/usr/{lib,lib64}/libvirt/lock_manager/$NAME.so +</pre> + + <p> + The lock manager plugin must export a single ELF + symbol named <code>virLockDriverImpl</code>, which is + a static instance of the <code>virLockDriver</code> + struct. The struct is defined in the header file + </p> [...] + <p> + The following psuedo code illustrates the common
pseudo
+ patterns of operations invoked on the lock + manager plugin callbacks.
Shouldn't we point to the README in the src/locking subdir here ?
+ </p> + + <h3><a name="usageLockAcquire">Lock acquisition</a></h3> +
small nits, ACK overall Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

The QEMU integrates with the lock manager instructure in a number of key places * During startup, a lock is acquired in between the fork & exec * During startup, the libvirtd process acquires a lock before setting file labelling * During shutdown, the libvirtd process acquires a lock before restoring file labelling * During hotplug, unplug & media change the libvirtd process holds a lock while setting/restoring labels The main content lock is only ever held by the QEMU child process, or libvirtd during VM shutdown. The rest of the operations only require libvirtd to hold the metadata locks, relying on the active QEMU still holding the content lock. * src/qemu/qemu_conf.c, src/qemu/qemu_conf.h, src/qemu/libvirtd_qemu.aug, src/qemu/test_libvirtd_qemu.aug: Add config parameter for configuring lock managers * src/qemu/qemu_driver.c: Add calls to the lock manager --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 ++ src/qemu/qemu_conf.c | 12 ++++ src/qemu/qemu_conf.h | 3 + src/qemu/qemu_domain.c | 5 ++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 13 ++++- src/qemu/qemu_hotplug.c | 56 ++++++++++++++++- src/qemu/qemu_process.c | 127 ++++++++++++++++++++++++++++++--------- src/qemu/test_libvirtd_qemu.aug | 4 + 10 files changed, 195 insertions(+), 34 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ac30b8e..66858ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -48,6 +48,7 @@ module Libvirtd_qemu = | bool_entry "allow_disk_format_probing" | bool_entry "set_process_name" | int_entry "max_processes" + | str_entry "lock_manager" (* Each enty in the config is one of the following three ... *) let entry = vnc_entry diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index c70050e..2c50d9d 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -280,3 +280,10 @@ # override default value set by host OS. 
# # max_processes = 0 + +# To enable strict 'fcntl' based locking of the file +# content (to prevent two VMs writing to the same +# disk), start the 'virtlockd' service, and uncomment +# this +# +# lock_manager = "fcntl" diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index f977673..ea4d7d0 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -115,6 +115,9 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } #endif + if (!(driver->lockManager = + virLockManagerPluginNew("nop", 0))) + return -1; /* Just check the file is readable before opening it, otherwise * libvirt emits an error. @@ -428,6 +431,15 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, CHECK_TYPE("max_processes", VIR_CONF_LONG); if (p) driver->maxProcesses = p->l; + p = virConfGetValue (conf, "lock_manager"); + CHECK_TYPE ("lock_manager", VIR_CONF_STRING); + if (p && p->str) { + virLockManagerPluginUnref(driver->lockManager); + if (!(driver->lockManager = + virLockManagerPluginNew(p->str, 0))) + VIR_ERROR(_("Failed to load lock manager %s"), p->str); + } + virConfFree (conf); return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index ceec16d..bf6dcf4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -43,6 +43,7 @@ # include "macvtap.h" # include "command.h" # include "threadpool.h" +# include "locking/lock_manager.h" # define QEMUD_CPUMASK_LEN CPU_SETSIZE @@ -124,6 +125,8 @@ struct qemud_driver { virBitmapPtr reservedVNCPorts; virSysinfoDefPtr hostsysinfo; + + virLockManagerPluginPtr lockManager; }; typedef struct _qemuDomainCmdlineDef qemuDomainCmdlineDef; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bcacb18..81ac2bc 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -101,6 +101,7 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + VIR_FREE(priv->lockState); /* This should never be 
non-NULL if we get here, but just in case... */ if (priv->mon) { @@ -157,6 +158,9 @@ static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </qemuCaps>\n"); } + if (priv->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState); + return 0; } @@ -260,6 +264,7 @@ static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes); + priv->lockState = virXPathString("string(./lockstate)", ctxt); return 0; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..0fca974 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -79,6 +79,7 @@ struct _qemuDomainObjPrivate { int persistentAddrs; virBitmapPtr qemuCaps; + char *lockState; }; struct qemuDomainWatchdogEvent diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f836d79..18233b7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,5 +1,5 @@ /* - * driver.c: core driver methods for managing qemu guests + * qemu_driver.c: core driver methods for managing qemu guests * * Copyright (C) 2006-2011 Red Hat, Inc. * Copyright (C) 2006 Daniel P. 
Berrange @@ -87,6 +87,7 @@ #include "fdstream.h" #include "configmake.h" #include "threadpool.h" +#include "locking/lock_manager.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -529,6 +530,14 @@ qemudStartup(int privileged) { } VIR_FREE(driverConf); + /* We should always at least have the 'nop' manager, so + * NULLs here are a fatal error + */ + if (!qemu_driver->lockManager) { + VIR_ERROR(_("Missing lock manager implementation")); + goto error; + } + if (qemuSecurityInit(qemu_driver) < 0) goto error; @@ -769,6 +778,8 @@ qemudShutdown(void) { virCgroupFree(&qemu_driver->cgroup); + virLockManagerPluginUnref(qemu_driver->lockManager); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); virThreadPoolFree(qemu_driver->workerPool); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 3cf7d35..a8e73c4 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -38,6 +38,7 @@ #include "pci.h" #include "files.h" #include "qemu_cgroup.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -82,9 +83,15 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, return -1; } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (!(driveAlias = qemuDeviceDriveHostAlias(origdisk, priv->qemuCaps))) goto error; @@ -115,6 +122,9 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, vm, origdisk) < 0) VIR_WARN("Unable to restore security label on ejected image %s", origdisk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, origdisk) < 0) + VIR_WARN("Unable to release lock on disk %s", origdisk->src); + VIR_FREE(origdisk->src); origdisk->src = disk->src; disk->src = NULL; @@ -128,9 +138,14 @@ int qemuDomainChangeEjectableMedia(struct 
qemud_driver *driver, error: VIR_FREE(driveAlias); + if (virSecurityManagerRestoreImageLabel(driver->securityManager, vm, disk) < 0) VIR_WARN("Unable to restore security label on new media %s", disk->src); + + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -154,9 +169,15 @@ int qemuDomainAttachPciDiskDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &disk->info) < 0) @@ -228,6 +249,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -364,10 +388,15 @@ int qemuDomainAttachSCSIDisk(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } /* We should have an address already, so make sure */ if (disk->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DRIVE) { @@ -456,6 +485,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -477,10 +509,17 @@ int qemuDomainAttachUsbMassstorageDevice(struct qemud_driver *driver, } } + if 
(virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } + /* XXX not correct once we allow attaching a USB CDROM */ if (!disk->src) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("disk source path is missing")); @@ -538,6 +577,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -1184,6 +1226,9 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on %s", dev->data.disk->src); + ret = 0; cleanup: @@ -1262,6 +1307,9 @@ int qemuDomainDetachDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on disk %s", dev->data.disk->src); + ret = 0; cleanup: diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 01b15e0..89211f9 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -50,6 +50,7 @@ #include "nodeinfo.h" #include "processinfo.h" #include "domain_nwfilter.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -344,6 +345,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virDomainObjLock(vm); if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); @@ -352,6 +354,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, 
VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after state change", vm->def->name); @@ -413,6 +420,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG); @@ -420,6 +428,11 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after watchdog event", vm->def->name); @@ -492,6 +505,7 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR); @@ -499,6 +513,11 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 
0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); } @@ -1777,6 +1796,17 @@ struct qemuProcessHookData { static int qemuProcessHook(void *data) { struct qemuProcessHookData *h = data; + int ret = -1; + + /* Some later calls want pid present */ + h->vm->pid = getpid(); + + VIR_DEBUG("Obtaining domain lock"); + if (virDomainLockProcessStart(h->driver->lockManager, + h->vm, + /* QEMU is always pased initially */ + true) < 0) + goto cleanup; if (qemuProcessLimits(h->driver) < 0) return -1; @@ -1784,18 +1814,25 @@ static int qemuProcessHook(void *data) /* This must take place before exec(), so that all QEMU * memory allocation is on the correct NUMA node */ + VIR_DEBUG("Moving procss to cgroup"); if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; + goto cleanup; /* This must be done after cgroup placement to avoid resetting CPU * affinity */ + VIR_DEBUG("Setup CPU affinity"); if (qemuProcessInitCpuAffinity(h->vm) < 0) - return -1; + goto cleanup; + VIR_DEBUG("Setting up security labeling"); if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; + goto cleanup; - return 0; + ret = 0; + +cleanup: + VIR_DEBUG("Hook complete ret=%d", ret); + return ret; } @@ -1824,12 +1861,24 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int ret; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainLockProcessResume(driver->lockManager, vm, priv->lockState) < 0) { + VIR_FREE(priv->lockState); + return -1; + } + VIR_FREE(priv->lockState); + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorStartCPUs(priv->mon, conn); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret == 0) + if (ret == 0) { 
virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); + } else { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } return ret; } @@ -1843,6 +1892,7 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int oldReason; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_FREE(priv->lockState); oldState = virDomainObjGetState(vm, &oldReason); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); @@ -1850,8 +1900,13 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, ret = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) + if (ret == 0) { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { virDomainObjSetState(vm, oldState, oldReason); + } return ret; } @@ -2096,29 +2151,6 @@ int qemuProcessStart(virConnectPtr conn, } qemuAuditSecurityLabel(vm, true); - VIR_DEBUG("Generating setting domain security labels (if required)"); - if (virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - VIR_DEBUG("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { - virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; - } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); @@ -2303,6 +2335,7 @@ int 
qemuProcessStart(virConnectPtr conn, virCommandNonblockingFDs(cmd); virCommandSetPidFile(cmd, pidfile); virCommandDaemonize(cmd); + virCommandRequireHandshake(cmd); ret = virCommandRun(cmd, NULL); VIR_FREE(pidfile); @@ -2333,6 +2366,42 @@ int qemuProcessStart(virConnectPtr conn, #endif } + VIR_DEBUG("Waiting for handshake from child"); + if (virCommandHandshakeWait(cmd) < 0) { + ret = -1; + goto cleanup; + } + + VIR_DEBUG("Setting domain security labels"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + VIR_DEBUG("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + VIR_DEBUG("Labelling done, completing handshake to child"); + if (virCommandHandshakeNotify(cmd) < 0) { + ret = -1; + goto cleanup; + } + VIR_DEBUG("Handshake complete, child running"); + if (migrateFrom) start_paused = true; diff --git a/src/qemu/test_libvirtd_qemu.aug b/src/qemu/test_libvirtd_qemu.aug index 917bd4f..b1f9114 100644 --- a/src/qemu/test_libvirtd_qemu.aug +++ b/src/qemu/test_libvirtd_qemu.aug @@ -113,6 +113,8 @@ allow_disk_format_probing = 1 vnc_auto_unix_socket = 1 max_processes = 12345 + +lock_manager = \"fcntl\" " test Libvirtd_qemu.lns get conf = @@ -236,3 +238,5 @@ max_processes = 12345 { "vnc_auto_unix_socket" = "1" } { "#empty" } { "max_processes" = "12345" } +{ "#empty" } +{ "lock_manager" = "fcntl" } -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:22AM -0400, Daniel P. Berrange wrote:
The QEMU driver integrates with the lock manager infrastructure in a number of key places
* During startup, a lock is acquired in between the fork & exec
* During startup, the libvirtd process acquires a lock before setting file labelling
* During shutdown, the libvirtd process acquires a lock before restoring file labelling
* During hotplug, unplug & media change the libvirtd process holds a lock while setting/restoring labels
The main content lock is only ever held by the QEMU child process, or libvirtd during VM shutdown. The rest of the operations only require libvirtd to hold the metadata locks, relying on the active QEMU still holding the content lock.
* src/qemu/qemu_conf.c, src/qemu/qemu_conf.h, src/qemu/libvirtd_qemu.aug, src/qemu/test_libvirtd_qemu.aug: Add config parameter for configuring lock managers * src/qemu/qemu_driver.c: Add calls to the lock manager --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 ++ src/qemu/qemu_conf.c | 12 ++++ src/qemu/qemu_conf.h | 3 + src/qemu/qemu_domain.c | 5 ++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 13 ++++- src/qemu/qemu_hotplug.c | 56 ++++++++++++++++- src/qemu/qemu_process.c | 127 ++++++++++++++++++++++++++++++--------- src/qemu/test_libvirtd_qemu.aug | 4 + 10 files changed, 195 insertions(+), 34 deletions(-)
diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ac30b8e..66858ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -48,6 +48,7 @@ module Libvirtd_qemu = | bool_entry "allow_disk_format_probing" | bool_entry "set_process_name" | int_entry "max_processes" + | str_entry "lock_manager"
(* Each enty in the config is one of the following three ... *) let entry = vnc_entry diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index c70050e..2c50d9d 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -280,3 +280,10 @@ # override default value set by host OS. # # max_processes = 0 + +# To enable strict 'fcntl' based locking of the file +# content (to prevent two VMs writing to the same +# disk), start the 'virtlockd' service, and uncomment +# this +# +# lock_manager = "fcntl" diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index f977673..ea4d7d0 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -115,6 +115,9 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } #endif
+ if (!(driver->lockManager = + virLockManagerPluginNew("nop", 0))) + return -1;
/* Just check the file is readable before opening it, otherwise * libvirt emits an error. @@ -428,6 +431,15 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, CHECK_TYPE("max_processes", VIR_CONF_LONG); if (p) driver->maxProcesses = p->l;
+ p = virConfGetValue (conf, "lock_manager"); + CHECK_TYPE ("lock_manager", VIR_CONF_STRING); + if (p && p->str) { + virLockManagerPluginUnref(driver->lockManager); + if (!(driver->lockManager = + virLockManagerPluginNew(p->str, 0))) + VIR_ERROR(_("Failed to load lock manager %s"), p->str); + } + virConfFree (conf); return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index ceec16d..bf6dcf4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -43,6 +43,7 @@ # include "macvtap.h" # include "command.h" # include "threadpool.h" +# include "locking/lock_manager.h"
# define QEMUD_CPUMASK_LEN CPU_SETSIZE
@@ -124,6 +125,8 @@ struct qemud_driver { virBitmapPtr reservedVNCPorts;
virSysinfoDefPtr hostsysinfo; + + virLockManagerPluginPtr lockManager; };
typedef struct _qemuDomainCmdlineDef qemuDomainCmdlineDef; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bcacb18..81ac2bc 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -101,6 +101,7 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + VIR_FREE(priv->lockState);
/* This should never be non-NULL if we get here, but just in case... */ if (priv->mon) { @@ -157,6 +158,9 @@ static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </qemuCaps>\n"); }
+ if (priv->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState); + return 0; }
@@ -260,6 +264,7 @@ static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes);
+ priv->lockState = virXPathString("string(./lockstate)", ctxt);
return 0;
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..0fca974 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -79,6 +79,7 @@ struct _qemuDomainObjPrivate { int persistentAddrs;
virBitmapPtr qemuCaps; + char *lockState; };
struct qemuDomainWatchdogEvent diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f836d79..18233b7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,5 +1,5 @@ /* - * driver.c: core driver methods for managing qemu guests + * qemu_driver.c: core driver methods for managing qemu guests * * Copyright (C) 2006-2011 Red Hat, Inc. * Copyright (C) 2006 Daniel P. Berrange @@ -87,6 +87,7 @@ #include "fdstream.h" #include "configmake.h" #include "threadpool.h" +#include "locking/lock_manager.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -529,6 +530,14 @@ qemudStartup(int privileged) { } VIR_FREE(driverConf);
+ /* We should always at least have the 'nop' manager, so + * NULLs here are a fatal error + */ + if (!qemu_driver->lockManager) { + VIR_ERROR(_("Missing lock manager implementation")); + goto error; + } + if (qemuSecurityInit(qemu_driver) < 0) goto error;
@@ -769,6 +778,8 @@ qemudShutdown(void) {
virCgroupFree(&qemu_driver->cgroup);
+ virLockManagerPluginUnref(qemu_driver->lockManager); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); virThreadPoolFree(qemu_driver->workerPool); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 3cf7d35..a8e73c4 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -38,6 +38,7 @@ #include "pci.h" #include "files.h" #include "qemu_cgroup.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -82,9 +83,15 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, return -1; }
+ if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + }
if (!(driveAlias = qemuDeviceDriveHostAlias(origdisk, priv->qemuCaps))) goto error; @@ -115,6 +122,9 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, vm, origdisk) < 0) VIR_WARN("Unable to restore security label on ejected image %s", origdisk->src);
+ if (virDomainLockDiskDetach(driver->lockManager, vm, origdisk) < 0) + VIR_WARN("Unable to release lock on disk %s", origdisk->src); + VIR_FREE(origdisk->src); origdisk->src = disk->src; disk->src = NULL; @@ -128,9 +138,14 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver,
error: VIR_FREE(driveAlias); + if (virSecurityManagerRestoreImageLabel(driver->securityManager, vm, disk) < 0) VIR_WARN("Unable to restore security label on new media %s", disk->src); + + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; }
@@ -154,9 +169,15 @@ int qemuDomainAttachPciDiskDevice(struct qemud_driver *driver, } }
+ if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + }
if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &disk->info) < 0) @@ -228,6 +249,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src);
+ if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; }
@@ -364,10 +388,15 @@ int qemuDomainAttachSCSIDisk(struct qemud_driver *driver, } }
+ if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1;
if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + }
/* We should have an address already, so make sure */ if (disk->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DRIVE) { @@ -456,6 +485,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src);
+ if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; }
@@ -477,10 +509,17 @@ int qemuDomainAttachUsbMassstorageDevice(struct qemud_driver *driver, } }
+ if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + }
+ /* XXX not correct once we allow attaching a USB CDROM */ if (!disk->src) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("disk source path is missing")); @@ -538,6 +577,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src);
+ if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; }
@@ -1184,6 +1226,9 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); }
+ if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on %s", dev->data.disk->src); + ret = 0;
cleanup: @@ -1262,6 +1307,9 @@ int qemuDomainDetachDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); }
+ if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on disk %s", dev->data.disk->src); + ret = 0;
cleanup: diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 01b15e0..89211f9 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -50,6 +50,7 @@ #include "nodeinfo.h" #include "processinfo.h" #include "domain_nwfilter.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -344,6 +345,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjLock(vm); if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name);
@@ -352,6 +354,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_PAUSED);
+ VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after state change", vm->def->name); @@ -413,6 +420,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG); @@ -420,6 +428,11 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);
+ VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after watchdog event", vm->def->name); @@ -492,6 +505,7 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR); @@ -499,6 +513,11 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);
+ VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); } @@ -1777,6 +1796,17 @@ struct qemuProcessHookData { static int qemuProcessHook(void *data) { struct qemuProcessHookData *h = data; + int ret = -1; + + /* Some later calls want pid present */ + h->vm->pid = getpid(); + + VIR_DEBUG("Obtaining domain lock"); + if (virDomainLockProcessStart(h->driver->lockManager, + h->vm, + /* QEMU is always pased initially */ + true) < 0) + goto cleanup;
if (qemuProcessLimits(h->driver) < 0) return -1; @@ -1784,18 +1814,25 @@ static int qemuProcessHook(void *data) /* This must take place before exec(), so that all QEMU * memory allocation is on the correct NUMA node */ + VIR_DEBUG("Moving procss to cgroup");
Typo: "procss" should be "process", i.e. "Moving process to cgroup".
if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; + goto cleanup;
/* This must be done after cgroup placement to avoid resetting CPU * affinity */ + VIR_DEBUG("Setup CPU affinity"); if (qemuProcessInitCpuAffinity(h->vm) < 0) - return -1; + goto cleanup;
+ VIR_DEBUG("Setting up security labeling"); if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; + goto cleanup;
- return 0; + ret = 0; + +cleanup: + VIR_DEBUG("Hook complete ret=%d", ret); + return ret; }
@@ -1824,12 +1861,24 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int ret; qemuDomainObjPrivatePtr priv = vm->privateData;
+ VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainLockProcessResume(driver->lockManager, vm, priv->lockState) < 0) { + VIR_FREE(priv->lockState); + return -1; + } + VIR_FREE(priv->lockState); + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorStartCPUs(priv->mon, conn); qemuDomainObjExitMonitorWithDriver(driver, vm);
- if (ret == 0) + if (ret == 0) { virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); + } else { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + }
return ret; } @@ -1843,6 +1892,7 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int oldReason; qemuDomainObjPrivatePtr priv = vm->privateData;
+ VIR_FREE(priv->lockState); oldState = virDomainObjGetState(vm, &oldReason); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
@@ -1850,8 +1900,13 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, ret = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm);
- if (ret < 0) + if (ret == 0) { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { virDomainObjSetState(vm, oldState, oldReason); + }
return ret; } @@ -2096,29 +2151,6 @@ int qemuProcessStart(virConnectPtr conn, } qemuAuditSecurityLabel(vm, true);
- VIR_DEBUG("Generating setting domain security labels (if required)"); - if (virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - VIR_DEBUG("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { - virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; - } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); @@ -2303,6 +2335,7 @@ int qemuProcessStart(virConnectPtr conn, virCommandNonblockingFDs(cmd); virCommandSetPidFile(cmd, pidfile); virCommandDaemonize(cmd); + virCommandRequireHandshake(cmd);
ret = virCommandRun(cmd, NULL); VIR_FREE(pidfile); @@ -2333,6 +2366,42 @@ int qemuProcessStart(virConnectPtr conn, #endif }
+ VIR_DEBUG("Waiting for handshake from child"); + if (virCommandHandshakeWait(cmd) < 0) { + ret = -1; + goto cleanup; + } + + VIR_DEBUG("Setting domain security labels"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + VIR_DEBUG("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + VIR_DEBUG("Labelling done, completing handshake to child"); + if (virCommandHandshakeNotify(cmd) < 0) { + ret = -1; + goto cleanup; + } + VIR_DEBUG("Handshake complete, child running"); + if (migrateFrom) start_paused = true;
diff --git a/src/qemu/test_libvirtd_qemu.aug b/src/qemu/test_libvirtd_qemu.aug index 917bd4f..b1f9114 100644 --- a/src/qemu/test_libvirtd_qemu.aug +++ b/src/qemu/test_libvirtd_qemu.aug @@ -113,6 +113,8 @@ allow_disk_format_probing = 1 vnc_auto_unix_socket = 1
max_processes = 12345 + +lock_manager = \"fcntl\" "
test Libvirtd_qemu.lns get conf = @@ -236,3 +238,5 @@ max_processes = 12345 { "vnc_auto_unix_socket" = "1" } { "#empty" } { "max_processes" = "12345" } +{ "#empty" } +{ "lock_manager" = "fcntl" } -- 1.7.4.4
Pure code review of this is hard: it is very difficult to spot places where locking should have been added but is missing from the patch, or where there is a risk of races. That said, it looks sane, and the best approach is to apply and activate it so that it gets tested as much as possible. ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Some lock managers associate state with leases, allowing a process to temporarily release its leases, and re-acquire them later, safe in the knowledge that no other process has acquired + released the leases in between. This is already used between suspend/resume operations, and must also be used across migration. This passes the lockstate in the migration cookie. If the lock manager uses lockstate, then it becomes compulsory to use the migration v3 protocol to get the cookie support. * src/qemu/qemu_driver.c: Validate that migration v2 protocol is not used if lock manager needs state transfer * src/qemu/qemu_migration.c: Transfer lock state in migration cookie XML --- src/qemu/qemu_driver.c | 27 +++++++++- src/qemu/qemu_migration.c | 119 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 18233b7..6d4a6f4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5846,6 +5846,8 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC, -1); + qemuDriverLock(driver); + if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5862,13 +5864,19 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, goto cleanup; } - qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + ret = qemuMigrationPrepareTunnel(driver, dconn, NULL, 0, NULL, NULL, /* No cookies in v2 */ st, dname, dom_xml); - qemuDriverUnlock(driver); cleanup: + qemuDriverUnlock(driver); return ret; } @@ -5902,6 +5910,14 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, *uri_out = NULL; qemuDriverLock(driver); + + if (virLockManagerPluginUsesState(driver->lockManager)) { + 
qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + if (flags & VIR_MIGRATE_TUNNELLED) { /* this is a logical error; we never should have gotten here with * VIR_MIGRATE_TUNNELLED set @@ -5956,6 +5972,13 @@ qemudDomainMigratePerform (virDomainPtr dom, VIR_MIGRATE_NON_SHARED_INC, -1); qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (!vm) { char uuidstr[VIR_UUID_STRING_BUFLEN]; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9e01923..72a113a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -41,6 +41,7 @@ #include "datatypes.h" #include "fdstream.h" #include "uuid.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -49,6 +50,7 @@ enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS, + QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE, QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -56,10 +58,11 @@ enum qemuMigrationCookieFlags { VIR_ENUM_DECL(qemuMigrationCookieFlag); VIR_ENUM_IMPL(qemuMigrationCookieFlag, QEMU_MIGRATION_COOKIE_FLAG_LAST, - "graphics"); + "graphics", "lockstate"); enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), + QEMU_MIGRATION_COOKIE_LOCKSTATE = (1 << QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE), }; typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -86,6 +89,10 @@ struct _qemuMigrationCookie { unsigned char uuid[VIR_UUID_BUFLEN]; char *name; + /* If (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) */ + char *lockState; + char *lockDriver; + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ qemuMigrationCookieGraphicsPtr 
graphics; }; @@ -110,6 +117,8 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) VIR_FREE(mig->hostname); VIR_FREE(mig->name); + VIR_FREE(mig->lockState); + VIR_FREE(mig->lockDriver); VIR_FREE(mig); } @@ -275,6 +284,41 @@ qemuMigrationCookieAddGraphics(qemuMigrationCookiePtr mig, } +static int +qemuMigrationCookieAddLockstate(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom) +{ + qemuDomainObjPrivatePtr priv = dom->privateData; + + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration lockstate data already present")); + return -1; + } + + if (virDomainObjGetState(dom, NULL) == VIR_DOMAIN_PAUSED) { + if (priv->lockState && + !(mig->lockState = strdup(priv->lockState))) + return -1; + } else { + if (virDomainLockProcessInquire(driver->lockManager, dom, &mig->lockState) < 0) + return -1; + } + + if (!(mig->lockDriver = strdup(virLockManagerPluginGetName(driver->lockManager)))) { + VIR_FREE(mig->lockState); + return -1; + } + + mig->flags |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + mig->flagsMandatory |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + + return 0; +} + + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -319,6 +363,15 @@ static void qemuMigrationCookieXMLFormat(virBufferPtr buf, mig->graphics) qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics); + if ((mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) && + mig->lockState) { + virBufferAsprintf(buf, " <lockstate driver='%s'>\n", + mig->lockDriver); + virBufferAsprintf(buf, " <leases>%s</leases>\n", + mig->lockState); + virBufferAddLit(buf, " </lockstate>\n"); + } + virBufferAddLit(buf, "</qemu-migration>\n"); } @@ -498,6 +551,18 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) goto error; + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + mig->lockDriver = 
virXPathString("string(./lockstate[1]/@driver)", ctxt); + if (!mig->lockDriver) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing lock driver name in migration cookie")); + goto error; + } + mig->lockState = virXPathString("string(./lockstate[1]/leases[1])", ctxt); + if (mig->lockState && STREQ(mig->lockState, "")) + VIR_FREE(mig->lockState); + } + return 0; error: @@ -558,6 +623,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddGraphics(mig, driver, dom) < 0) return -1; + if (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE && + qemuMigrationCookieAddLockstate(mig, driver, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(mig))) return -1; @@ -570,7 +639,8 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, static qemuMigrationCookiePtr -qemuMigrationEatCookie(virDomainObjPtr dom, +qemuMigrationEatCookie(struct qemud_driver *driver, + virDomainObjPtr dom, const char *cookiein, int cookieinlen, int flags) @@ -596,6 +666,17 @@ qemuMigrationEatCookie(virDomainObjPtr dom, flags) < 0) goto error; + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + if (STRNEQ(mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Source host lock driver %s different from target %s"), + mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager)); + goto error; + } + } + return mig; error: @@ -885,12 +966,12 @@ char *qemuMigrationBegin(struct qemud_driver *driver, if (!qemuMigrationIsAllowed(vm->def)) goto cleanup; - if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) goto cleanup; if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, - 0) < 0) + QEMU_MIGRATION_COOKIE_LOCKSTATE) < 0) goto cleanup; rv = qemuDomainFormatXML(driver, vm, @@ -964,7 +1045,8 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = 
qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1193,7 +1275,8 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1287,7 +1370,15 @@ static int doNativeMigrate(struct qemud_driver *driver, unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; qemuMigrationCookiePtr mig = NULL; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -1473,6 +1564,14 @@ static int doTunnelMigrate(struct qemud_driver *driver, qemuMigrationCookiePtr mig = NULL; qemuMigrationIOThreadPtr iothread = NULL; + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + if (!qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX) && !qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -1532,7 +1631,7 @@ static int doTunnelMigrate(struct qemud_driver *driver, goto cleanup; } - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, 
cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -2192,7 +2291,7 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -2313,7 +2412,7 @@ int qemuMigrationConfirm(struct qemud_driver *driver, virDomainEventPtr event = NULL; int rv = -1; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1; if (!skipJob && -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:23AM -0400, Daniel P. Berrange wrote:
Some lock managers associate state with leases, allowing a process to temporarily release its leases, and re-acquire them later, safe in the knowledge that no other process has acquired + released the leases in between.
This mechanism is already used between suspend/resume operations, and must also be used across migration. This patch passes the lockstate in the migration cookie. If the lock manager uses lockstate, then it becomes compulsory to use the migration v3 protocol to get the cookie support.
* src/qemu/qemu_driver.c: Validate that migration v2 protocol is not used if lock manager needs state transfer * src/qemu/qemu_migration.c: Transfer lock state in migration cookie XML --- src/qemu/qemu_driver.c | 27 +++++++++- src/qemu/qemu_migration.c | 119 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 12 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 18233b7..6d4a6f4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5846,6 +5846,8 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC, -1);
+ qemuDriverLock(driver); + if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5862,13 +5864,19 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, goto cleanup; }
- qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + ret = qemuMigrationPrepareTunnel(driver, dconn, NULL, 0, NULL, NULL, /* No cookies in v2 */ st, dname, dom_xml); - qemuDriverUnlock(driver);
cleanup: + qemuDriverUnlock(driver); return ret; }
@@ -5902,6 +5910,14 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, *uri_out = NULL;
qemuDriverLock(driver); + + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + if (flags & VIR_MIGRATE_TUNNELLED) { /* this is a logical error; we never should have gotten here with * VIR_MIGRATE_TUNNELLED set @@ -5956,6 +5972,13 @@ qemudDomainMigratePerform (virDomainPtr dom, VIR_MIGRATE_NON_SHARED_INC, -1);
qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (!vm) { char uuidstr[VIR_UUID_STRING_BUFLEN]; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9e01923..72a113a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -41,6 +41,7 @@ #include "datatypes.h" #include "fdstream.h" #include "uuid.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU @@ -49,6 +50,7 @@
enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS, + QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE,
QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -56,10 +58,11 @@ enum qemuMigrationCookieFlags { VIR_ENUM_DECL(qemuMigrationCookieFlag); VIR_ENUM_IMPL(qemuMigrationCookieFlag, QEMU_MIGRATION_COOKIE_FLAG_LAST, - "graphics"); + "graphics", "lockstate");
enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), + QEMU_MIGRATION_COOKIE_LOCKSTATE = (1 << QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE), };
typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -86,6 +89,10 @@ struct _qemuMigrationCookie { unsigned char uuid[VIR_UUID_BUFLEN]; char *name;
+ /* If (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) */ + char *lockState; + char *lockDriver; + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ qemuMigrationCookieGraphicsPtr graphics; }; @@ -110,6 +117,8 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig)
VIR_FREE(mig->hostname); VIR_FREE(mig->name); + VIR_FREE(mig->lockState); + VIR_FREE(mig->lockDriver); VIR_FREE(mig); }
@@ -275,6 +284,41 @@ qemuMigrationCookieAddGraphics(qemuMigrationCookiePtr mig, }
+static int +qemuMigrationCookieAddLockstate(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom) +{ + qemuDomainObjPrivatePtr priv = dom->privateData; + + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration lockstate data already present")); + return -1; + } + + if (virDomainObjGetState(dom, NULL) == VIR_DOMAIN_PAUSED) { + if (priv->lockState && + !(mig->lockState = strdup(priv->lockState))) + return -1; + } else { + if (virDomainLockProcessInquire(driver->lockManager, dom, &mig->lockState) < 0) + return -1; + } + + if (!(mig->lockDriver = strdup(virLockManagerPluginGetName(driver->lockManager)))) { + VIR_FREE(mig->lockState); + return -1; + } + + mig->flags |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + mig->flagsMandatory |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + + return 0; +} + + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -319,6 +363,15 @@ static void qemuMigrationCookieXMLFormat(virBufferPtr buf, mig->graphics) qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics);
+ if ((mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) && + mig->lockState) { + virBufferAsprintf(buf, " <lockstate driver='%s'>\n", + mig->lockDriver); + virBufferAsprintf(buf, " <leases>%s</leases>\n", + mig->lockState); + virBufferAddLit(buf, " </lockstate>\n"); + } + virBufferAddLit(buf, "</qemu-migration>\n"); }
@@ -498,6 +551,18 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) goto error;
+ if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + mig->lockDriver = virXPathString("string(./lockstate[1]/@driver)", ctxt); + if (!mig->lockDriver) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing lock driver name in migration cookie")); + goto error; + } + mig->lockState = virXPathString("string(./lockstate[1]/leases[1])", ctxt); + if (mig->lockState && STREQ(mig->lockState, "")) + VIR_FREE(mig->lockState); + } + return 0;
error: @@ -558,6 +623,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddGraphics(mig, driver, dom) < 0) return -1;
+ if (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE && + qemuMigrationCookieAddLockstate(mig, driver, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(mig))) return -1;
@@ -570,7 +639,8 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig,
static qemuMigrationCookiePtr -qemuMigrationEatCookie(virDomainObjPtr dom, +qemuMigrationEatCookie(struct qemud_driver *driver, + virDomainObjPtr dom, const char *cookiein, int cookieinlen, int flags) @@ -596,6 +666,17 @@ qemuMigrationEatCookie(virDomainObjPtr dom, flags) < 0) goto error;
+ if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + if (STRNEQ(mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Source host lock driver %s different from target %s"), + mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager)); + goto error; + } + } + return mig;
error: @@ -885,12 +966,12 @@ char *qemuMigrationBegin(struct qemud_driver *driver, if (!qemuMigrationIsAllowed(vm->def)) goto cleanup;
- if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) goto cleanup;
if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, - 0) < 0) + QEMU_MIGRATION_COOKIE_LOCKSTATE) < 0) goto cleanup;
rv = qemuDomainFormatXML(driver, vm, @@ -964,7 +1045,8 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1193,7 +1275,8 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1287,7 +1370,15 @@ static int doNativeMigrate(struct qemud_driver *driver, unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; qemuMigrationCookiePtr mig = NULL;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup;
@@ -1473,6 +1564,14 @@ static int doTunnelMigrate(struct qemud_driver *driver, qemuMigrationCookiePtr mig = NULL; qemuMigrationIOThreadPtr iothread = NULL;
+ if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + if (!qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX) && !qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -1532,7 +1631,7 @@ static int doTunnelMigrate(struct qemud_driver *driver, goto cleanup; }
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup;
@@ -2192,7 +2291,7 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo));
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -2313,7 +2412,7 @@ int qemuMigrationConfirm(struct qemud_driver *driver, virDomainEventPtr event = NULL; int rv = -1;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1;
if (!skipJob &&
ACK, looks fine. But I'm wondering about the scenario where a networked resource happens to be used from 2 different nodes by different domains, and one gets migrated to the same box: the lock manager should detect the problem and migration will fail, but the broken "double use" will continue, right? Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Fri, May 27, 2011 at 05:12:38PM +0800, Daniel Veillard wrote:
On Thu, May 19, 2011 at 07:24:23AM -0400, Daniel P. Berrange wrote:
Some lock managers associate state with leases, allowing a process to temporarily release its leases, and re-acquire them later, safe in the knowledge that no other process has acquired + released the leases in between.
This is already used between suspend/resume operations, and must also be used across migration. This passes the lockstate in the migration cookie. If the lock manager uses lockstate, then it becomes compulsory to use the migration v3 protocol to get the cookie support.
* src/qemu/qemu_driver.c: Validate that migration v2 protocol is not used if lock manager needs state transfer * src/qemu/qemu_migration.c: Transfer lock state in migration cookie XML --- src/qemu/qemu_driver.c | 27 +++++++++- src/qemu/qemu_migration.c | 119 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 134 insertions(+), 12 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 18233b7..6d4a6f4 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5846,6 +5846,8 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC, -1);
+ qemuDriverLock(driver); + if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5862,13 +5864,19 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, goto cleanup; }
- qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + ret = qemuMigrationPrepareTunnel(driver, dconn, NULL, 0, NULL, NULL, /* No cookies in v2 */ st, dname, dom_xml); - qemuDriverUnlock(driver);
cleanup: + qemuDriverUnlock(driver); return ret; }
@@ -5902,6 +5910,14 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, *uri_out = NULL;
qemuDriverLock(driver); + + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + if (flags & VIR_MIGRATE_TUNNELLED) { /* this is a logical error; we never should have gotten here with * VIR_MIGRATE_TUNNELLED set @@ -5956,6 +5972,13 @@ qemudDomainMigratePerform (virDomainPtr dom, VIR_MIGRATE_NON_SHARED_INC, -1);
qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (!vm) { char uuidstr[VIR_UUID_STRING_BUFLEN]; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9e01923..72a113a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -41,6 +41,7 @@ #include "datatypes.h" #include "fdstream.h" #include "uuid.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU @@ -49,6 +50,7 @@
enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS, + QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE,
QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -56,10 +58,11 @@ enum qemuMigrationCookieFlags { VIR_ENUM_DECL(qemuMigrationCookieFlag); VIR_ENUM_IMPL(qemuMigrationCookieFlag, QEMU_MIGRATION_COOKIE_FLAG_LAST, - "graphics"); + "graphics", "lockstate");
enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), + QEMU_MIGRATION_COOKIE_LOCKSTATE = (1 << QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE), };
typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -86,6 +89,10 @@ struct _qemuMigrationCookie { unsigned char uuid[VIR_UUID_BUFLEN]; char *name;
+ /* If (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) */ + char *lockState; + char *lockDriver; + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ qemuMigrationCookieGraphicsPtr graphics; }; @@ -110,6 +117,8 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig)
VIR_FREE(mig->hostname); VIR_FREE(mig->name); + VIR_FREE(mig->lockState); + VIR_FREE(mig->lockDriver); VIR_FREE(mig); }
@@ -275,6 +284,41 @@ qemuMigrationCookieAddGraphics(qemuMigrationCookiePtr mig, }
+static int +qemuMigrationCookieAddLockstate(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom) +{ + qemuDomainObjPrivatePtr priv = dom->privateData; + + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration lockstate data already present")); + return -1; + } + + if (virDomainObjGetState(dom, NULL) == VIR_DOMAIN_PAUSED) { + if (priv->lockState && + !(mig->lockState = strdup(priv->lockState))) + return -1; + } else { + if (virDomainLockProcessInquire(driver->lockManager, dom, &mig->lockState) < 0) + return -1; + } + + if (!(mig->lockDriver = strdup(virLockManagerPluginGetName(driver->lockManager)))) { + VIR_FREE(mig->lockState); + return -1; + } + + mig->flags |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + mig->flagsMandatory |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + + return 0; +} + + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -319,6 +363,15 @@ static void qemuMigrationCookieXMLFormat(virBufferPtr buf, mig->graphics) qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics);
+ if ((mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) && + mig->lockState) { + virBufferAsprintf(buf, " <lockstate driver='%s'>\n", + mig->lockDriver); + virBufferAsprintf(buf, " <leases>%s</leases>\n", + mig->lockState); + virBufferAddLit(buf, " </lockstate>\n"); + } + virBufferAddLit(buf, "</qemu-migration>\n"); }
@@ -498,6 +551,18 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) goto error;
+ if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + mig->lockDriver = virXPathString("string(./lockstate[1]/@driver)", ctxt); + if (!mig->lockDriver) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing lock driver name in migration cookie")); + goto error; + } + mig->lockState = virXPathString("string(./lockstate[1]/leases[1])", ctxt); + if (mig->lockState && STREQ(mig->lockState, "")) + VIR_FREE(mig->lockState); + } + return 0;
error: @@ -558,6 +623,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddGraphics(mig, driver, dom) < 0) return -1;
+ if (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE && + qemuMigrationCookieAddLockstate(mig, driver, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(mig))) return -1;
@@ -570,7 +639,8 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig,
static qemuMigrationCookiePtr -qemuMigrationEatCookie(virDomainObjPtr dom, +qemuMigrationEatCookie(struct qemud_driver *driver, + virDomainObjPtr dom, const char *cookiein, int cookieinlen, int flags) @@ -596,6 +666,17 @@ qemuMigrationEatCookie(virDomainObjPtr dom, flags) < 0) goto error;
+ if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + if (STRNEQ(mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Source host lock driver %s different from target %s"), + mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager)); + goto error; + } + } + return mig;
error: @@ -885,12 +966,12 @@ char *qemuMigrationBegin(struct qemud_driver *driver, if (!qemuMigrationIsAllowed(vm->def)) goto cleanup;
- if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) goto cleanup;
if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, - 0) < 0) + QEMU_MIGRATION_COOKIE_LOCKSTATE) < 0) goto cleanup;
rv = qemuDomainFormatXML(driver, vm, @@ -964,7 +1045,8 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1193,7 +1275,8 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1287,7 +1370,15 @@ static int doNativeMigrate(struct qemud_driver *driver, unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; qemuMigrationCookiePtr mig = NULL;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup;
@@ -1473,6 +1564,14 @@ static int doTunnelMigrate(struct qemud_driver *driver, qemuMigrationCookiePtr mig = NULL; qemuMigrationIOThreadPtr iothread = NULL;
+ if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + if (!qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX) && !qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -1532,7 +1631,7 @@ static int doTunnelMigrate(struct qemud_driver *driver, goto cleanup; }
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup;
@@ -2192,7 +2291,7 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo));
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) goto cleanup;
if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -2313,7 +2412,7 @@ int qemuMigrationConfirm(struct qemud_driver *driver, virDomainEventPtr event = NULL; int rv = -1;
- if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1;
if (!skipJob &&
ACK, looks fine.
But I'm wondering about the scenario where a networked resource happens to be used from 2 different nodes by different domains, and one gets migrated to the same box: the lock manager should detect the problem and migration will fail, but the broken "double use" will continue, right?
The lock manager support should protect against the same guest being started twice on different machines, or on the same machine (e.g. protect against libvirtd "forgetting" a guest on libvirtd restart). If suitably configured, it will also protect against two different guests being given the same disk. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

* src/conf/domain_conf.c, src/conf/domain_conf.h: APIs for inserting/finding/removing virDomainLeaseDefPtr instances * src/qemu/qemu_driver.c: Wire up hotplug/unplug for leases * src/qemu/qemu_hotplug.h, src/qemu/qemu_hotplug.c: Support for hotplug and unplug of leases --- bootstrap | 12 +------ src/conf/domain_conf.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 14 ++++++++- src/libvirt_private.syms | 6 +++ src/qemu/qemu_driver.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.h | 6 +++ 7 files changed, 177 insertions(+), 11 deletions(-) diff --git a/bootstrap b/bootstrap index 522ac70..d32db57 100755 --- a/bootstrap +++ b/bootstrap @@ -1,6 +1,6 @@ #! /bin/sh # Print a version string. -scriptversion=2011-05-16.16; # UTC +scriptversion=2011-05-11.17; # UTC # Bootstrap this package from checked-out sources. @@ -670,18 +670,10 @@ symlink_to_dir() cp -fp "$src" "$dst" } else - # Leave any existing symlink alone, if it already points to the source, - # so that broken build tools that care about symlink times - # aren't confused into doing unnecessary builds. Conversely, if the - # existing symlink's time stamp is older than the source, make it afresh, - # so that broken tools aren't confused into skipping needed builds. See - # <http://lists.gnu.org/archive/html/bug-gnulib/2011-05/msg00326.html>. 
test -h "$dst" && src_ls=`ls -diL "$src" 2>/dev/null` && set $src_ls && src_i=$1 && dst_ls=`ls -diL "$dst" 2>/dev/null` && set $dst_ls && dst_i=$1 && - test "$src_i" = "$dst_i" && - both_ls=`ls -dt "$src" "$dst"` && - test "X$both_ls" = "X$dst$nl$src" || { + test "$src_i" = "$dst_i" || { dot_dots= case $src in /*) ;; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index b6f7740..3c137a1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -5330,6 +5330,84 @@ void virDomainControllerInsertPreAlloced(virDomainDefPtr def, } +int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + virDomainLeaseDefPtr vlease; + int i; + + for (i = 0; i < def->nleases; i++) { + vlease = def->leases[i]; + /* Either both must have lockspaces present which match.. */ + if (vlease->lockspace && lease->lockspace && + STRNEQ(vlease->lockspace, lease->lockspace)) + continue; + /* ...or neither must have a lockspace present */ + if (vlease->lockspace || lease->lockspace) + continue; + if (STREQ(vlease->key, lease->key)) + return i; + } + return -1; +} + + +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def) +{ + if (VIR_EXPAND_N(def->leases, def->nleases, 1) < 0) { + virReportOOMError(); + return -1; + } + return 0; +} + +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(def) < 0) + return -1; + + virDomainLeaseInsertPreAlloced(def, lease); + return 0; +} + + +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (lease == NULL) + VIR_SHRINK_N(def->leases, def->nleases, 1); + else + def->leases[def->nleases-1] = lease; +} + + +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i) +{ + if (def->nleases > 1) { + memmove(def->leases + i, + def->leases + i + 1, + sizeof(*def->leases) * + (def->nleases - (i + 1))); + VIR_SHRINK_N(def->leases, def->nleases, 1); + } else { + VIR_FREE(def->leases); + def->nleases = 0; + } +} + + +int 
virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + int i = virDomainLeaseIndex(def, lease); + if (i < 0) + return -1; + virDomainLeaseRemoveAt(def, i); + return 0; +} + + static char *virDomainDefDefaultEmulator(virDomainDefPtr def, virCapsPtr caps) { const char *type; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index b0771aa..ca1d792 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1182,7 +1182,7 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels; - int nleases; + size_t nleases; virDomainLeaseDefPtr *leases; /* Only 1 */ @@ -1381,6 +1381,18 @@ int virDomainControllerInsert(virDomainDefPtr def, void virDomainControllerInsertPreAlloced(virDomainDefPtr def, virDomainControllerDefPtr controller); + +int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def); +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i); +int virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease); + int virDomainSaveXML(const char *configDir, virDomainDefPtr def, const char *xml); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a2a6de9..a3fe2f1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -285,6 +285,12 @@ virDomainHostdevDefFree; virDomainHostdevModeTypeToString; virDomainHostdevSubsysTypeToString; virDomainInputDefFree; +virDomainLeaseIndex; +virDomainLeaseInsert; +virDomainLeaseInsertPreAlloc; +virDomainLeaseInsertPreAlloced; +virDomainLeaseRemove; +virDomainLeaseRemoveAt; virDomainLifecycleCrashTypeFromString; virDomainLifecycleCrashTypeToString; virDomainLifecycleTypeFromString; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 6d4a6f4..682ffd9 100644 --- a/src/qemu/qemu_driver.c +++ 
b/src/qemu/qemu_driver.c @@ -4074,6 +4074,13 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, dev->data.controller = NULL; break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainAttachLease(driver, vm, + dev->data.lease); + if (ret == 0) + dev->data.lease = NULL; + break; + case VIR_DOMAIN_DEVICE_NET: qemuDomainObjCheckNetTaint(driver, vm, dev->data.net, -1); ret = qemuDomainAttachNetDevice(dom->conn, driver, vm, @@ -4163,6 +4170,9 @@ qemuDomainDetachDeviceLive(virDomainObjPtr vm, case VIR_DOMAIN_DEVICE_CONTROLLER: ret = qemuDomainDetachDeviceControllerLive(driver, vm, dev); break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainDetachLease(driver, vm, dev->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: ret = qemuDomainDetachNetDevice(driver, vm, dev); break; @@ -4256,6 +4266,7 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease; switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4278,6 +4289,21 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, return -1; break; + case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseIndex(vmdef, lease) >= 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s already exists"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + if (virDomainLeaseInsert(vmdef, lease) < 0) + return -1; + + /* vmdef has the pointer. 
Generic codes for vmdef will do all jobs */ + dev->data.lease = NULL; + break; + default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent attach of device is not supported")); @@ -4292,6 +4318,7 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease; switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4302,6 +4329,15 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, return -1; } break; + case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseRemove(vmdef, lease) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + break; default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent detach of device is not supported")); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index a8e73c4..c9e2d08 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1846,3 +1846,39 @@ cleanup: return ret; } + +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(vm->def) < 0) + return -1; + + if (virDomainLockLeaseAttach(driver->lockManager, vm, lease) < 0) { + virDomainLeaseInsertPreAlloced(vm->def, NULL); + return -1; + } + + virDomainLeaseInsertPreAlloced(vm->def, lease); + return 0; +} + +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + int i; + + if ((i = virDomainLeaseIndex(vm->def, lease)) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + + if (virDomainLockLeaseDetach(driver->lockManager, vm, lease) < 0) + return -1; + + virDomainLeaseRemoveAt(vm->def, i); + return 0; +} diff --git a/src/qemu/qemu_hotplug.h b/src/qemu/qemu_hotplug.h index d18b393..009f1f6 
100644 --- a/src/qemu/qemu_hotplug.h +++ b/src/qemu/qemu_hotplug.h @@ -85,6 +85,12 @@ int qemuDomainDetachHostUsbDevice(struct qemud_driver *driver, int qemuDomainDetachHostDevice(struct qemud_driver *driver, virDomainObjPtr vm, virDomainDeviceDefPtr dev); +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); #endif /* __QEMU_HOTPLUG_H__ */ -- 1.7.4.4

On Thu, May 19, 2011 at 07:24:24AM -0400, Daniel P. Berrange wrote:
* src/conf/domain_conf.c, src/conf/domain_conf.h: APIs for inserting/finding/removing virDomainLeaseDefPtr instances * src/qemu/qemu_driver.c: Wire up hotplug/unplug for leases * src/qemu/qemu_hotplug.h, src/qemu/qemu_hotplug.c: Support for hotplug and unplug of leases --- bootstrap | 12 +------ src/conf/domain_conf.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 14 ++++++++- src/libvirt_private.syms | 6 +++ src/qemu/qemu_driver.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.h | 6 +++ 7 files changed, 177 insertions(+), 11 deletions(-)
diff --git a/bootstrap b/bootstrap index 522ac70..d32db57 100755 --- a/bootstrap +++ b/bootstrap @@ -1,6 +1,6 @@ #! /bin/sh # Print a version string. -scriptversion=2011-05-16.16; # UTC +scriptversion=2011-05-11.17; # UTC
# Bootstrap this package from checked-out sources.
@@ -670,18 +670,10 @@ symlink_to_dir() cp -fp "$src" "$dst" } else - # Leave any existing symlink alone, if it already points to the source, - # so that broken build tools that care about symlink times - # aren't confused into doing unnecessary builds. Conversely, if the - # existing symlink's time stamp is older than the source, make it afresh, - # so that broken tools aren't confused into skipping needed builds. See - # <http://lists.gnu.org/archive/html/bug-gnulib/2011-05/msg00326.html>. test -h "$dst" && src_ls=`ls -diL "$src" 2>/dev/null` && set $src_ls && src_i=$1 && dst_ls=`ls -diL "$dst" 2>/dev/null` && set $dst_ls && dst_i=$1 && - test "$src_i" = "$dst_i" && - both_ls=`ls -dt "$src" "$dst"` && - test "X$both_ls" = "X$dst$nl$src" || { + test "$src_i" = "$dst_i" || { dot_dots= case $src in /*) ;;
I assume that the bootstrap change above is a rogue rebase leftover, right?
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index b6f7740..3c137a1 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -5330,6 +5330,84 @@ void virDomainControllerInsertPreAlloced(virDomainDefPtr def, }
+int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + virDomainLeaseDefPtr vlease; + int i; + + for (i = 0; i < def->nleases; i++) { + vlease = def->leases[i]; + /* Either both must have lockspaces present which match.. */ + if (vlease->lockspace && lease->lockspace && + STRNEQ(vlease->lockspace, lease->lockspace)) + continue; + /* ...or neither must have a lockspace present */ + if (vlease->lockspace || lease->lockspace) + continue; + if (STREQ(vlease->key, lease->key)) + return i; + } + return -1; +} + + +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def) +{ + if (VIR_EXPAND_N(def->leases, def->nleases, 1) < 0) { + virReportOOMError(); + return -1; + } + return 0; +} + +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(def) < 0) + return -1; + + virDomainLeaseInsertPreAlloced(def, lease); + return 0; +} + + +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (lease == NULL) + VIR_SHRINK_N(def->leases, def->nleases, 1); + else + def->leases[def->nleases-1] = lease; +} + + +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i) +{ + if (def->nleases > 1) { + memmove(def->leases + i, + def->leases + i + 1, + sizeof(*def->leases) * + (def->nleases - (i + 1))); + VIR_SHRINK_N(def->leases, def->nleases, 1); + } else { + VIR_FREE(def->leases); + def->nleases = 0; + } +} + + +int virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + int i = virDomainLeaseIndex(def, lease); + if (i < 0) + return -1; + virDomainLeaseRemoveAt(def, i); + return 0; +} + + static char *virDomainDefDefaultEmulator(virDomainDefPtr def, virCapsPtr caps) { const char *type; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index b0771aa..ca1d792 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1182,7 +1182,7 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels;
- int nleases; + size_t nleases; virDomainLeaseDefPtr *leases;
/* Only 1 */ @@ -1381,6 +1381,18 @@ int virDomainControllerInsert(virDomainDefPtr def, void virDomainControllerInsertPreAlloced(virDomainDefPtr def, virDomainControllerDefPtr controller);
+ +int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def); +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i); +int virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease); + int virDomainSaveXML(const char *configDir, virDomainDefPtr def, const char *xml); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a2a6de9..a3fe2f1 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -285,6 +285,12 @@ virDomainHostdevDefFree; virDomainHostdevModeTypeToString; virDomainHostdevSubsysTypeToString; virDomainInputDefFree; +virDomainLeaseIndex; +virDomainLeaseInsert; +virDomainLeaseInsertPreAlloc; +virDomainLeaseInsertPreAlloced; +virDomainLeaseRemove; +virDomainLeaseRemoveAt; virDomainLifecycleCrashTypeFromString; virDomainLifecycleCrashTypeToString; virDomainLifecycleTypeFromString; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 6d4a6f4..682ffd9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -4074,6 +4074,13 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, dev->data.controller = NULL; break;
+ case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainAttachLease(driver, vm, + dev->data.lease); + if (ret == 0) + dev->data.lease = NULL; + break; + case VIR_DOMAIN_DEVICE_NET: qemuDomainObjCheckNetTaint(driver, vm, dev->data.net, -1); ret = qemuDomainAttachNetDevice(dom->conn, driver, vm, @@ -4163,6 +4170,9 @@ qemuDomainDetachDeviceLive(virDomainObjPtr vm, case VIR_DOMAIN_DEVICE_CONTROLLER: ret = qemuDomainDetachDeviceControllerLive(driver, vm, dev); break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainDetachLease(driver, vm, dev->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: ret = qemuDomainDetachNetDevice(driver, vm, dev); break; @@ -4256,6 +4266,7 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease;
switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4278,6 +4289,21 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, return -1; break;
+ case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseIndex(vmdef, lease) >= 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s already exists"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + if (virDomainLeaseInsert(vmdef, lease) < 0) + return -1; + + /* vmdef has the pointer. Generic codes for vmdef will do all jobs */ + dev->data.lease = NULL; + break; + default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent attach of device is not supported")); @@ -4292,6 +4318,7 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease;
switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4302,6 +4329,15 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, return -1; } break; + case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseRemove(vmdef, lease) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + break; default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent detach of device is not supported")); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index a8e73c4..c9e2d08 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1846,3 +1846,39 @@ cleanup:
return ret; } + +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(vm->def) < 0) + return -1; + + if (virDomainLockLeaseAttach(driver->lockManager, vm, lease) < 0) { + virDomainLeaseInsertPreAlloced(vm->def, NULL); + return -1; + } + + virDomainLeaseInsertPreAlloced(vm->def, lease); + return 0; +} + +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + int i; + + if ((i = virDomainLeaseIndex(vm->def, lease)) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + + if (virDomainLockLeaseDetach(driver->lockManager, vm, lease) < 0) + return -1; + + virDomainLeaseRemoveAt(vm->def, i); + return 0; +} diff --git a/src/qemu/qemu_hotplug.h b/src/qemu/qemu_hotplug.h index d18b393..009f1f6 100644 --- a/src/qemu/qemu_hotplug.h +++ b/src/qemu/qemu_hotplug.h @@ -85,6 +85,12 @@ int qemuDomainDetachHostUsbDevice(struct qemud_driver *driver, int qemuDomainDetachHostDevice(struct qemud_driver *driver, virDomainObjPtr vm, virDomainDeviceDefPtr dev); +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease);
Except for the bootstrap change, ACK. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage. * src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c diff --git a/libvirt.spec.in b/libvirt.spec.in index e85f68f..73213ea 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0} # Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -180,6 +181,7 @@ %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 %define with_dtrace 1 +%define with_sanlock 1 %endif # Pull in cgroups config system @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel %if %{with_referential} BuildRequires: referential-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif %if %{with_storage_fs} # For mount/umount in FS driver @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a %if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1004,6 +1011,10 @@ fi %attr(0755, root, root) %{_libexecdir}/libvirt_lxc %endif +%if %{with_sanlock} +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper %attr(0755, root, 
root) %{_libexecdir}/libvirt_iohelper %attr(0755, root, root) %{_sbindir}/libvirtd diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..edf017d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c +LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c + # XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) + +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock + libexec_PROGRAMS = if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a3fe2f1..e61ea13 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -650,6 +650,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..6a31fdf --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,413 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) 
\ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if (VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = &params[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error:
+ VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + 
params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = 
sanlock_register()) < 0) { + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) { +#if 1 + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); +#else + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to acquire lock")); +#endif + goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, 
res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = +{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + .drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +}; -- 1.7.4.4

On Thu, May 19, 2011 at 6:24 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c
<snip> Daniel, How does the dependency magic for sanlock work here? It appears that there will be an automagical dependency on libsanlock when it's installed. It might be a good idea to provide a ./configure script knob to enable/disable this piece. Additionally, I see you've added virAllocVar to the syms list but I don't see it used anywhere else in the context of this patch. Not sure if I'm missing something. -- Doug Goldstein

On Fri, May 20, 2011 at 03:11:35PM -0500, Doug Goldstein wrote:
On Thu, May 19, 2011 at 6:24 AM, Daniel P. Berrange <berrange@redhat.com> wrote:
Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c
<snip>
Daniel,
How does the dependency magic for sanlock work here? It appears that there will be an automagical dependency on libsanlock when it's installed. It might be a good idea to provide a ./configure script knob to enable/disable this piece.
That was waiting for sanlock to provide a pkgconfig file...
Additionally, I see you've added virAllocVar to the syms list but I don't see it used anywhere else in the context of this patch. Not sure if I'm missing something.
It is used in sanlock.c: + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote:
Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c
diff --git a/libvirt.spec.in b/libvirt.spec.in index e85f68f..73213ea 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0}
# Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -180,6 +181,7 @@
%if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 %define with_dtrace 1 +%define with_sanlock 1 %endif
# Pull in cgroups config system @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel %if %{with_referential} BuildRequires: referential-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif
Hmm... weird: [root@paphio ~]# yum install sanlock-devel .. No package sanlock-devel available. Error: Nothing to do [root@paphio ~]# cat /etc/fedora-release Fedora release 14 (Laughlin) [root@paphio ~]# Are you sure about the dep?
%if %{with_storage_fs} # For mount/umount in FS driver @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
%if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1004,6 +1011,10 @@ fi %attr(0755, root, root) %{_libexecdir}/libvirt_lxc %endif
+%if %{with_sanlock} +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper %attr(0755, root, root) %{_sbindir}/libvirtd diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..edf017d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c
+LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c +
# XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
+ +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock + libexec_PROGRAMS =
if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a3fe2f1..e61ea13 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -650,6 +650,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..6a31fdf --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,413 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* Author: Daniel P. Berrange <berrange@redhat.com>
+ */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if (VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = &params[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + }
else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: + VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; 
+ } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + 
virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = sanlock_register()) < 0) { + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) {
Hum ...
+#if 1 + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); +#else + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to acquire lock")); +#endif
This is probably worth some kind of comment, or it should be cleaned up.
+ goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = +{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + 
.drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +};
I'm a bit puzzled by the new dependency, and this might prevent me from building rc1 of 0.9.2 if pushed as-is, but ACK in principle. Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Fri, May 27, 2011 at 05:37:51PM +0800, Daniel Veillard wrote:
On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote:
Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++ 5 files changed, 438 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c
diff --git a/libvirt.spec.in b/libvirt.spec.in index e85f68f..73213ea 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0}
# Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -180,6 +181,7 @@
%if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 %define with_dtrace 1 +%define with_sanlock 1 %endif
# Pull in cgroups config system @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel %if %{with_referential} BuildRequires: referential-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif
Hum ... weird
[root@paphio ~]# yum install sanlock-devel .. No package sanlock-devel available. Error: Nothing to do [root@paphio ~]# cat /etc/fedora-release Fedora release 14 (Laughlin) [root@paphio ~]#
Are you sure about the dependency?
%if %{with_storage_fs} # For mount/umount in FS driver @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
%if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1004,6 +1011,10 @@ fi %attr(0755, root, root) %{_libexecdir}/libvirt_lxc %endif
+%if %{with_sanlock} +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper %attr(0755, root, root) %{_sbindir}/libvirtd diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..edf017d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c
+LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c +
# XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
+ +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock + libexec_PROGRAMS =
if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a3fe2f1..e61ea13 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -650,6 +650,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..6a31fdf --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,413 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* Author: Daniel P. Berrange <berrange@redhat.com>
+ */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if (VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = ¶ms[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } 
else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: + VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; 
+ } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + 
virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = sanlock_register()) < 0) { + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) {
Hum ...
+#if 1 + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); +#else + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Failed to acquire lock")); +#endif
This is probably worth some kind of comment, or it should be cleaned up.
+ goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = +{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + 
.drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +};
I'm a bit puzzled by the new dependency, and this might prevent me from building rc1 of 0.9.2 if pushed as-is,
but ACK in principle.
This is last week's v4 posting. There is a v5 I posted this week which addresses the things you mention here. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, May 19, 2011 at 07:24:15AM -0400, Daniel P. Berrange wrote:
An update to
http://www.redhat.com/archives/libvir-list/2011-May/msg00622.html
New in this series:
- Split the QEMU lock manager integration into 3 patches for easier review - Actually implemented the lease hotplug/unplug code fully - Fixes from previous review of patches 1+2 - Fix QEMU startup to not rely on uninitialized variable !
NB, since this series depends on quite a few other patches I've sent but not yet merged, if you want to apply it, then it is simpler to pull from http://gitorious.org/~berrange/libvirt/staging/commits/qemu-locking Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (3)
-
Daniel P. Berrange
-
Daniel Veillard
-
Doug Goldstein