[libvirt] [PATCH 00/10 v5] Integration of lock managers in QEMU

Another update to http://www.redhat.com/archives/libvir-list/2011-May/msg01272.html New in this series: - Return values < -200 are not errnos; they are sanlock-specific error codes. - Fix state handling after migration. This is also available at http://gitorious.org/~berrange/libvirt/staging/commits/qemu-locking where it is combined with the migration series I just sent.

Allow the parent process to perform a bi-directional handshake with the child process during fork/exec. The child process will fork and do its initial setup. Immediately prior to the exec(), it will stop & wait for a handshake from the parent process. The parent process will spawn the child and wait until the child reaches the handshake point. It will do whatever extra setup work is required, before signalling the child to continue. The implementation of this is done using two pairs of blocking pipes. The first pair is used to block the parent, until the child writes a single byte. Then the second pair is used to block the child, until the parent confirms with another single byte. * src/util/command.c, src/util/command.h, src/libvirt_private.syms: Add APIs to perform a handshake --- src/libvirt_private.syms | 3 + src/util/command.c | 182 +++++++++++++++++++++++++++++++++++++++++++++- src/util/command.h | 22 ++++++ 3 files changed, 206 insertions(+), 1 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 805bddb..ca6319b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -109,11 +109,14 @@ virCommandClearCaps; virCommandDaemonize; virCommandExec; virCommandFree; +virCommandHandshakeNotify; +virCommandHandshakeWait; virCommandNew; virCommandNewArgList; virCommandNewArgs; virCommandNonblockingFDs; virCommandPreserveFD; +virCommandRequireHandshake; virCommandRun; virCommandRunAsync; virCommandSetErrorBuffer; diff --git a/src/util/command.c b/src/util/command.c index ebb90cb..2991daa 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -36,6 +36,8 @@ #include "files.h" #include "buf.h" +#include <stdlib.h> + #define VIR_FROM_THIS VIR_FROM_NONE #define virCommandError(code, ...) 
\ @@ -77,6 +79,10 @@ struct _virCommand { int *outfdptr; int *errfdptr; + bool handshake; + int handshakeWait[2]; + int handshakeNotify[2]; + virExecHook hook; void *opaque; @@ -108,6 +114,11 @@ virCommandNewArgs(const char *const*args) if (VIR_ALLOC(cmd) < 0) return NULL; + cmd->handshakeWait[0] = -1; + cmd->handshakeWait[1] = -1; + cmd->handshakeNotify[0] = -1; + cmd->handshakeNotify[1] = -1; + FD_ZERO(&cmd->preserve); FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; @@ -1174,12 +1185,61 @@ virCommandHook(void *data) virCommandPtr cmd = data; int res = 0; - if (cmd->hook) + if (cmd->hook) { + VIR_DEBUG("Run hook %p %p", cmd->hook, cmd->opaque); res = cmd->hook(cmd->opaque); + VIR_DEBUG("Done hook %d", res); + } if (res == 0 && cmd->pwd) { VIR_DEBUG("Running child in %s", cmd->pwd); res = chdir(cmd->pwd); + if (res < 0) { + virReportSystemError(errno, + _("Unable to change to %s"), cmd->pwd); + } + } + if (cmd->handshake) { + char c = res < 0 ? '0' : '1'; + int rv; + VIR_DEBUG("Notifying parent for handshake start on %d", cmd->handshakeWait[1]); + if (safewrite(cmd->handshakeWait[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify parent process")); + return -1; + } + + /* On failure we pass the error message back to parent, + * so they don't have to dig through stderr logs + */ + if (res < 0) { + virErrorPtr err = virGetLastError(); + const char *msg = err ? 
err->message : + _("Unknown failure during hook execution"); + size_t len = strlen(msg) + 1; + if (safewrite(cmd->handshakeWait[1], msg, len) != len) { + virReportSystemError(errno, "%s", _("Unable to send error to parent process")); + return -1; + } + return -1; + } + + VIR_DEBUG("Waiting on parent for handshake complete on %d", cmd->handshakeNotify[0]); + if ((rv = saferead(cmd->handshakeNotify[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait on parent process")); + else + virReportSystemError(EIO, "%s", _("libvirtd quit during handshake")); + return -1; + } + if (c != '1') { + virReportSystemError(EINVAL, _("Unexpected confirm code '%c' from parent process"), c); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); } + + VIR_DEBUG("Hook is done %d", res); + return res; } @@ -1409,6 +1469,119 @@ virCommandAbort(virCommandPtr cmd ATTRIBUTE_UNUSED) } #endif + +void virCommandRequireHandshake(virCommandPtr cmd) +{ + if (!cmd || cmd->has_error) + return; + + if (cmd->handshake) { + cmd->has_error = -1; + VIR_DEBUG("Cannot require handshake twice"); + return; + } + + if (pipe(cmd->handshakeWait) < 0) { + cmd->has_error = errno; + return; + } + if (pipe(cmd->handshakeNotify) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + cmd->has_error = errno; + return; + } + + VIR_DEBUG("Transfer handshake wait=%d notify=%d", + cmd->handshakeWait[1], cmd->handshakeNotify[0]); + virCommandTransferFD(cmd, cmd->handshakeWait[1]); + virCommandTransferFD(cmd, cmd->handshakeNotify[0]); + cmd->handshake = true; +} + +int virCommandHandshakeWait(virCommandPtr cmd) +{ + char c; + int rv; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeWait[0] == -1) 
{ + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Wait for handshake on %d", cmd->handshakeWait[0]); + if ((rv = saferead(cmd->handshakeWait[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait for child process")); + else + virReportSystemError(EIO, "%s", _("Child process quit during startup handshake")); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if (c != '1') { + char *msg; + ssize_t len; + if (VIR_ALLOC_N(msg, 1024) < 0) { + virReportOOMError(); + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return -1; + } + if ((len = saferead(cmd->handshakeWait[0], msg, 1024)) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FREE(msg); + virReportSystemError(errno, "%s", _("No error message from child failure")); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + msg[len-1] = '\0'; + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", msg); + VIR_FREE(msg); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + return 0; +} + +int virCommandHandshakeNotify(virCommandPtr cmd) +{ + char c = '1'; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error || !cmd->handshake) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + if (cmd->handshakeNotify[1] == -1) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Handshake is already complete")); + return -1; + } + + VIR_DEBUG("Notify handshake on %d", cmd->handshakeWait[0]); + if (safewrite(cmd->handshakeNotify[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify child process")); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + return 0; +} + + /* * Release all resources */ @@ -1440,6 +1613,13 @@ virCommandFree(virCommandPtr cmd) VIR_FREE(cmd->pwd); + if (cmd->handshake) { + 
VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + } + VIR_FREE(cmd->pidfile); if (cmd->reap) diff --git a/src/util/command.h b/src/util/command.h index aa5136b..95b6a5e 100644 --- a/src/util/command.h +++ b/src/util/command.h @@ -292,6 +292,28 @@ int virCommandWait(virCommandPtr cmd, int *exitstatus) ATTRIBUTE_RETURN_CHECK; /* + * Request that the child perform a handshake with + * the parent when the hook function has completed + * execution. The child will not exec() until the + * parent has notified + */ +void virCommandRequireHandshake(virCommandPtr cmd); + +/* + * Wait for the child to complete execution of its + * hook function + */ +int virCommandHandshakeWait(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* + * Notify the child that it is OK to exec() the + * real binary now + */ +int virCommandHandshakeNotify(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* * Abort an async command if it is running, without issuing * any errors or affecting errno. Designed for error paths * where some but not all paths to the cleanup code might -- 1.7.4.4

A lock manager may operate in various modes. The direct mode of operation is to obtain locks based on the resources associated with devices in the XML. The indirect mode is where the app creating the domain provides explicit leases for each resource that needs to be locked. This XML extension allows for listing resources in the XML: <devices> ... <lease> <lockspace>somearea</lockspace> <key>thequickbrownfoxjumpsoverthelazydog</key> <target path='/some/lease/path' offset='23432'/> </lease> ... </devices> The 'lockspace' is a unique identifier for the lockspace with which the lease is associated. The 'key' is a unique identifier for the resource associated with the lease. The 'target' is the file on disk where the leases are held. * docs/schemas/domain.rng: Add lease schema * src/conf/domain_conf.c, src/conf/domain_conf.h: parsing and formatting for leases * tests/qemuxml2argvdata/qemuxml2argv-lease.args, tests/qemuxml2argvdata/qemuxml2argv-lease.xml, tests/qemuxml2xmltest.c: Test XML handling for leases --- docs/formatdomain.html.in | 39 +++++++ docs/schemas/domain.rng | 24 ++++ src/conf/domain_conf.c | 134 ++++++++++++++++++++++++ src/conf/domain_conf.h | 14 +++ tests/qemuxml2argvdata/qemuxml2argv-lease.args | 4 + tests/qemuxml2argvdata/qemuxml2argv-lease.xml | 36 +++++++ tests/qemuxml2xmltest.c | 1 + 7 files changed, 252 insertions(+), 0 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f8baffd..0517119 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1071,6 +1071,45 @@ sub-element. </p> + <h4><a name="elementsLease">Device leases</a></h4> + + <p> + When using a lock manager, it may be desirable to record device leases + against a VM. The lock manager will ensure the VM won't start unless + the leases can be acquired. + </p> + +<pre> + ... + <devices> + ... 
+ <lease> + <lockspace>somearea</lockspace> + <key>somekey</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + ... + </devices> + ...</pre> + + <dl> + <dt>lockspace</dt> + <dd>This is an arbitrary string, identifying the lockspace + within which the key is held. Lock managers may impose + extra restrictions on the format, or length of the lockspace + name.</dd> + <dt>key</dt> + <dd>This is an arbitrary string, uniquely identifying the + lease to be acquired. Lock managers may impose extra + restrictions on the format, or length of the key. + </dd> + <dt>target</dt> + <dd>This is the fully qualified path of the file associated + with the lockspace. The offset specifies where the lease + is stored within the file. If the lock manager does not + require a offset, just pass 0. + </dd> + </dl> <h4><a name="elementsUSB">USB and PCI devices</a></h4> diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 1ae5b36..ac16a50 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -586,6 +586,29 @@ <ref name="address"/> </optional> </define> + + <define name="lease"> + <element name="lease"> + <interleave> + <element name="lockspace"> + <text/> + </element> + <element name="key"> + <text/> + </element> + <element name="target"> + <attribute name="path"> + <text/> + </attribute> + <optional> + <attribute name="offset"> + <ref name="unsignedInt"/> + </attribute> + </optional> + </element> + </interleave> + </element> + </define> <!-- A disk description can be either of type file or block The name of the attribute on the source element depends on the type @@ -1952,6 +1975,7 @@ <choice> <ref name="disk"/> <ref name="controller"/> + <ref name="lease"/> <ref name="filesystem"/> <ref name="interface"/> <ref name="input"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ffcf8a7..ce2355a 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -102,6 +102,7 @@ VIR_ENUM_IMPL(virDomainLifecycleCrash, 
VIR_DOMAIN_LIFECYCLE_CRASH_LAST, VIR_ENUM_IMPL(virDomainDevice, VIR_DOMAIN_DEVICE_LAST, "disk", + "lease", "filesystem", "interface", "input", @@ -645,6 +646,18 @@ void virDomainInputDefFree(virDomainInputDefPtr def) VIR_FREE(def); } +static void virDomainLeaseDefFree(virDomainLeaseDefPtr def) +{ + if (!def) + return; + + VIR_FREE(def->lockspace); + VIR_FREE(def->key); + VIR_FREE(def->path); + + VIR_FREE(def); +} + void virDomainDiskDefFree(virDomainDiskDefPtr def) { unsigned int i; @@ -907,6 +920,9 @@ void virDomainDeviceDefFree(virDomainDeviceDefPtr def) case VIR_DOMAIN_DEVICE_DISK: virDomainDiskDefFree(def->data.disk); break; + case VIR_DOMAIN_DEVICE_LEASE: + virDomainLeaseDefFree(def->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: virDomainNetDefFree(def->data.net); break; @@ -981,6 +997,10 @@ void virDomainDefFree(virDomainDefPtr def) if (!def) return; + for (i = 0 ; i < def->nleases ; i++) + virDomainLeaseDefFree(def->leases[i]); + VIR_FREE(def->leases); + for (i = 0 ; i < def->ngraphics ; i++) virDomainGraphicsDefFree(def->graphics[i]); VIR_FREE(def->graphics); @@ -1887,6 +1907,79 @@ virDomainDiskDefAssignAddress(virCapsPtr caps, virDomainDiskDefPtr def) return 0; } +/* Parse the XML definition for a lease + */ +static virDomainLeaseDefPtr +virDomainLeaseDefParseXML(xmlNodePtr node) +{ + virDomainLeaseDefPtr def; + xmlNodePtr cur; + char *lockspace = NULL; + char *key = NULL; + char *path = NULL; + char *offset = NULL; + + if (VIR_ALLOC(def) < 0) { + virReportOOMError(); + return NULL; + } + + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((key == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "key"))) { + key = (char *)xmlNodeGetContent(cur); + } else if ((lockspace == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "lockspace"))) { + lockspace = (char *)xmlNodeGetContent(cur); + } else if ((path == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + path = virXMLPropString(cur, "path"); + offset = 
virXMLPropString(cur, "offset"); + } + } + cur = cur->next; + } + + if (!key) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'key' element for lease")); + goto error; + } + if (!path) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'target' element for lease")); + goto error; + } + + if (offset && + virStrToLong_ull(offset, NULL, 10, &def->offset) < 0) { + virDomainReportError(VIR_ERR_XML_ERROR, + _("Malformed lease target offset %s"), offset); + goto error; + } + + def->key = key; + def->lockspace = lockspace; + def->path = path; + path = key = lockspace = NULL; + +cleanup: + VIR_FREE(lockspace); + VIR_FREE(key); + VIR_FREE(path); + VIR_FREE(offset); + + return def; + + error: + virDomainLeaseDefFree(def); + def = NULL; + goto cleanup; +} + + /* Parse the XML definition for a disk * @param node XML nodeset to parse for disk definition */ @@ -4993,6 +5086,10 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps, if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, NULL, flags))) goto error; + } else if (xmlStrEqual(node->name, BAD_CAST "lease")) { + dev->type = VIR_DOMAIN_DEVICE_LEASE; + if (!(dev->data.lease = virDomainLeaseDefParseXML(node))) + goto error; } else if (xmlStrEqual(node->name, BAD_CAST "filesystem")) { dev->type = VIR_DOMAIN_DEVICE_FS; if (!(dev->data.fs = virDomainFSDefParseXML(node, flags))) @@ -5865,6 +5962,23 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr caps, } VIR_FREE(nodes); + /* analysis of the resource leases */ + if ((n = virXPathNodeSet("./devices/lease", ctxt, &nodes)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot extract device leases")); + goto error; + } + if (n && VIR_ALLOC_N(def->leases, n) < 0) + goto no_memory; + for (i = 0 ; i < n ; i++) { + virDomainLeaseDefPtr lease = virDomainLeaseDefParseXML(nodes[i]); + if (!lease) + goto error; + + def->leases[def->nleases++] = lease; + } + VIR_FREE(nodes); + /* analysis of the filesystems */ if ((n = 
virXPathNodeSet("./devices/filesystem", ctxt, &nodes)) < 0) { goto error; @@ -7039,6 +7153,22 @@ virDomainLifecycleDefFormat(virBufferPtr buf, static int +virDomainLeaseDefFormat(virBufferPtr buf, + virDomainLeaseDefPtr def) +{ + virBufferAddLit(buf, " <lease>\n"); + virBufferEscapeString(buf, " <lockspace>%s</lockspace>\n", def->lockspace); + virBufferEscapeString(buf, " <key>%s</key>\n", def->key); + virBufferEscapeString(buf, " <target path='%s'", def->path); + if (def->offset) + virBufferAsprintf(buf, " offset='%llu'", def->offset); + virBufferAddLit(buf, "/>\n"); + virBufferAddLit(buf, " </lease>\n"); + + return 0; +} + +static int virDomainDiskDefFormat(virBufferPtr buf, virDomainDiskDefPtr def, int flags) @@ -8445,6 +8575,10 @@ char *virDomainDefFormat(virDomainDefPtr def, if (virDomainControllerDefFormat(&buf, def->controllers[n], flags) < 0) goto cleanup; + for (n = 0 ; n < def->nleases ; n++) + if (virDomainLeaseDefFormat(&buf, def->leases[n]) < 0) + goto cleanup; + for (n = 0 ; n < def->nfss ; n++) if (virDomainFSDefFormat(&buf, def->fss[n], flags) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 9839f0d..42606a1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -123,6 +123,15 @@ struct _virDomainDeviceInfo { } addr; }; +typedef struct _virDomainLeaseDef virDomainLeaseDef; +typedef virDomainLeaseDef *virDomainLeaseDefPtr; +struct _virDomainLeaseDef { + char *lockspace; + char *key; + char *path; + unsigned long long offset; +}; + /* Two types of disk backends */ enum virDomainDiskType { @@ -826,6 +835,7 @@ enum virDomainSmbiosMode { /* Flags for the 'type' field in next struct */ enum virDomainDeviceType { VIR_DOMAIN_DEVICE_DISK, + VIR_DOMAIN_DEVICE_LEASE, VIR_DOMAIN_DEVICE_FS, VIR_DOMAIN_DEVICE_NET, VIR_DOMAIN_DEVICE_INPUT, @@ -846,6 +856,7 @@ struct _virDomainDeviceDef { union { virDomainDiskDefPtr disk; virDomainControllerDefPtr controller; + virDomainLeaseDefPtr lease; virDomainFSDefPtr fs; 
virDomainNetDefPtr net; virDomainInputDefPtr input; @@ -1181,6 +1192,9 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels; + int nleases; + virDomainLeaseDefPtr *leases; + /* Only 1 */ virDomainChrDefPtr console; virSecurityLabelDef seclabel; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.args b/tests/qemuxml2argvdata/qemuxml2argv-lease.args new file mode 100644 index 0000000..63f9bef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1,4 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S \ +-M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait \ +-no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none \ +-serial none -parallel none -usb diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.xml b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml new file mode 100644 index 0000000..7efe1ef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml @@ -0,0 +1,36 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory>219200</memory> + <currentMemory>219200</currentMemory> + <vcpu>1</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' unit='0'/> + </disk> + <disk type='file' device='cdrom'> + <source file='/root/boot.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <controller type='ide' index='0'/> + <lease> + <lockspace>somearea</lockspace> + <key>thequickbrownfoxjumpedoverthelazydog</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + 
<memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 5bfbcab..e74c337 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -179,6 +179,7 @@ mymain(void) DO_TEST("cputune"); DO_TEST("smp"); + DO_TEST("lease"); /* These tests generate different XML */ DO_TEST_DIFFERENT("balloon-device-auto"); -- 1.7.4.4

Define the basic framework for lock manager plugins. The basic plugin API for 3rd parties to implement is defined in src/locking/lock_driver.h. This allows dlopen()able modules for alternative locking schemes; however, we do not install the header. This requires lock plugins to be in-tree, allowing the lock manager plugin API to be changed in future. The libvirt code for loading & calling into plugins is in src/locking/lock_manager.{c,h}. * include/libvirt/virterror.h, src/util/virterror.c: Add VIR_FROM_LOCKING * src/locking/lock_driver.h: API for lock driver plugins to implement * src/locking/lock_manager.c, src/locking/lock_manager.h: Internal API for managing locking * src/Makefile.am: Add locking code --- include/libvirt/virterror.h | 1 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/libvirt_private.syms | 14 ++ src/locking/README | 158 +++++++++++++++++++ src/locking/lock_driver.h | 293 +++++++++++++++++++++++++++++++++++ src/locking/lock_manager.c | 357 +++++++++++++++++++++++++++++++++++++++++++ src/locking/lock_manager.h | 65 ++++++++ src/util/virterror.c | 3 + 9 files changed, 894 insertions(+), 1 deletions(-) create mode 100644 src/locking/README create mode 100644 src/locking/lock_driver.h create mode 100644 src/locking/lock_manager.c create mode 100644 src/locking/lock_manager.h diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0708e02..efa4796 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -81,6 +81,7 @@ typedef enum { VIR_FROM_VMWARE = 39, /* Error from VMware driver */ VIR_FROM_EVENT = 40, /* Error from event loop impl */ VIR_FROM_LIBXL = 41, /* Error from libxenlight driver */ + VIR_FROM_LOCKING = 42, /* Error from lock manager */ } virErrorDomain; diff --git a/po/POTFILES.in b/po/POTFILES.in index dd44da2..9c3d287 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_manager.c 
src/lxc/lxc_container.c src/lxc/lxc_conf.c src/lxc/lxc_controller.c diff --git a/src/Makefile.am b/src/Makefile.am index 58eb2a7..a27838b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -93,7 +93,8 @@ DRIVER_SOURCES = \ datatypes.c datatypes.h \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ - libvirt.c libvirt_internal.h + libvirt.c libvirt_internal.h \ + locking/lock_manager.c locking/lock_manager.h # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ca6319b..d27d294 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -590,6 +590,20 @@ virRegisterSecretDriver; virRegisterStorageDriver; +# locking.h +virLockManagerAcquire; +virLockManagerAddResource; +virLockManagerFree; +virLockManagerInquire; +virLockManagerNew; +virLockManagerPluginNew; +virLockManagerPluginRef; +virLockManagerPluginUnref; +virLockManagerPluginUsesState; +virLockManagerPluginGetName; +virLockManagerRelease; + + # logging.h virLogDefineFilter; virLogDefineOutput; diff --git a/src/locking/README b/src/locking/README new file mode 100644 index 0000000..4fa4f89 --- /dev/null +++ b/src/locking/README @@ -0,0 +1,158 @@ + +At libvirtd startup: + + plugin = virLockManagerPluginLoad("sync-manager"); + + +At libvirtd shtudown: + + virLockManagerPluginUnload(plugin) + + +At guest startup: + + manager = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_DOMAIN, + 0); + + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + if (!virLockManagerAcquireObject(manager)) + abort.. + + run QEMU + + +At guest shutdown: + + ...send QEMU 'quit' monitor command, and/or kill(qemupid)... + + if (!virLockManagerShutdown(manager)) + kill(supervisorpid); /* XXX or leave it running ??? 
*/ + + virLockManagerFree(manager); + + + +At libvirtd restart with running guests: + + foreach still running guest + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_ATTACH); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + + +With disk hotplug: + + if (virLockManagerAcquireResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...abort hotplug attempt ... + + ...hotplug the device... + + + +With disk unhotplug: + + ...hotunplug the device... + + if (virLockManagerReleaseResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...log warning ... + + + +During migration: + + 1. On source host + + if (!virLockManagerPrepareMigrate(manager, hosturi)) + ..don't start migration.. + + 2. On dest host + + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_MIGRATE); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + char **supervisorargv; + int supervisorargc; + + supervisor = virLockManagerGetSupervisorPath(manager); + virLockManagerGetSupervisorArgs(&argv, &argc); + + cmd = qemuBuildCommandLine(supervisor, supervisorargv, supervisorargv); + + supervisorpid = virCommandExec(cmd); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + 3. Initiate migration in QEMU on source and wait for completion + + 4a. 
On failure + + 4a1 On target + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + 4a2 On source + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + + 4b. On succcess + + + 4b1 On target + + virLockManagerCompleteMigrateIn(manager, 0); + + 42 On source + + virLockManagerCompleteMigrateIn(manager, 0); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + +Notes: + + - If a lock manager impl does just VM level leases, it can + ignore all the resource paths at startup. + + - If a lock manager impl does not support migrate + it can return an error from all migrate calls + + - If a lock manger impl does not support hotplug + it can return an error from all resource acquire/release calls diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h new file mode 100644 index 0000000..40a55f6 --- /dev/null +++ b/src/locking/lock_driver.h @@ -0,0 +1,293 @@ +/* + * lock_driver.h: Defines the lock driver plugin API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_PLUGINS_LOCK_DRIVER_H__ +# define __VIR_PLUGINS_LOCK_DRIVER_H__ + +# include "internal.h" + +typedef struct _virLockManager virLockManager; +typedef virLockManager *virLockManagerPtr; + +typedef struct _virLockDriver virLockDriver; +typedef virLockDriver *virLockDriverPtr; + +typedef struct _virLockManagerParam virLockManagerParam; +typedef virLockManagerParam *virLockManagerParamPtr; + +typedef enum { + /* State passing is used to re-acquire existing leases */ + VIR_LOCK_MANAGER_USES_STATE = (1 << 0) +} virLockManagerFlags; + +typedef enum { + /* The managed object is a virtual guest domain */ + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0, +} virLockManagerObjectType; + +typedef enum { + /* The resource to be locked is a virtual disk */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0, + /* A lease against an arbitrary resource */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1, +} virLockManagerResourceType; + +typedef enum { + /* The resource is assigned in readonly mode */ + VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), + /* The resource is assigned in shared, writable mode */ + VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), +} virLockManagerResourceFlags; + +typedef enum { + /* Don't acquire the resources, just register the object PID */ + VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0) +} virLockManagerAcquireFlags; + +enum { + VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + VIR_LOCK_MANAGER_PARAM_TYPE_INT, + VIR_LOCK_MANAGER_PARAM_TYPE_LONG, + VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE, + VIR_LOCK_MANAGER_PARAM_TYPE_UUID, +}; + +struct _virLockManagerParam { + int type; + const char *key; + union { + int i; + long long l; + unsigned int ui; + unsigned long long ul; + double d; 
+ char *str; + unsigned char uuid[16]; + } value; +}; + + +/* + * Changes in major version denote incompatible ABI changes + * Changes in minor version denote new compatible API entry points + * Changes in micro version denote new compatible flags + */ +# define VIR_LOCK_MANAGER_VERSION_MAJOR 1 +# define VIR_LOCK_MANAGER_VERSION_MINOR 0 +# define VIR_LOCK_MANAGER_VERSION_MICRO 0 + +# define VIR_LOCK_MANAGER_VERSION \ + ((VIR_LOCK_MANAGER_VERSION_MAJOR * 1000 * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MINOR * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MICRO)) + + + +/** + * virLockDriverInit: + * @version: the libvirt requested plugin ABI version + * @flags: the libvirt requested plugin optional extras + * + * Allow the plugin to validate the libvirt requested + * plugin version / flags. This allows the plugin impl + * to block its use in versions of libvirtd which are + * too old to support key features. + * + * NB: A plugin may be loaded multiple times, for different + * libvirt drivers (eg QEMU, LXC, UML) + * + * Returns -1 if the requested version/flags were inadequate + */ +typedef int (*virLockDriverInit)(unsigned int version, + unsigned int flags); + +/** + * virLockDriverDeinit: + * + * Called to release any resources prior to the plugin + * being unloaded from memory. Returns -1 to prevent + * plugin from being unloaded from memory. + */ +typedef int (*virLockDriverDeinit)(void); + +/** + * virLockManagerNew: + * @man: the lock manager context + * @type: the type of process to be supervised + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: optional flags, currently unused + * + * Initialize a new context to supervise a process, usually + * a virtual machine. The lock driver implementation can use + * the <code>privateData</code> field of <code>man</code> + * to store a pointer to any driver specific state. 
+ * + * A process of VIR_LOCK_MANAGER_START_DOMAIN will be + * given the following parameters + * + * - id: the domain unique id (unsigned int) + * - uuid: the domain uuid (uuid) + * - name: the domain name (string) + * - pid: process ID to own/owning the lock (unsigned int) + * + * Returns 0 if successful initialized a new context, -1 on error + */ +typedef int (*virLockDriverNew)(virLockManagerPtr man, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverFree: + * @manager: the lock manager context + * + * Release any resources associated with the lock manager + * context private data + */ +typedef void (*virLockDriverFree)(virLockManagerPtr man); + +/** + * virLockDriverAddResource: + * @manager: the lock manager context + * @type: the resource type virLockManagerResourceType + * @name: the resource name + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: the resource access flags + * + * Assign a resource to a managed object. This will + * only be called prior to the object is being locked + * when it is inactive. eg, to set the initial boot + * time disk assignments on a VM + * The format of @name varies according to + * the resource @type. A VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK + * will have the fully qualified file path, while a resource + * of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE will have the + * unique name of the lease + * + * A resource of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE + * will receive at least the following extra parameters + * + * - 'path': a fully qualified path to the lockspace + * - 'lockspace': globally string identifying the lockspace name + * - 'offset': byte offset within the lease (unsigned long long) + * + * If no flags are given, the resource is assumed to be + * used in exclusive, read-write mode. Access can be + * relaxed to readonly, or shared read-write. 
+ * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAddResource)(virLockManagerPtr man, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverAcquire: + * @manager: the lock manager context + * @state: the current lock state + * @flags: optional flags, currently unused + * + * Start managing resources for the object. This + * must be called from the PID that represents the + * object to be managed. If the lock is lost at any + * time, the PID will be killed off by the lock manager. + * The optional state contains information about the + * locks previously held for the object. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAcquire)(virLockManagerPtr man, + const char *state, + unsigned int flags); + +/** + * virLockDriverRelease: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags + * + * Inform the lock manager that the supervised process has + * been, or can be stopped. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverRelease)(virLockManagerPtr man, + char **state, + unsigned int flags); + +/** + * virLockDriverInquire: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags, currently unused + * + * Retrieve the current lock state. The returned + * lock state may be NULL if none is required. The + * caller is responsible for freeing the lock + * state string when it is no longer required + * + * Returns 0 on success, or -1 on failure. 
+ */ +typedef int (*virLockDriverInquire)(virLockManagerPtr man, + char **state, + unsigned int flags); + + +struct _virLockManager { + virLockDriverPtr driver; + void *privateData; +}; + +/** + * The plugin must export a static instance of this + * driver table, with the name 'virLockDriverImpl' + */ +struct _virLockDriver { + /** + * @version: the newest implemented plugin ABI version + * @flags: optional flags, currently unused + */ + unsigned int version; + unsigned int flags; + + virLockDriverInit drvInit; + virLockDriverDeinit drvDeinit; + + virLockDriverNew drvNew; + virLockDriverFree drvFree; + + virLockDriverAddResource drvAddResource; + + virLockDriverAcquire drvAcquire; + virLockDriverRelease drvRelease; + virLockDriverInquire drvInquire; +}; + + +#endif /* __VIR_PLUGINS_LOCK_DRIVER_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c new file mode 100644 index 0000000..cb96091 --- /dev/null +++ b/src/locking/lock_manager.c @@ -0,0 +1,357 @@ +/* + * lock_manager.c: Implements the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_manager.h" +#include "virterror_internal.h" +#include "logging.h" +#include "util.h" +#include "memory.h" +#include "uuid.h" + +#include <dlfcn.h> +#include <stdlib.h> +#include <unistd.h> + +#include "configmake.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +#define CHECK_PLUGIN(field, errret) \ + if (!plugin->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +#define CHECK_MANAGER(field, errret) \ + if (!lock->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +struct _virLockManagerPlugin { + char *name; + virLockDriverPtr driver; + void *handle; + int refs; +}; + +#define DEFAULT_LOCK_MANAGER_PLUGIN_DIR LIBDIR "/libvirt/lock-driver" + +static void virLockManagerLogParams(size_t nparams, + virLockManagerParamPtr params) +{ + int i; + char uuidstr[VIR_UUID_STRING_BUFLEN]; + for (i = 0 ; i < nparams ; i++) { + switch (params[i].type) { + case VIR_LOCK_MANAGER_PARAM_TYPE_INT: + VIR_DEBUG(" key=%s type=int value=%d", params[i].key, params[i].value.i); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UINT: + VIR_DEBUG(" key=%s type=uint value=%u", params[i].key, params[i].value.ui); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_LONG: + VIR_DEBUG(" key=%s type=long value=%lld", params[i].key, params[i].value.l); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_ULONG: + VIR_DEBUG(" key=%s type=ulong value=%llu", params[i].key, params[i].value.ul); + break; + case 
VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE: + VIR_DEBUG(" key=%s type=double value=%lf", params[i].key, params[i].value.d); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_STRING: + VIR_DEBUG(" key=%s type=string value=%s", params[i].key, params[i].value.str); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UUID: + virUUIDFormat(params[i].value.uuid, uuidstr); + VIR_DEBUG(" key=%s type=uuid value=%s", params[i].key, uuidstr); + break; + } + } +} + + +/** + * virLockManagerPluginNew: + * @name: the name of the plugin + * @flag: optional plugin flags + * + * Attempt to load the plugin $(libdir)/libvirt/lock-driver/@name.so + * The plugin driver entry point will be resolved & invoked to obtain + * the lock manager driver. + * + * Even if the loading of the plugin succeeded, this may still + * return NULL if the plugin impl decided that we (libvirtd) + * are too old to support a feature it requires + * + * Returns a plugin object, or NULL if loading failed. + */ +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags) +{ + void *handle = NULL; + virLockDriverPtr driver; + virLockManagerPluginPtr plugin; + const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); + char *modfile = NULL; + + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + + VIR_DEBUG("Module load %s from %s", name, moddir); + + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } + + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } + + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } + + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } + + if 
(driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("plugin ABI is not compatible")); + goto cleanup; + } + + if (VIR_ALLOC(plugin) < 0) { + virReportOOMError(); + goto cleanup; + } + + plugin->driver = driver; + plugin->handle = handle; + plugin->refs = 1; + if (!(plugin->name = strdup(name))) { + virReportOOMError(); + goto cleanup; + } + + VIR_FREE(modfile); + return plugin; + +cleanup: + VIR_FREE(modfile); + if (handle) + dlclose(handle); + return NULL; +} + + +/** + * virLockManagerPluginRef: + * @plugin: the plugin implementation to ref + * + * Acquires an additional reference on the plugin. + */ +void virLockManagerPluginRef(virLockManagerPluginPtr plugin) +{ + plugin->refs++; +} + + +/** + * virLockManagerPluginUnref: + * @plugin: the plugin implementation to unref + * + * Releases a reference on the plugin. When the last reference + * is released, it will attempt to unload the plugin from memory. + * The plugin may refuse to allow unloading if this would + * result in an unsafe scenario. 
+ * + */ +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin) +{ + if (!plugin) + return; + + plugin->refs--; + + if (plugin->refs > 0) + return; + + if (plugin->driver->drvDeinit() >= 0) { + if (plugin->handle) + dlclose(plugin->handle); + } else { + VIR_WARN("Unable to unload lock maanger plugin from memory"); + return; + } + + VIR_FREE(plugin->name); + VIR_FREE(plugin); +} + + +const char *virLockManagerPluginGetName(virLockManagerPluginPtr plugin) +{ + VIR_DEBUG("plugin=%p", plugin); + + return plugin->name; +} + + +bool virLockManagerPluginUsesState(virLockManagerPluginPtr plugin) +{ + VIR_DEBUG("plugin=%p", plugin); + + return plugin->driver->flags & VIR_LOCK_MANAGER_USES_STATE; +} + + +/** + * virLockManagerNew: + * @plugin: the plugin implementation to use + * @type: the type of process to be supervised + * @flags: optional flags, currently unused + * + * Create a new context to supervise a process, usually + * a virtual machine. + * + * Returns a new lock manager context + */ +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerPtr lock; + VIR_DEBUG("plugin=%p type=%u nparams=%zu params=%p flags=%u", + plugin, type, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_PLUGIN(drvNew, NULL); + + if (VIR_ALLOC(lock) < 0) { + virReportOOMError(); + return NULL; + } + + lock->driver = plugin->driver; + + if (plugin->driver->drvNew(lock, type, nparams, params, flags) < 0) { + VIR_FREE(lock); + return NULL; + } + + return lock; +} + + +int virLockManagerAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + VIR_DEBUG("lock=%p type=%u name=%s nparams=%zu params=%p flags=%u", + lock, type, name, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_MANAGER(drvAddResource, -1); + 
+ return lock->driver->drvAddResource(lock, + type, name, + nparams, params, + flags); +} + +int virLockManagerAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state='%s' flags=%u", lock, NULLSTR(state), flags); + + CHECK_MANAGER(drvAcquire, -1); + + return lock->driver->drvAcquire(lock, state, flags); +} + + +int virLockManagerRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvRelease, -1); + + return lock->driver->drvRelease(lock, state, flags); +} + + +int virLockManagerInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvInquire, -1); + + return lock->driver->drvInquire(lock, state, flags); +} + + +int virLockManagerFree(virLockManagerPtr lock) +{ + VIR_DEBUG("lock=%p", lock); + + if (!lock) + return 0; + + CHECK_MANAGER(drvFree, -1); + + lock->driver->drvFree(lock); + + VIR_FREE(lock); + + return 0; +} diff --git a/src/locking/lock_manager.h b/src/locking/lock_manager.h new file mode 100644 index 0000000..13ad372 --- /dev/null +++ b/src/locking/lock_manager.h @@ -0,0 +1,65 @@ +/* + * lock_manager.h: Defines the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_MANAGER_H__ +# define __VIR_LOCK_MANAGER_H__ + +# include "internal.h" +# include "lock_driver.h" + +typedef struct _virLockManagerPlugin virLockManagerPlugin; +typedef virLockManagerPlugin *virLockManagerPluginPtr; + +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags); +void virLockManagerPluginRef(virLockManagerPluginPtr plugin); +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin); + +const char *virLockManagerPluginGetName(virLockManagerPluginPtr plugin); +bool virLockManagerPluginUsesState(virLockManagerPluginPtr plugin); + + +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int virLockManagerAddResource(virLockManagerPtr manager, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int virLockManagerAcquire(virLockManagerPtr manager, + const char *state, + unsigned int flags); +int virLockManagerRelease(virLockManagerPtr manager, + char **state, + unsigned int flags); +int virLockManagerInquire(virLockManagerPtr manager, + char **state, + unsigned int flags); + +int virLockManagerFree(virLockManagerPtr manager); + +#endif /* __VIR_LOCK_MANAGER_H__ */ diff --git a/src/util/virterror.c b/src/util/virterror.c index 2d7309a..95b718e 100644 --- a/src/util/virterror.c +++ b/src/util/virterror.c @@ -206,6 +206,9 @@ static const char *virErrorDomainName(virErrorDomain domain) { case VIR_FROM_EVENT: dom = "Events "; break; + case VIR_FROM_LOCKING: + dom = "Locking "; + break; } return(dom); } -- 1.7.4.4

To allow hypervisor drivers to assume that a lock driver impl will be guaranteed to exist, provide a 'nop' impl that is compiled into the library * src/Makefile.am: Add nop driver * src/locking/lock_driver_nop.c, src/locking/lock_driver_nop.h: Nop lock driver implementation * src/locking/lock_manager.c: Enable direct access of 'nop' driver, instead of dlopen()ing it. --- src/Makefile.am | 4 +- src/locking/lock_driver_nop.c | 115 +++++++++++++++++++++++++++++++++++++++++ src/locking/lock_driver_nop.h | 30 +++++++++++ src/locking/lock_manager.c | 53 ++++++++++--------- 4 files changed, 177 insertions(+), 25 deletions(-) create mode 100644 src/locking/lock_driver_nop.c create mode 100644 src/locking/lock_driver_nop.h diff --git a/src/Makefile.am b/src/Makefile.am index a27838b..96e2edf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -94,7 +94,9 @@ DRIVER_SOURCES = \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ libvirt.c libvirt_internal.h \ - locking/lock_manager.c locking/lock_manager.h + locking/lock_manager.c locking/lock_manager.h \ + locking/lock_driver.h \ + locking/lock_driver_nop.h locking/lock_driver_nop.c # XML configuration format handling sources diff --git a/src/locking/lock_driver_nop.c b/src/locking/lock_driver_nop.c new file mode 100644 index 0000000..5ebbd8d --- /dev/null +++ b/src/locking/lock_driver_nop.c @@ -0,0 +1,115 @@ +/* + * lock_driver_nop.c: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_driver_nop.h" +#include "memory.h" +#include "logging.h" +#include "uuid.h" + + +static int virLockManagerNopInit(unsigned int version, + unsigned int flags) +{ + VIR_DEBUG("version=%u flags=%u", version, flags); + + return 0; +} + +static int virLockManagerNopDeinit(void) +{ + VIR_DEBUG(" "); + + return 0; +} + + +static int virLockManagerNopNew(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int virLockManagerNopAddResource(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + const char *name ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + + +static int virLockManagerNopAcquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + const char *state ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + +static int virLockManagerNopRelease(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + *state = NULL; + + return 0; +} + +static int virLockManagerNopInquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + *state = NULL; + + return 0; +} + +static void virLockManagerNopFree(virLockManagerPtr lock ATTRIBUTE_UNUSED) +{ +} + +virLockDriver virLockDriverNop = +{ + .version = VIR_LOCK_MANAGER_VERSION, + .flags = 0, + + .drvInit = virLockManagerNopInit, + .drvDeinit = virLockManagerNopDeinit, + + .drvNew 
= virLockManagerNopNew, + .drvFree = virLockManagerNopFree, + + .drvAddResource = virLockManagerNopAddResource, + + .drvAcquire = virLockManagerNopAcquire, + .drvRelease = virLockManagerNopRelease, + + .drvInquire = virLockManagerNopInquire, +}; diff --git a/src/locking/lock_driver_nop.h b/src/locking/lock_driver_nop.h new file mode 100644 index 0000000..4be5377 --- /dev/null +++ b/src/locking/lock_driver_nop.h @@ -0,0 +1,30 @@ +/* + * lock_driver_nop.h: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_DRIVER_NOP_H__ +# define __VIR_LOCK_DRIVER_NOP_H__ + +# include "lock_driver.h" + +extern virLockDriver virLockDriverNop; + + +#endif /* __VIR_LOCK_DRIVER_NOP_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c index cb96091..6197fd4 100644 --- a/src/locking/lock_manager.c +++ b/src/locking/lock_manager.c @@ -22,6 +22,7 @@ #include <config.h> #include "lock_manager.h" +#include "lock_driver_nop.h" #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -123,35 +124,39 @@ virLockManagerPluginPtr virLockManagerPluginNew(const char *name, const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); char *modfile = NULL; - if (moddir == NULL) - moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + if (STREQ(name, "nop")) { + driver = &virLockDriverNop; + } else { + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; - VIR_DEBUG("Module load %s from %s", name, moddir); + VIR_DEBUG("Module load %s from %s", name, moddir); - if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { - virReportOOMError(); - return NULL; - } + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } - if (access(modfile, R_OK) < 0) { - virReportSystemError(errno, - _("Plugin %s not accessible"), - modfile); - goto cleanup; - } + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } - handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); - if (!handle) { - virLockError(VIR_ERR_SYSTEM_ERROR, - _("Failed to load plugin %s: %s"), - modfile, dlerror()); - goto cleanup; - } + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to 
load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } - if (!(driver = dlsym(handle, "virLockDriverImpl"))) { - virLockError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing plugin initialization symbol 'virLockDriverImpl'")); - goto cleanup; + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } } if (driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { -- 1.7.4.4

To facilitate use of the locking plugins from hypervisor drivers, introduce a higher level API for locking virDomainObjPtr instances. It includes APIs targeted at VM startup, and hotplug/unplug * src/Makefile.am: Add domain lock API * src/locking/domain_lock.c, src/locking/domain_lock.h: High level API for domain locking --- src/Makefile.am | 3 +- src/libvirt_private.syms | 11 ++ src/locking/README | 7 + src/locking/domain_lock.c | 284 +++++++++++++++++++++++++++++++++++++++++++++ src/locking/domain_lock.h | 56 +++++++++ 5 files changed, 360 insertions(+), 1 deletions(-) create mode 100644 src/locking/domain_lock.c create mode 100644 src/locking/domain_lock.h diff --git a/src/Makefile.am b/src/Makefile.am index 96e2edf..1e5a72e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ libvirt.c libvirt_internal.h \ locking/lock_manager.c locking/lock_manager.h \ locking/lock_driver.h \ - locking/lock_driver_nop.h locking/lock_driver_nop.c + locking/lock_driver_nop.h locking/lock_driver_nop.c \ + locking/domain_lock.h locking/domain_lock.c # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d27d294..779fada 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -409,6 +409,17 @@ virDomainEventWatchdogNewFromDom; virDomainEventWatchdogNewFromObj; +# domain_lock.h +virDomainLockProcessStart; +virDomainLockProcessInquire; +virDomainLockProcessPause; +virDomainLockProcessResume; +virDomainLockDiskAttach; +virDomainLockDiskDetach; +virDomainLockLeaseAttach; +virDomainLockLeaseDetach; + + # domain_nwfilter.h virDomainConfNWFilterInstantiate; virDomainConfNWFilterRegister; diff --git a/src/locking/README b/src/locking/README index 4fa4f89..da2a8f8 100644 --- a/src/locking/README +++ b/src/locking/README @@ -1,3 +1,10 @@ + Using the Lock Manager APIs + =========================== + +This file describes how to use the lock manager APIs.
+All the guest lifecycle sequences here have higher +level wrappers provided by the 'domain_lock.h' API, +which simplify thue usage At libvirtd startup: diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c new file mode 100644 index 0000000..85352e2 --- /dev/null +++ b/src/locking/domain_lock.c @@ -0,0 +1,284 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <intprops.h> + +#include "domain_lock.h" +#include "memory.h" +#include "uuid.h" +#include "virterror_internal.h" +#include "logging.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + + +static int virDomainLockManagerAddLease(virLockManagerPtr lock, + virDomainLeaseDefPtr lease) +{ + unsigned int leaseFlags = 0; + virLockManagerParam lparams[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "path", + .value = { .str = lease->path }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + .key = "offset", + .value = { .ul = lease->offset }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "lockspace", + .value = { .str = lease->lockspace }, + }, + }; + size_t nparams = ARRAY_CARDINALITY(lparams); + if (!lease->lockspace) + nparams--; + + 
VIR_DEBUG("Add lease %s", lease->path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE, + lease->key, + nparams, + lparams, + leaseFlags) < 0) { + VIR_DEBUG("Failed to add lease %s", lease->path); + return -1; + } + return 0; +} + + +static int virDomainLockManagerAddDisk(virLockManagerPtr lock, + virDomainDiskDefPtr disk) +{ + unsigned int diskFlags = 0; + if (!disk->src) + return 0; + + if (!(disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK || + disk->type == VIR_DOMAIN_DISK_TYPE_FILE || + disk->type == VIR_DOMAIN_DISK_TYPE_DIR)) + return 0; + + if (disk->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (disk->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + + VIR_DEBUG("Add disk %s", disk->src); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk->src, + 0, + NULL, + diskFlags) < 0) { + VIR_DEBUG("Failed add disk %s", disk->src); + return -1; + } + return 0; +} + +static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool withResources) +{ + virLockManagerPtr lock; + int i; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + VIR_DEBUG("plugin=%p dom=%p withResources=%d", + plugin, dom, withResources); + + memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + + if (!(lock = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))) + goto error; + + if (withResources) { + VIR_DEBUG("Adding leases"); + for (i = 0 ; i < dom->def->nleases ; i++) + if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + goto 
error; + + VIR_DEBUG("Adding disks"); + for (i = 0 ; i < dom->def->ndisks ; i++) + if (virDomainLockManagerAddDisk(lock, dom->def->disks[i]) < 0) + goto error; + } + + return lock; + +error: + virLockManagerFree(lock); + return NULL; +} + + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret; + if (paused) + ret = virLockManagerAcquire(lock, NULL, VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY); + else + ret = virLockManagerAcquire(lock, NULL, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerRelease(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerAcquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerInquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + 
virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h new file mode 100644 index 0000000..40fadd4 --- /dev/null +++ b/src/locking/domain_lock.h @@ -0,0 +1,56 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_DOMAIN_LOCK_H__ +# define __VIR_DOMAIN_LOCK_H__ + +# include "internal.h" +# include "domain_conf.h" +# include "lock_manager.h" + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool paused); +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state); +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); + +#endif /* __VIR_DOMAIN_LOCK_H__ */ -- 1.7.4.4

--- docs/internals/locking.html.in | 257 ++++++++++++++++++++++++++++++++++++++++ docs/sitemap.html.in | 4 + 2 files changed, 261 insertions(+), 0 deletions(-) create mode 100644 docs/internals/locking.html.in diff --git a/docs/internals/locking.html.in b/docs/internals/locking.html.in new file mode 100644 index 0000000..3790ef0 --- /dev/null +++ b/docs/internals/locking.html.in @@ -0,0 +1,257 @@ +<html> + <body> + <h1>Resource Lock Manager</h1> + + <ul id="toc"></ul> + + <p> + This page describes the design of the resource lock manager + that is used for locking disk images, to ensure exclusive + access to content. + </p> + + <h2><a name="goals">Goals</a></h2> + + <p> + The high level goal is to prevent the same disk image being + used by more than one QEMU instance at a time (unless the + disk is marked as shareable, or readonly). The scenarios + to be prevented are thus: + </p> + + <ol> + <li> + Two different running guests configured to point at the + same disk image. + </li> + <li> + One guest being started more than once on two different + machines due to an admin mistake. + </li> + <li> + One guest being started more than once on a single machine + due to a libvirt driver bug. + </li> + </ol> + + <h2><a name="requirement">Requirements</a></h2> + + <p> + The high level goal leads to a set of requirements + for the lock manager design: + </p> + + <ol> + <li> + A lock must be held on a disk whenever a QEMU process + has the disk open. + </li> + <li> + The lock scheme must allow QEMU to be configured with + readonly, shared write, or exclusive writable disks. + </li> + <li> + A lock handover must be performed during the migration + process where 2 QEMU processes will have the same disk + open concurrently. + </li> + <li> + The lock manager must be able to identify and kill the + process accessing the resource if the lock is revoked. + </li> + <li> + Locks can be acquired for arbitrary VM related resources, + as determined by the management application. 
+ </li> + </ol> + + <h2><a name="design">Design</a></h2> + + <p> + Within a lock manager the following series of operations + will need to be supported. + </p> + + <ul> + <li> + <strong>Register object</strong> + Register the identity of an object against which + locks will be acquired + </li> + <li> + <strong>Add resource</strong> + Associate a resource with an object for future + lock acquisition / release + </li> + <li> + <strong>Acquire locks</strong> + Acquire the locks for all resources associated + with the object + </li> + <li> + <strong>Release locks</strong> + Release the locks for all resources associated + with the object + </li> + <li> + <strong>Inquire locks</strong> + Get a representation of the state of the locks + for all resources associated with the object + </li> + </ul> + + <h2><a name="impl">Plugin Implementations</a></h2> + + <p> + Lock manager implementations are provided as LGPLv2+ + licensed, dlopen()able library modules. The plugins + will be loadable from the following location: + </p> + + <pre> +/usr/{lib,lib64}/libvirt/lock_manager/$NAME.so +</pre> + + <p> + The lock manager plugin must export a single ELF + symbol named <code>virLockDriverImpl</code>, which is + a static instance of the <code>virLockDriver</code> + struct. The struct is defined in the header file: + </p> + + <pre> + #include &lt;libvirt/plugins/lock_manager.h&gt; + </pre> + + <p> + All callbacks in the struct must be initialized + to non-NULL pointers. The semantics of each + callback are defined in the API docs embedded + in the previously mentioned header file. + </p> + + <h2><a name="qemuIntegrate">QEMU Driver integration</a></h2> + + <p> + With the QEMU driver, the lock plugin will be set + in the <code>/etc/libvirt/qemu.conf</code> configuration + file by specifying the lock manager name. 
+ </p> + + <pre> + lock_manager = "sanlock" + </pre> + + <p> + By default the lock manager will be a 'no op' implementation + for backwards compatibility. + </p> + + <h2><a name="usagePatterns">Lock usage patterns</a></h2> + + <p> + The following pseudo code illustrates the common + patterns of operations invoked on the lock + manager plugin callbacks. + </p> + + <h3><a name="usageLockAcquire">Lock acquisition</a></h3> + + <p> + Initial lock acquisition will be performed from the + process that is to own the lock. This is typically + the QEMU child process, in between the fork+exec + pairing. When adding further resources on the fly, + to an existing object holding locks, this will be + done from the libvirtd process. + </p> + + <pre> + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + if (virLockManagerAcquire(mgr, NULL, 0) < 0) + ...abort... 
+ </pre> + + <h3><a name="usageLockAttach">Lock release</a></h3> + + <p> + The locks are all implicitly released when the process + that acquired them exits; however, a process may + voluntarily give up the lock by running: + </p> + + <pre> + char *state = NULL; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + virLockManagerRelease(mgr, &amp;state, 0); + </pre> + + <p> + The returned state string can be passed to the + <code>virLockManagerAcquire</code> method to + later re-acquire the exact same locks. This + state transfer is commonly used when performing + live migration of virtual machines. By validating + the state the lock manager can ensure no other + VM has re-acquired the same locks on a different + host. The state can also be obtained without + releasing the locks, by calling the + <code>virLockManagerInquire</code> method. + </p> + + </body> +</html> diff --git a/docs/sitemap.html.in b/docs/sitemap.html.in index ad8dc7b..db2963e 100644 --- a/docs/sitemap.html.in +++ b/docs/sitemap.html.in @@ -284,6 +284,10 @@ <a href="internals/command.html">Spawning commands</a> <span>Spawning commands from libvirt driver code</span> </li> + <li> + <a href="internals/locking.html">Lock managers</a> + <span>Use lock managers to protect disk content</span> + </li> </ul> </li> <li> -- 1.7.4.4

The QEMU driver integrates with the lock manager infrastructure in a number of key places: * During startup, a lock is acquired in between the fork & exec * During startup, the libvirtd process acquires a lock before setting file labelling * During shutdown, the libvirtd process acquires a lock before restoring file labelling * During hotplug, unplug & media change the libvirtd process holds a lock while setting/restoring labels The main content lock is only ever held by the QEMU child process, or libvirtd during VM shutdown. The rest of the operations only require libvirtd to hold the metadata locks, relying on the active QEMU still holding the content lock. * src/qemu/qemu_conf.c, src/qemu/qemu_conf.h, src/qemu/libvirtd_qemu.aug, src/qemu/test_libvirtd_qemu.aug: Add config parameter for configuring lock managers * src/qemu/qemu_driver.c: Add calls to the lock manager --- src/qemu/libvirtd_qemu.aug | 1 + src/qemu/qemu.conf | 7 ++ src/qemu/qemu_conf.c | 12 ++++ src/qemu/qemu_conf.h | 3 + src/qemu/qemu_domain.c | 5 ++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 13 ++++- src/qemu/qemu_hotplug.c | 56 ++++++++++++++++- src/qemu/qemu_process.c | 130 ++++++++++++++++++++++++++++++--------- src/qemu/test_libvirtd_qemu.aug | 4 + 10 files changed, 198 insertions(+), 34 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ac30b8e..66858ae 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -48,6 +48,7 @@ module Libvirtd_qemu = | bool_entry "allow_disk_format_probing" | bool_entry "set_process_name" | int_entry "max_processes" + | str_entry "lock_manager" (* Each enty in the config is one of the following three ... *) let entry = vnc_entry diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index c70050e..2c50d9d 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -280,3 +280,10 @@ # override default value set by host OS. 
# # max_processes = 0 + +# To enable strict 'fcntl' based locking of the file +# content (to prevent two VMs writing to the same +# disk), start the 'virtlockd' service, and uncomment +# this +# +# lock_manager = "fcntl" diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index f977673..ea4d7d0 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -115,6 +115,9 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } #endif + if (!(driver->lockManager = + virLockManagerPluginNew("nop", 0))) + return -1; /* Just check the file is readable before opening it, otherwise * libvirt emits an error. @@ -428,6 +431,15 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, CHECK_TYPE("max_processes", VIR_CONF_LONG); if (p) driver->maxProcesses = p->l; + p = virConfGetValue (conf, "lock_manager"); + CHECK_TYPE ("lock_manager", VIR_CONF_STRING); + if (p && p->str) { + virLockManagerPluginUnref(driver->lockManager); + if (!(driver->lockManager = + virLockManagerPluginNew(p->str, 0))) + VIR_ERROR(_("Failed to load lock manager %s"), p->str); + } + virConfFree (conf); return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index ceec16d..bf6dcf4 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -43,6 +43,7 @@ # include "macvtap.h" # include "command.h" # include "threadpool.h" +# include "locking/lock_manager.h" # define QEMUD_CPUMASK_LEN CPU_SETSIZE @@ -124,6 +125,8 @@ struct qemud_driver { virBitmapPtr reservedVNCPorts; virSysinfoDefPtr hostsysinfo; + + virLockManagerPluginPtr lockManager; }; typedef struct _qemuDomainCmdlineDef qemuDomainCmdlineDef; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bcacb18..81ac2bc 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -101,6 +101,7 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + VIR_FREE(priv->lockState); /* This should never be 
non-NULL if we get here, but just in case... */ if (priv->mon) { @@ -157,6 +158,9 @@ static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </qemuCaps>\n"); } + if (priv->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState); + return 0; } @@ -260,6 +264,7 @@ static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes); + priv->lockState = virXPathString("string(./lockstate)", ctxt); return 0; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..0fca974 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -79,6 +79,7 @@ struct _qemuDomainObjPrivate { int persistentAddrs; virBitmapPtr qemuCaps; + char *lockState; }; struct qemuDomainWatchdogEvent diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4e09c61..112237a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,5 +1,5 @@ /* - * driver.c: core driver methods for managing qemu guests + * qemu_driver.c: core driver methods for managing qemu guests * * Copyright (C) 2006-2011 Red Hat, Inc. * Copyright (C) 2006 Daniel P. 
Berrange @@ -87,6 +87,7 @@ #include "fdstream.h" #include "configmake.h" #include "threadpool.h" +#include "locking/lock_manager.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -529,6 +530,14 @@ qemudStartup(int privileged) { } VIR_FREE(driverConf); + /* We should always at least have the 'nop' manager, so + * NULLs here are a fatal error + */ + if (!qemu_driver->lockManager) { + VIR_ERROR(_("Missing lock manager implementation")); + goto error; + } + if (qemuSecurityInit(qemu_driver) < 0) goto error; @@ -769,6 +778,8 @@ qemudShutdown(void) { virCgroupFree(&qemu_driver->cgroup); + virLockManagerPluginUnref(qemu_driver->lockManager); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); virThreadPoolFree(qemu_driver->workerPool); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 3cf7d35..a8e73c4 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -38,6 +38,7 @@ #include "pci.h" #include "files.h" #include "qemu_cgroup.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -82,9 +83,15 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, return -1; } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (!(driveAlias = qemuDeviceDriveHostAlias(origdisk, priv->qemuCaps))) goto error; @@ -115,6 +122,9 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, vm, origdisk) < 0) VIR_WARN("Unable to restore security label on ejected image %s", origdisk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, origdisk) < 0) + VIR_WARN("Unable to release lock on disk %s", origdisk->src); + VIR_FREE(origdisk->src); origdisk->src = disk->src; disk->src = NULL; @@ -128,9 +138,14 @@ int qemuDomainChangeEjectableMedia(struct 
qemud_driver *driver, error: VIR_FREE(driveAlias); + if (virSecurityManagerRestoreImageLabel(driver->securityManager, vm, disk) < 0) VIR_WARN("Unable to restore security label on new media %s", disk->src); + + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -154,9 +169,15 @@ int qemuDomainAttachPciDiskDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &disk->info) < 0) @@ -228,6 +249,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -364,10 +388,15 @@ int qemuDomainAttachSCSIDisk(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } /* We should have an address already, so make sure */ if (disk->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DRIVE) { @@ -456,6 +485,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -477,10 +509,17 @@ int qemuDomainAttachUsbMassstorageDevice(struct qemud_driver *driver, } } + if 
(virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } + /* XXX not correct once we allow attaching a USB CDROM */ if (!disk->src) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("disk source path is missing")); @@ -538,6 +577,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -1184,6 +1226,9 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on %s", dev->data.disk->src); + ret = 0; cleanup: @@ -1262,6 +1307,9 @@ int qemuDomainDetachDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on disk %s", dev->data.disk->src); + ret = 0; cleanup: diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 01b15e0..833e035 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -50,6 +50,7 @@ #include "nodeinfo.h" #include "processinfo.h" #include "domain_nwfilter.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -344,6 +345,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virDomainObjLock(vm); if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); @@ -352,6 +354,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, 
VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after state change", vm->def->name); @@ -413,6 +420,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG); @@ -420,6 +428,11 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after watchdog event", vm->def->name); @@ -492,6 +505,7 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR); @@ -499,6 +513,11 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 
0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); } @@ -1777,6 +1796,17 @@ struct qemuProcessHookData { static int qemuProcessHook(void *data) { struct qemuProcessHookData *h = data; + int ret = -1; + + /* Some later calls want pid present */ + h->vm->pid = getpid(); + + VIR_DEBUG("Obtaining domain lock"); + if (virDomainLockProcessStart(h->driver->lockManager, + h->vm, + /* QEMU is always pased initially */ + true) < 0) + goto cleanup; if (qemuProcessLimits(h->driver) < 0) return -1; @@ -1784,18 +1814,25 @@ static int qemuProcessHook(void *data) /* This must take place before exec(), so that all QEMU * memory allocation is on the correct NUMA node */ + VIR_DEBUG("Moving procss to cgroup"); if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; + goto cleanup; /* This must be done after cgroup placement to avoid resetting CPU * affinity */ + VIR_DEBUG("Setup CPU affinity"); if (qemuProcessInitCpuAffinity(h->vm) < 0) - return -1; + goto cleanup; + VIR_DEBUG("Setting up security labeling"); if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; + goto cleanup; - return 0; + ret = 0; + +cleanup: + VIR_DEBUG("Hook complete ret=%d", ret); + return ret; } @@ -1824,12 +1861,27 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int ret; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainLockProcessResume(driver->lockManager, vm, priv->lockState) < 0) { + /* Don't free priv->lockState on error, because we need + * to make sure we have state still present if the user + * tries to resume again + */ + return -1; + } + VIR_FREE(priv->lockState); + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = 
qemuMonitorStartCPUs(priv->mon, conn); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret == 0) + if (ret == 0) { virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); + } else { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } return ret; } @@ -1843,6 +1895,7 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int oldReason; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_FREE(priv->lockState); oldState = virDomainObjGetState(vm, &oldReason); virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); @@ -1850,8 +1903,13 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm, ret = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) + if (ret == 0) { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { virDomainObjSetState(vm, oldState, oldReason); + } return ret; } @@ -2096,29 +2154,6 @@ int qemuProcessStart(virConnectPtr conn, } qemuAuditSecurityLabel(vm, true); - VIR_DEBUG("Generating setting domain security labels (if required)"); - if (virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - VIR_DEBUG("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { - virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; - } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - /* Ensure no historical cgroup for this VM 
is lying around bogus * settings */ VIR_DEBUG("Ensuring no historical cgroup is lying around"); @@ -2303,6 +2338,7 @@ int qemuProcessStart(virConnectPtr conn, virCommandNonblockingFDs(cmd); virCommandSetPidFile(cmd, pidfile); virCommandDaemonize(cmd); + virCommandRequireHandshake(cmd); ret = virCommandRun(cmd, NULL); VIR_FREE(pidfile); @@ -2333,6 +2369,42 @@ int qemuProcessStart(virConnectPtr conn, #endif } + VIR_DEBUG("Waiting for handshake from child"); + if (virCommandHandshakeWait(cmd) < 0) { + ret = -1; + goto cleanup; + } + + VIR_DEBUG("Setting domain security labels"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + VIR_DEBUG("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + VIR_DEBUG("Labelling done, completing handshake to child"); + if (virCommandHandshakeNotify(cmd) < 0) { + ret = -1; + goto cleanup; + } + VIR_DEBUG("Handshake complete, child running"); + if (migrateFrom) start_paused = true; diff --git a/src/qemu/test_libvirtd_qemu.aug b/src/qemu/test_libvirtd_qemu.aug index 917bd4f..b1f9114 100644 --- a/src/qemu/test_libvirtd_qemu.aug +++ b/src/qemu/test_libvirtd_qemu.aug @@ -113,6 +113,8 @@ allow_disk_format_probing = 1 vnc_auto_unix_socket = 1 max_processes = 12345 + +lock_manager = \"fcntl\" " test Libvirtd_qemu.lns get conf = @@ -236,3 +238,5 @@ max_processes = 12345 { "vnc_auto_unix_socket" = "1" } { "#empty" } { "max_processes" = "12345" } +{ "#empty" } +{ "lock_manager" = "fcntl" } -- 1.7.4.4

Some lock managers associate state with leases, allowing a process to temporarily release its leases, and re-acquire them later, safe in the knowledge that no other process has acquired + released the leases in between. This is already used between suspend/resume operations, and must also be used across migration. This passes the lockstate in the migration cookie. If the lock manager uses lockstate, then it becomes compulsory to use the migration v3 protocol to get the cookie support. * src/qemu/qemu_driver.c: Validate that migration v2 protocol is not used if lock manager needs state transfer * src/qemu/qemu_migration.c: Transfer lock state in migration cookie XML --- src/qemu/qemu_driver.c | 27 ++++++++- src/qemu/qemu_migration.c | 136 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 151 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 112237a..fd74283 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5848,6 +5848,8 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, VIR_MIGRATE_NON_SHARED_DISK | VIR_MIGRATE_NON_SHARED_INC, -1); + qemuDriverLock(driver); + if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5864,13 +5866,19 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, goto cleanup; } - qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + ret = qemuMigrationPrepareTunnel(driver, dconn, NULL, 0, NULL, NULL, /* No cookies in v2 */ st, dname, dom_xml); - qemuDriverUnlock(driver); cleanup: + qemuDriverUnlock(driver); return ret; } @@ -5904,6 +5912,14 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, *uri_out = NULL; qemuDriverLock(driver); + + if (virLockManagerPluginUsesState(driver->lockManager)) { + 
qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + if (flags & VIR_MIGRATE_TUNNELLED) { /* this is a logical error; we never should have gotten here with * VIR_MIGRATE_TUNNELLED set @@ -5959,6 +5975,13 @@ qemudDomainMigratePerform (virDomainPtr dom, VIR_MIGRATE_NON_SHARED_INC, -1); qemuDriverLock(driver); + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot use migrate v2 protocol with lock manager %s"), + virLockManagerPluginGetName(driver->lockManager)); + goto cleanup; + } + vm = virDomainFindByUUID(&driver->domains, dom->uuid); if (!vm) { char uuidstr[VIR_UUID_STRING_BUFLEN]; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 8c447b4..291d7e5 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -41,6 +41,7 @@ #include "datatypes.h" #include "fdstream.h" #include "uuid.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -49,6 +50,7 @@ enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS, + QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE, QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -56,10 +58,11 @@ enum qemuMigrationCookieFlags { VIR_ENUM_DECL(qemuMigrationCookieFlag); VIR_ENUM_IMPL(qemuMigrationCookieFlag, QEMU_MIGRATION_COOKIE_FLAG_LAST, - "graphics"); + "graphics", "lockstate"); enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), + QEMU_MIGRATION_COOKIE_LOCKSTATE = (1 << QEMU_MIGRATION_COOKIE_FLAG_LOCKSTATE), }; typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -88,6 +91,10 @@ struct _qemuMigrationCookie { unsigned char uuid[VIR_UUID_BUFLEN]; char *name; + /* If (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) */ + char *lockState; + char *lockDriver; + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ qemuMigrationCookieGraphicsPtr 
graphics; }; @@ -113,6 +120,8 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) VIR_FREE(mig->localHostname); VIR_FREE(mig->remoteHostname); VIR_FREE(mig->name); + VIR_FREE(mig->lockState); + VIR_FREE(mig->lockDriver); VIR_FREE(mig); } @@ -278,6 +287,41 @@ qemuMigrationCookieAddGraphics(qemuMigrationCookiePtr mig, } +static int +qemuMigrationCookieAddLockstate(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom) +{ + qemuDomainObjPrivatePtr priv = dom->privateData; + + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration lockstate data already present")); + return -1; + } + + if (virDomainObjGetState(dom, NULL) == VIR_DOMAIN_PAUSED) { + if (priv->lockState && + !(mig->lockState = strdup(priv->lockState))) + return -1; + } else { + if (virDomainLockProcessInquire(driver->lockManager, dom, &mig->lockState) < 0) + return -1; + } + + if (!(mig->lockDriver = strdup(virLockManagerPluginGetName(driver->lockManager)))) { + VIR_FREE(mig->lockState); + return -1; + } + + mig->flags |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + mig->flagsMandatory |= QEMU_MIGRATION_COOKIE_LOCKSTATE; + + return 0; +} + + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -322,6 +366,15 @@ static void qemuMigrationCookieXMLFormat(virBufferPtr buf, mig->graphics) qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics); + if ((mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) && + mig->lockState) { + virBufferAsprintf(buf, " <lockstate driver='%s'>\n", + mig->lockDriver); + virBufferAsprintf(buf, " <leases>%s</leases>\n", + mig->lockState); + virBufferAddLit(buf, " </lockstate>\n"); + } + virBufferAddLit(buf, "</qemu-migration>\n"); } @@ -504,6 +557,19 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) goto error; + if ((flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) && + 
virXPathBoolean("count(./lockstate) > 0", ctxt)) { + mig->lockDriver = virXPathString("string(./lockstate[1]/@driver)", ctxt); + if (!mig->lockDriver) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing lock driver name in migration cookie")); + goto error; + } + mig->lockState = virXPathString("string(./lockstate[1]/leases[1])", ctxt); + if (mig->lockState && STREQ(mig->lockState, "")) + VIR_FREE(mig->lockState); + } + return 0; error: @@ -564,6 +630,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddGraphics(mig, driver, dom) < 0) return -1; + if (flags & QEMU_MIGRATION_COOKIE_LOCKSTATE && + qemuMigrationCookieAddLockstate(mig, driver, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(mig))) return -1; @@ -576,7 +646,8 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, static qemuMigrationCookiePtr -qemuMigrationEatCookie(virDomainObjPtr dom, +qemuMigrationEatCookie(struct qemud_driver *driver, + virDomainObjPtr dom, const char *cookiein, int cookieinlen, int flags) @@ -602,6 +673,24 @@ qemuMigrationEatCookie(virDomainObjPtr dom, flags) < 0) goto error; + if (mig->flags & QEMU_MIGRATION_COOKIE_LOCKSTATE) { + if (!mig->lockDriver) { + if (virLockManagerPluginUsesState(driver->lockManager)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Missing %s lock state for migration cookie"), + virLockManagerPluginGetName(driver->lockManager)); + goto error; + } + } else if (STRNEQ(mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Source host lock driver %s different from target %s"), + mig->lockDriver, + virLockManagerPluginGetName(driver->lockManager)); + goto error; + } + } + return mig; error: @@ -893,12 +982,12 @@ char *qemuMigrationBegin(struct qemud_driver *driver, if (!qemuMigrationIsAllowed(vm->def)) goto cleanup; - if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 
0))) goto cleanup; if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, - 0) < 0) + QEMU_MIGRATION_COOKIE_LOCKSTATE) < 0) goto cleanup; rv = qemuDomainFormatXML(driver, vm, @@ -976,7 +1065,8 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1211,7 +1301,8 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_LOCKSTATE))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -1239,6 +1330,15 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, goto endjob; } + if (mig->lockState) { + VIR_DEBUG("Received lockstate %s", mig->lockState); + VIR_FREE(priv->lockState); + priv->lockState = mig->lockState; + mig->lockState = NULL; + } else { + VIR_DEBUG("Received no lockstate"); + } + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, QEMU_MIGRATION_COOKIE_GRAPHICS) < 0) { /* We could tear down the whole guest here, but @@ -1309,7 +1409,15 @@ static int doNativeMigrate(struct qemud_driver *driver, driver, vm, uri, NULLSTR(cookiein), cookieinlen, cookieout, cookieoutlen, flags, dname, resource); - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ 
-1506,6 +1614,14 @@ static int doTunnelMigrate(struct qemud_driver *driver, driver, vm, st, NULLSTR(cookiein), cookieinlen, cookieout, cookieoutlen, flags, resource); + if (virLockManagerPluginUsesState(driver->lockManager) && + !cookieout) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Migration with lock driver %s requires cookie support"), + virLockManagerPluginGetName(driver->lockManager)); + return -1; + } + if (!qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX) && !qemuCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -1565,7 +1681,7 @@ static int doTunnelMigrate(struct qemud_driver *driver, goto cleanup; } - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -2265,7 +2381,7 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -2420,7 +2536,7 @@ int qemuMigrationConfirm(struct qemud_driver *driver, driver, conn, vm, NULLSTR(cookiein), cookieinlen, flags, retcode); - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1; if (!virDomainObjIsActive(vm)) { -- 1.7.4.4

* src/conf/domain_conf.c, src/conf/domain_conf.h: APIs for inserting/finding/removing virDomainLeaseDefPtr instances * src/qemu/qemu_driver.c: Wire up hotplug/unplug for leases * src/qemu/qemu_hotplug.h, src/qemu/qemu_hotplug.c: Support for hotplug and unplug of leases --- bootstrap | 12 +------ src/conf/domain_conf.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++ src/conf/domain_conf.h | 14 ++++++++- src/libvirt_private.syms | 6 +++ src/qemu/qemu_driver.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.c | 36 +++++++++++++++++++++ src/qemu/qemu_hotplug.h | 6 +++ 7 files changed, 177 insertions(+), 11 deletions(-) diff --git a/bootstrap b/bootstrap index 522ac70..d32db57 100755 --- a/bootstrap +++ b/bootstrap @@ -1,6 +1,6 @@ #! /bin/sh # Print a version string. -scriptversion=2011-05-16.16; # UTC +scriptversion=2011-05-11.17; # UTC # Bootstrap this package from checked-out sources. @@ -670,18 +670,10 @@ symlink_to_dir() cp -fp "$src" "$dst" } else - # Leave any existing symlink alone, if it already points to the source, - # so that broken build tools that care about symlink times - # aren't confused into doing unnecessary builds. Conversely, if the - # existing symlink's time stamp is older than the source, make it afresh, - # so that broken tools aren't confused into skipping needed builds. See - # <http://lists.gnu.org/archive/html/bug-gnulib/2011-05/msg00326.html>. 
test -h "$dst" && src_ls=`ls -diL "$src" 2>/dev/null` && set $src_ls && src_i=$1 && dst_ls=`ls -diL "$dst" 2>/dev/null` && set $dst_ls && dst_i=$1 && - test "$src_i" = "$dst_i" && - both_ls=`ls -dt "$src" "$dst"` && - test "X$both_ls" = "X$dst$nl$src" || { + test "$src_i" = "$dst_i" || { dot_dots= case $src in /*) ;; diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index ce2355a..067d4a8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -5357,6 +5357,84 @@ void virDomainControllerInsertPreAlloced(virDomainDefPtr def, } +int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + virDomainLeaseDefPtr vlease; + int i; + + for (i = 0; i < def->nleases; i++) { + vlease = def->leases[i]; + /* Either both must have lockspaces present which match.. */ + if (vlease->lockspace && lease->lockspace && + STRNEQ(vlease->lockspace, lease->lockspace)) + continue; + /* ...or neither must have a lockspace present */ + if (vlease->lockspace || lease->lockspace) + continue; + if (STREQ(vlease->key, lease->key)) + return i; + } + return -1; +} + + +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def) +{ + if (VIR_EXPAND_N(def->leases, def->nleases, 1) < 0) { + virReportOOMError(); + return -1; + } + return 0; +} + +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(def) < 0) + return -1; + + virDomainLeaseInsertPreAlloced(def, lease); + return 0; +} + + +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + if (lease == NULL) + VIR_SHRINK_N(def->leases, def->nleases, 1); + else + def->leases[def->nleases-1] = lease; +} + + +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i) +{ + if (def->nleases > 1) { + memmove(def->leases + i, + def->leases + i + 1, + sizeof(*def->leases) * + (def->nleases - (i + 1))); + VIR_SHRINK_N(def->leases, def->nleases, 1); + } else { + VIR_FREE(def->leases); + def->nleases = 0; + } +} + + +int 
virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease) +{ + int i = virDomainLeaseIndex(def, lease); + if (i < 0) + return -1; + virDomainLeaseRemoveAt(def, i); + return 0; +} + + static char *virDomainDefDefaultEmulator(virDomainDefPtr def, virCapsPtr caps) { const char *type; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 42606a1..93ebaf1 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1192,7 +1192,7 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels; - int nleases; + size_t nleases; virDomainLeaseDefPtr *leases; /* Only 1 */ @@ -1391,6 +1391,18 @@ int virDomainControllerInsert(virDomainDefPtr def, void virDomainControllerInsertPreAlloced(virDomainDefPtr def, virDomainControllerDefPtr controller); + +int virDomainLeaseIndex(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsert(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +int virDomainLeaseInsertPreAlloc(virDomainDefPtr def); +void virDomainLeaseInsertPreAlloced(virDomainDefPtr def, + virDomainLeaseDefPtr lease); +void virDomainLeaseRemoveAt(virDomainDefPtr def, size_t i); +int virDomainLeaseRemove(virDomainDefPtr def, + virDomainLeaseDefPtr lease); + int virDomainSaveXML(const char *configDir, virDomainDefPtr def, const char *xml); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 779fada..780b090 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -287,6 +287,12 @@ virDomainHostdevDefFree; virDomainHostdevModeTypeToString; virDomainHostdevSubsysTypeToString; virDomainInputDefFree; +virDomainLeaseIndex; +virDomainLeaseInsert; +virDomainLeaseInsertPreAlloc; +virDomainLeaseInsertPreAlloced; +virDomainLeaseRemove; +virDomainLeaseRemoveAt; virDomainLifecycleCrashTypeFromString; virDomainLifecycleCrashTypeToString; virDomainLifecycleTypeFromString; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index fd74283..49cbbb0 100644 --- a/src/qemu/qemu_driver.c +++ 
b/src/qemu/qemu_driver.c @@ -4074,6 +4074,13 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, dev->data.controller = NULL; break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainAttachLease(driver, vm, + dev->data.lease); + if (ret == 0) + dev->data.lease = NULL; + break; + case VIR_DOMAIN_DEVICE_NET: qemuDomainObjCheckNetTaint(driver, vm, dev->data.net, -1); ret = qemuDomainAttachNetDevice(dom->conn, driver, vm, @@ -4163,6 +4170,9 @@ qemuDomainDetachDeviceLive(virDomainObjPtr vm, case VIR_DOMAIN_DEVICE_CONTROLLER: ret = qemuDomainDetachDeviceControllerLive(driver, vm, dev); break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainDetachLease(driver, vm, dev->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: ret = qemuDomainDetachNetDevice(driver, vm, dev); break; @@ -4256,6 +4266,7 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease; switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4278,6 +4289,21 @@ qemuDomainAttachDeviceConfig(virDomainDefPtr vmdef, return -1; break; + case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseIndex(vmdef, lease) >= 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s already exists"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + if (virDomainLeaseInsert(vmdef, lease) < 0) + return -1; + + /* vmdef has the pointer. 
Generic codes for vmdef will do all jobs */ + dev->data.lease = NULL; + break; + default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent attach of device is not supported")); @@ -4292,6 +4318,7 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, virDomainDeviceDefPtr dev) { virDomainDiskDefPtr disk; + virDomainLeaseDefPtr lease; switch (dev->type) { case VIR_DOMAIN_DEVICE_DISK: @@ -4302,6 +4329,15 @@ qemuDomainDetachDeviceConfig(virDomainDefPtr vmdef, return -1; } break; + case VIR_DOMAIN_DEVICE_LEASE: + lease = dev->data.lease; + if (virDomainLeaseRemove(vmdef, lease) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + break; default: qemuReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("persistent detach of device is not supported")); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index a8e73c4..c9e2d08 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1846,3 +1846,39 @@ cleanup: return ret; } + +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + if (virDomainLeaseInsertPreAlloc(vm->def) < 0) + return -1; + + if (virDomainLockLeaseAttach(driver->lockManager, vm, lease) < 0) { + virDomainLeaseInsertPreAlloced(vm->def, NULL); + return -1; + } + + virDomainLeaseInsertPreAlloced(vm->def, lease); + return 0; +} + +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + int i; + + if ((i = virDomainLeaseIndex(vm->def, lease)) < 0) { + qemuReportError(VIR_ERR_INVALID_ARG, + _("Lease %s in lockspace %s does not exist"), + lease->key, NULLSTR(lease->lockspace)); + return -1; + } + + if (virDomainLockLeaseDetach(driver->lockManager, vm, lease) < 0) + return -1; + + virDomainLeaseRemoveAt(vm->def, i); + return 0; +} diff --git a/src/qemu/qemu_hotplug.h b/src/qemu/qemu_hotplug.h index d18b393..009f1f6 
100644 --- a/src/qemu/qemu_hotplug.h +++ b/src/qemu/qemu_hotplug.h @@ -85,6 +85,12 @@ int qemuDomainDetachHostUsbDevice(struct qemud_driver *driver, int qemuDomainDetachHostDevice(struct qemud_driver *driver, virDomainObjPtr vm, virDomainDeviceDefPtr dev); +int qemuDomainAttachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); +int qemuDomainDetachLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); #endif /* __QEMU_HOTPLUG_H__ */ -- 1.7.4.4

Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage. * src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver * configure.ac: Check for sanlock * libvirt.spec.in: Add a libvirt-lock-sanlock RPM --- configure.ac | 55 +++++ libvirt.spec.in | 34 +++ po/POTFILES.in | 1 + src/Makefile.am | 16 ++ src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 434 +++++++++++++++++++++++++++++++++++++ 6 files changed, 541 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c diff --git a/configure.ac b/configure.ac index 7cffbf2..9ef98ad 100644 --- a/configure.ac +++ b/configure.ac @@ -929,6 +929,56 @@ AC_SUBST([YAJL_CFLAGS]) AC_SUBST([YAJL_LIBS]) +dnl SANLOCK https://fedorahosted.org/sanlock/ +AC_ARG_WITH([sanlock], + AC_HELP_STRING([--with-sanlock], [use SANLOCK for lock management @<:@default=check@:>@]), + [], + [with_sanlock=check]) + +SANLOCK_CFLAGS= +SANLOCK_LIBS= +if test "x$with_sanlock" != "xno"; then + if test "x$with_sanlock" != "xyes" && test "x$with_sanlock" != "xcheck"; then + SANLOCK_CFLAGS="-I$with_sanlock/include" + SANLOCK_LIBS="-L$with_sanlock/lib" + fi + fail=0 + old_cppflags="$CPPFLAGS" + old_libs="$LIBS" + CPPFLAGS="$CPPFLAGS $SANLOCK_CFLAGS" + LIBS="$LIBS $SANLOCK_LIBS" + AC_CHECK_HEADER([sanlock.h],[],[ + if test "x$with_sanlock" = "xcheck" ; then + with_sanlock=no + else + fail=1 + fi]) + if test "x$with_sanlock" != "xno" ; then + AC_CHECK_LIB([sanlock], [sanlock_acquire],[ + SANLOCK_LIBS="$SANLOCK_LIBS -lsanlock" + with_sanlock=yes + ],[ + if test "x$with_sanlock" = "xcheck" ; then + with_sanlock=no + else + fail=1 + fi + ]) + fi + test $fail = 1 && + AC_MSG_ERROR([You must install the SANLOCK development package in order to compile libvirt]) + CPPFLAGS="$old_cppflags" + LIBS="$old_libs" + if test "x$with_sanlock" = "xyes" ; then + AC_DEFINE_UNQUOTED([HAVE_SANLOCK], 1, + [whether SANLOCK is 
available for JSON parsing/formatting]) + fi +fi +AM_CONDITIONAL([HAVE_SANLOCK], [test "x$with_sanlock" = "xyes"]) +AC_SUBST([SANLOCK_CFLAGS]) +AC_SUBST([SANLOCK_LIBS]) + + dnl PolicyKit library POLKIT_CFLAGS= POLKIT_LIBS= @@ -2440,6 +2490,11 @@ AC_MSG_NOTICE([ yajl: $YAJL_CFLAGS $YAJL_LIBS]) else AC_MSG_NOTICE([ yajl: no]) fi +if test "$with_sanlock" != "no" ; then +AC_MSG_NOTICE([ sanlock: $SANLOCK_CFLAGS $SANLOCK_LIBS]) +else +AC_MSG_NOTICE([ sanlock: no]) +fi if test "$with_avahi" = "yes" ; then AC_MSG_NOTICE([ avahi: $AVAHI_CFLAGS $AVAHI_LIBS]) else diff --git a/libvirt.spec.in b/libvirt.spec.in index c01b759..fc9659d 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0} # Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -163,6 +164,11 @@ %define with_yajl 0%{!?_without_yajl:%{server_drivers}} %endif +# Enable sanlock library for lock management with QEMU +%if 0%{?fedora} >= 15 || 0%{?rhel} >= 6 +%define with_sanlock 0%{!?_without_sanlock:%{server_drivers}} +%endif + # Enable libpcap library %if %{with_qemu} %define with_nwfilter 0%{!?_without_nwfilter:%{server_drivers}} @@ -333,6 +339,9 @@ BuildRequires: libpciaccess-devel >= 0.10.9 %if %{with_yajl} BuildRequires: yajl-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif %if %{with_libpcap} BuildRequires: libpcap-devel %endif @@ -487,6 +496,18 @@ Requires: xen-devel Includes and documentations for the C library providing an API to use the virtualization capabilities of recent versions of Linux (and other OSes). 
+%if %{with_sanlock} +%package lock-sanlock +Summary: Sanlock lock manager plugin for QEMU driver +Group: Development/Libraries +Requires: sanlock +Requires: %{name} = %{version}-%{release} + +%description lock-sanlock +Includes the Sanlock lock manager plugin for the QEMU +driver +%endif + %if %{with_python} %package python Summary: Python bindings for the libvirt library @@ -635,6 +656,10 @@ libvirt reference counting %define _without_yajl --without-yajl %endif +%if ! %{with_sanlock} +%define _without_sanlock --without-sanlock +%endif + %if ! %{with_libpcap} %define _without_libpcap --without-libpcap %endif @@ -692,6 +717,7 @@ libvirt reference counting %{?_without_hal} \ %{?_without_udev} \ %{?_without_yajl} \ + %{?_without_sanlock} \ %{?_without_libpcap} \ %{?_without_macvtap} \ %{?_without_audit} \ @@ -718,6 +744,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a %if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -1013,6 +1041,12 @@ fi %doc docs/*.xml %endif +%if %{with_sanlock} +%files lock-sanlock +%defattr(-, root, root) +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %files client -f %{name}.lang %defattr(-, root, root) %doc AUTHORS ChangeLog.gz NEWS README COPYING.LIB TODO diff --git a/po/POTFILES.in b/po/POTFILES.in index 9c3d287..c3b45f9 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 1e5a72e..15d6ee7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ 
locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c +LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c + # XML configuration format handling sources # Domain driver generic impl APIs @@ -1159,6 +1162,19 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) + +if HAVE_SANLOCK +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock +else +EXTRA_DIST += $(LOCK_DRIVER_SANLOCK_SOURCES) +endif + libexec_PROGRAMS = if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 780b090..136f887 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -652,6 +652,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..7e0610d --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,434 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if 
(VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = ¶ms[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: + VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & 
VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + 
virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unable to parse lock state %s: error %d"), + state, rv); + else + virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG("Register sanlock %d", flags); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = sanlock_register()) < 0) { + if (sock <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to open socket to sanlock daemon: error %d"), + sock); + else + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to acquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); + goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. 
When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to inquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to release lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv, res_count; + + virCheckFlags(0, -1); + + VIR_DEBUG("pid=%d", priv->vm_pid); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + if (rv <= -200) + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Failed to inquire lock: error %d"), rv); + else + virReportSystemError(-rv, "%s", + _("Failed to inquire lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = 
+{ + .version = VIR_LOCK_MANAGER_VERSION, + + .flags = VIR_LOCK_MANAGER_USES_STATE, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + .drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +}; -- 1.7.4.4

On 05/24/2011 09:21 AM, Daniel P. Berrange wrote:
Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver * configure.ac: Check for sanlock * libvirt.spec.in: Add a libvirt-lock-sanlock RPM --- configure.ac | 55 +++++ libvirt.spec.in | 34 +++
+++ b/libvirt.spec.in @@ -77,6 +77,7 @@ %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} %define with_referential 0%{!?_without_referential:1} +%define with_sanlock 0%{!?_without_sanlock:0}
# Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -163,6 +164,11 @@ %define with_yajl 0%{!?_without_yajl:%{server_drivers}} %endif
+# Enable sanlock library for lock management with QEMU +%if 0%{?fedora} >= 15 || 0%{?rhel} >= 6 +%define with_sanlock 0%{!?_without_sanlock:%{server_drivers}} +%endif
Okay, I found sanlock-devel on rawhide, but not on F14, so 'make rpm' is working for me on both machines. But I'm still not seeing sanlock-devel on my RHEL 6.1 machine; am I not looking in the right repositories, or does the RHEL conditional need to be further tuned to match the reality of when sanlock-devel will be ported to RHEL? -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (2)
-
Daniel P. Berrange
-
Eric Blake