[libvirt] [PATCH 0/8 v3] Integration of lock managers in QEMU

This is an update to http://www.redhat.com/archives/libvir-list/2011-January/msg00952.html Changes in this series: - Lock manager plugin API is dramatically simpler - Lock manager only protects disk content, not disk metadata (e.g. file ownership/selinux label changes) - Migration state transfer integrated - Updated for latest sanlock API - Locks are released upon VM pause and reacquired upon resume - Updated documentation NB, as before, the lock manager plugin API is currently *internal* only, so out-of-tree 3rd party plugin impls will not be supported. This restriction may be relaxed in the future, once we have determined that the current plugin API is suitable for long-term ABI guarantees. My intention is to merge this initial series with the nop and sanlock plugin impls, and then provide an fcntl-based impl later.

Allow the parent process to perform a bi-directional handshake with the child process during fork/exec. The child process will fork and do its initial setup. Immediately prior to the exec(), it will stop & wait for a handshake from the parent process. The parent process will spawn the child and wait until the child reaches the handshake point. It will do whatever extra setup work is required, before signalling the child to continue. The implementation of this is done using two pairs of blocking pipes. The first pair is used to block the parent, until the child writes a single byte. Then the second pair pair is used to block the child, until the parent confirms with another single byte. * src/util/command.c, src/util/command.h, src/libvirt_private.syms: Add APIs to perform a handshake --- src/libvirt_private.syms | 3 + src/util/command.c | 161 +++++++++++++++++++++++++++++++++++++++++++++- src/util/command.h | 22 ++++++ 3 files changed, 185 insertions(+), 1 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 2abed07..7d471e0 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -109,11 +109,14 @@ virCommandClearCaps; virCommandDaemonize; virCommandExec; virCommandFree; +virCommandHandshakeNotify; +virCommandHandshakeWait; virCommandNew; virCommandNewArgList; virCommandNewArgs; virCommandNonblockingFDs; virCommandPreserveFD; +virCommandRequireHandshake; virCommandRun; virCommandRunAsync; virCommandSetErrorBuffer; diff --git a/src/util/command.c b/src/util/command.c index 78586e8..94e278b 100644 --- a/src/util/command.c +++ b/src/util/command.c @@ -35,6 +35,8 @@ #include "files.h" #include "buf.h" +#include <stdlib.h> + #define VIR_FROM_THIS VIR_FROM_NONE #define virCommandError(code, ...) 
\ @@ -76,6 +78,10 @@ struct _virCommand { int *outfdptr; int *errfdptr; + bool handshake; + int handshakeWait[2]; + int handshakeNotify[2]; + virExecHook hook; void *opaque; @@ -107,6 +113,11 @@ virCommandNewArgs(const char *const*args) if (VIR_ALLOC(cmd) < 0) return NULL; + cmd->handshakeWait[0] = -1; + cmd->handshakeWait[1] = -1; + cmd->handshakeNotify[0] = -1; + cmd->handshakeNotify[1] = -1; + FD_ZERO(&cmd->preserve); FD_ZERO(&cmd->transfer); cmd->infd = cmd->outfd = cmd->errfd = -1; @@ -1160,12 +1171,61 @@ virCommandHook(void *data) virCommandPtr cmd = data; int res = 0; - if (cmd->hook) + if (cmd->hook) { + VIR_DEBUG("Run hook %p %p", cmd->hook, cmd->opaque); res = cmd->hook(cmd->opaque); + VIR_DEBUG("Done hook %d", res); + } if (res == 0 && cmd->pwd) { VIR_DEBUG("Running child in %s", cmd->pwd); res = chdir(cmd->pwd); + if (res < 0) { + virReportSystemError(errno, + _("Unable to change to %s"), cmd->pwd); + } + } + if (cmd->handshake) { + char c = res < 0 ? '0' : '1'; + int rv; + VIR_DEBUG("Notifying parent for handshake start on %d", cmd->handshakeWait[1]); + if (safewrite(cmd->handshakeWait[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify parent process")); + return -1; + } + + /* On failure we pass the error message back to parent, + * so they don't have to dig through stderr logs + */ + if (res < 0) { + virErrorPtr err = virGetLastError(); + const char *msg = err ? 
err->message : + _("Unknown failure during hook execution"); + size_t len = strlen(msg) + 1; + if (safewrite(cmd->handshakeWait[1], msg, len) != len) { + virReportSystemError(errno, "%s", _("Unable to send error to parent process")); + return -1; + } + return -1; + } + + VIR_DEBUG("Waiting on parent for handshake complete on %d", cmd->handshakeNotify[0]); + if ((rv = saferead(cmd->handshakeNotify[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait on parent process")); + else + virReportSystemError(EIO, "%s", _("libvirtd quit during handshake")); + return -1; + } + if (c != '1') { + virReportSystemError(EINVAL, _("Unexpected confirm code '%c' from parent process"), c); + return -1; + } + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); } + + VIR_DEBUG("Hook is done %d", res); + return res; } @@ -1255,6 +1315,10 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) FD_CLR(i, &cmd->transfer); } } + if (cmd->handshake) { + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + } if (ret == 0 && pid) *pid = cmd->pid; @@ -1395,6 +1459,94 @@ virCommandAbort(virCommandPtr cmd ATTRIBUTE_UNUSED) } #endif + +void virCommandRequireHandshake(virCommandPtr cmd) +{ + if (!cmd || cmd->has_error) + return; + + if (pipe(cmd->handshakeWait) < 0) { + cmd->has_error = errno; + return; + } + if (pipe(cmd->handshakeNotify) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + cmd->has_error = errno; + return; + } + + VIR_DEBUG("Transfer handshake wait=%d notify=%d", + cmd->handshakeWait[1], cmd->handshakeNotify[0]); + virCommandPreserveFD(cmd, cmd->handshakeWait[1]); + virCommandPreserveFD(cmd, cmd->handshakeNotify[0]); + cmd->handshake = true; +} + +int virCommandHandshakeWait(virCommandPtr cmd) +{ + char c; + int rv; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error) { + 
virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + VIR_DEBUG("Wait for handshake on %d", cmd->handshakeWait[0]); + if ((rv = saferead(cmd->handshakeWait[0], &c, sizeof(c))) != sizeof(c)) { + if (rv < 0) + virReportSystemError(errno, "%s", _("Unable to wait for child process")); + else + virReportSystemError(EIO, "%s", _("Child process quit during startup handshake")); + return -1; + } + if (c != '1') { + char *msg; + ssize_t len; + if (VIR_ALLOC_N(msg, 1024) < 0) { + virReportOOMError(); + return -1; + } + if ((len = saferead(cmd->handshakeWait[0], msg, 1024)) < 0) { + VIR_FREE(msg); + virReportSystemError(errno, "%s", _("No error message from child failure")); + return -1; + } + msg[len-1] = '\0'; + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", msg); + VIR_FREE(msg); + return -1; + } + return 0; +} + +int virCommandHandshakeNotify(virCommandPtr cmd) +{ + char c = '1'; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error) { + virCommandError(VIR_ERR_INTERNAL_ERROR, "%s", + _("invalid use of command API")); + return -1; + } + + VIR_DEBUG("Notify handshake on %d", cmd->handshakeWait[0]); + if (safewrite(cmd->handshakeNotify[1], &c, sizeof(c)) != sizeof(c)) { + virReportSystemError(errno, "%s", _("Unable to notify child process")); + return -1; + } + return 0; +} + + /* * Release all resources */ @@ -1426,6 +1578,13 @@ virCommandFree(virCommandPtr cmd) VIR_FREE(cmd->pwd); + if (cmd->handshake) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[1]); + } + VIR_FREE(cmd->pidfile); if (cmd->reap) diff --git a/src/util/command.h b/src/util/command.h index aa5136b..95b6a5e 100644 --- a/src/util/command.h +++ b/src/util/command.h @@ -292,6 +292,28 @@ int virCommandWait(virCommandPtr cmd, int *exitstatus) ATTRIBUTE_RETURN_CHECK; /* + * Request that the child 
perform a handshake with + * the parent when the hook function has completed + * execution. The child will not exec() until the + * parent has notified + */ +void virCommandRequireHandshake(virCommandPtr cmd); + +/* + * Wait for the child to complete execution of its + * hook function + */ +int virCommandHandshakeWait(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* + * Notify the child that it is OK to exec() the + * real binary now + */ +int virCommandHandshakeNotify(virCommandPtr cmd) + ATTRIBUTE_RETURN_CHECK; + +/* * Abort an async command if it is running, without issuing * any errors or affecting errno. Designed for error paths * where some but not all paths to the cleanup code might -- 1.7.4.4

On 05/11/2011 03:33 AM, Daniel P. Berrange wrote:
Allow the parent process to perform a bi-directional handshake with the child process during fork/exec. The child process will fork and do its initial setup. Immediately prior to the exec(), it will stop & wait for a handshake from the parent process. The parent process will spawn the child and wait until the child reaches the handshake point. It will do whatever extra setup work is required, before signalling the child to continue.
The implementation of this is done using two pairs of blocking pipes. The first pair is used to block the parent, until the child writes a single byte. Then the second pair is used to block the child, until the parent confirms with another single byte.
* src/util/command.c, src/util/command.h, src/libvirt_private.syms: Add APIs to perform a handshake --- src/libvirt_private.syms | 3 + src/util/command.c | 161 +++++++++++++++++++++++++++++++++++++++++++++- src/util/command.h | 22 ++++++ 3 files changed, 185 insertions(+), 1 deletions(-)
Hopefully there aren't too many rebase issues if Cole's virCommand cleanup series goes in first.
@@ -1255,6 +1315,10 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid) FD_CLR(i, &cmd->transfer); } } + if (cmd->handshake) { + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + VIR_FORCE_CLOSE(cmd->handshakeNotify[0]); + }
You don't need this hunk if you use virCommandTransferFD below...
@@ -1395,6 +1459,94 @@ virCommandAbort(virCommandPtr cmd ATTRIBUTE_UNUSED) } #endif
+ +void virCommandRequireHandshake(virCommandPtr cmd) +{ + if (!cmd || cmd->has_error) + return;
Avoid clobbering existing fds and causing an fd leak, by adding: if (cmd->handshake) { cmd->has_error = -1; VIR_DEBUG("cannot require handshake twice"); return; }
+ + if (pipe(cmd->handshakeWait) < 0) { + cmd->has_error = errno; + return; + } + if (pipe(cmd->handshakeNotify) < 0) { + VIR_FORCE_CLOSE(cmd->handshakeWait[0]); + VIR_FORCE_CLOSE(cmd->handshakeWait[1]); + cmd->has_error = errno; + return; + } + + VIR_DEBUG("Transfer handshake wait=%d notify=%d", + cmd->handshakeWait[1], cmd->handshakeNotify[0]); + virCommandPreserveFD(cmd, cmd->handshakeWait[1]); + virCommandPreserveFD(cmd, cmd->handshakeNotify[0]);
...here's where to use virCommandTransferFD, for slightly less bookkeeping.
+int virCommandHandshakeWait(virCommandPtr cmd) +{ + char c; + int rv; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error) {
Change to this, to avoid calling saferead on an invalid fd if no one called virCommandRequireHandshake: if (cmd->has_error || !cmd->handshake) Also, on completion you need to either set cmd->handshake = false or VIR_CLOSE the fd; if you do the latter, then on entry you should check that the fd is not -1, so that we ensure no one calls this method twice for a single child process.
+ if (c != '1') { + char *msg; + ssize_t len; + if (VIR_ALLOC_N(msg, 1024) < 0) { + virReportOOMError();
Should we stack-allocate this, to minimize the chance of a malloc failure while reporting the child's failure?
+ +int virCommandHandshakeNotify(virCommandPtr cmd) +{ + char c = '1'; + if (!cmd ||cmd->has_error == ENOMEM) { + virReportOOMError(); + return -1; + } + if (cmd->has_error) {
if (cmd->has_error || !cmd->handshake) -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

A lock manager may operate in various modes. The direct mode of operation is to obtain locks based on the resources associated with devices in the XML. The indirect mode is where the app creating the domain provides explicit leases for each resource that needs to be locked. This XML extension allows for listing resources in the XML <devices> ... <lease> <lockspace>somearea</lockspace> <key>thequickbrownfoxjumpsoverthelazydog</key> <target path='/some/lease/path' offset='23432'/> </lease> ... </devices> The 'lockspace' is a unique identifier for the lockspace which the lease is associated The 'key' is a unique identifier for the resource associated with the lease. The 'target' is the file on disk where the leases are held. * docs/schemas/domain.rng: Add lease schema * src/conf/domain_conf.c, src/conf/domain_conf.h: parsing and formatting for leases * tests/qemuxml2argvdata/qemuxml2argv-lease.args, tests/qemuxml2argvdata/qemuxml2argv-lease.xml, tests/qemuxml2xmltest.c: Test XML handling for leases --- docs/schemas/domain.rng | 22 ++++ src/conf/domain_conf.c | 134 ++++++++++++++++++++++++ src/conf/domain_conf.h | 14 +++ tests/qemuxml2argvdata/qemuxml2argv-lease.args | 1 + tests/qemuxml2argvdata/qemuxml2argv-lease.xml | 36 +++++++ tests/qemuxml2xmltest.c | 1 + 6 files changed, 208 insertions(+), 0 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-lease.xml diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng index 7163c6e..926285c 100644 --- a/docs/schemas/domain.rng +++ b/docs/schemas/domain.rng @@ -586,6 +586,27 @@ <ref name="address"/> </optional> </define> + + <define name="lease"> + <element name="lease"> + <element name="lockspace"> + <text/> + </element> + <element name="key"> + <text/> + </element> + <element name="target"> + <attribute name="path"> + <text/> + </attribute> + <optional> + <attribute name="offset"> + <ref name="unsignedInt"/> + </attribute> + 
</optional> + </element> + </element> + </define> <!-- A disk description can be either of type file or block The name of the attribute on the source element depends on the type @@ -1940,6 +1961,7 @@ <choice> <ref name="disk"/> <ref name="controller"/> + <ref name="lease"/> <ref name="filesystem"/> <ref name="interface"/> <ref name="input"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index d3efec6..a650c49 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -102,6 +102,7 @@ VIR_ENUM_IMPL(virDomainLifecycleCrash, VIR_DOMAIN_LIFECYCLE_CRASH_LAST, VIR_ENUM_IMPL(virDomainDevice, VIR_DOMAIN_DEVICE_LAST, "disk", + "lease", "filesystem", "interface", "input", @@ -587,6 +588,18 @@ void virDomainInputDefFree(virDomainInputDefPtr def) VIR_FREE(def); } +static void virDomainLeaseDefFree(virDomainLeaseDefPtr def) +{ + if (!def) + return; + + VIR_FREE(def->lockspace); + VIR_FREE(def->key); + VIR_FREE(def->path); + + VIR_FREE(def); +} + void virDomainDiskDefFree(virDomainDiskDefPtr def) { unsigned int i; @@ -849,6 +862,9 @@ void virDomainDeviceDefFree(virDomainDeviceDefPtr def) case VIR_DOMAIN_DEVICE_DISK: virDomainDiskDefFree(def->data.disk); break; + case VIR_DOMAIN_DEVICE_LEASE: + virDomainLeaseDefFree(def->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: virDomainNetDefFree(def->data.net); break; @@ -923,6 +939,10 @@ void virDomainDefFree(virDomainDefPtr def) if (!def) return; + for (i = 0 ; i < def->nleases ; i++) + virDomainLeaseDefFree(def->leases[i]); + VIR_FREE(def->leases); + for (i = 0 ; i < def->ngraphics ; i++) virDomainGraphicsDefFree(def->graphics[i]); VIR_FREE(def->graphics); @@ -1826,6 +1846,79 @@ virDomainDiskDefAssignAddress(virCapsPtr caps, virDomainDiskDefPtr def) return 0; } +/* Parse the XML definition for a lease + */ +static virDomainLeaseDefPtr +virDomainLeaseDefParseXML(xmlNodePtr node) +{ + virDomainLeaseDefPtr def; + xmlNodePtr cur; + char *lockspace = NULL; + char *key = NULL; + char *path = NULL; + char *offset = 
NULL; + + if (VIR_ALLOC(def) < 0) { + virReportOOMError(); + return NULL; + } + + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((key == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "key"))) { + key = (char *)xmlNodeGetContent(cur); + } else if ((lockspace == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "lockspace"))) { + lockspace = (char *)xmlNodeGetContent(cur); + } else if ((path == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + path = virXMLPropString(cur, "path"); + offset = virXMLPropString(cur, "offset"); + } + } + cur = cur->next; + } + + if (!key) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'key' element for lease")); + goto error; + } + if (!path) { + virDomainReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing 'target' element for lease")); + goto error; + } + + if (offset && + virStrToLong_ull(offset, NULL, 10, &def->offset) < 0) { + virDomainReportError(VIR_ERR_XML_ERROR, + _("Malformed lease target offset %s"), offset); + goto error; + } + + def->key = key; + def->lockspace = lockspace; + def->path = path; + path = key = lockspace = NULL; + +cleanup: + VIR_FREE(lockspace); + VIR_FREE(key); + VIR_FREE(path); + VIR_FREE(offset); + + return def; + + error: + virDomainLeaseDefFree(def); + def = NULL; + goto cleanup; +} + + /* Parse the XML definition for a disk * @param node XML nodeset to parse for disk definition */ @@ -4908,6 +5001,10 @@ virDomainDeviceDefPtr virDomainDeviceDefParse(virCapsPtr caps, if (!(dev->data.disk = virDomainDiskDefParseXML(caps, node, NULL, flags))) goto error; + } else if (xmlStrEqual(node->name, BAD_CAST "lease")) { + dev->type = VIR_DOMAIN_DEVICE_LEASE; + if (!(dev->data.lease = virDomainLeaseDefParseXML(node))) + goto error; } else if (xmlStrEqual(node->name, BAD_CAST "filesystem")) { dev->type = VIR_DOMAIN_DEVICE_FS; if (!(dev->data.fs = virDomainFSDefParseXML(node, flags))) @@ -5778,6 +5875,23 @@ static virDomainDefPtr virDomainDefParseXML(virCapsPtr 
caps, } VIR_FREE(nodes); + /* analysis of the resource leases */ + if ((n = virXPathNodeSet("./devices/lease", ctxt, &nodes)) < 0) { + virDomainReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot extract device leases")); + goto error; + } + if (n && VIR_ALLOC_N(def->leases, n) < 0) + goto no_memory; + for (i = 0 ; i < n ; i++) { + virDomainLeaseDefPtr lease = virDomainLeaseDefParseXML(nodes[i]); + if (!lease) + goto error; + + def->leases[def->nleases++] = lease; + } + VIR_FREE(nodes); + /* analysis of the filesystems */ if ((n = virXPathNodeSet("./devices/filesystem", ctxt, &nodes)) < 0) { virDomainReportError(VIR_ERR_INTERNAL_ERROR, @@ -6966,6 +7080,22 @@ virDomainLifecycleDefFormat(virBufferPtr buf, static int +virDomainLeaseDefFormat(virBufferPtr buf, + virDomainLeaseDefPtr def) +{ + virBufferAddLit(buf, " <lease>\n"); + virBufferEscapeString(buf, " <lockspace>%s</lockspace>\n", def->lockspace); + virBufferEscapeString(buf, " <key>%s</key>\n", def->key); + virBufferEscapeString(buf, " <target path='%s'", def->path); + if (def->offset) + virBufferAsprintf(buf, " offset='%llu'", def->offset); + virBufferAddLit(buf, "/>\n"); + virBufferAddLit(buf, " </lease>\n"); + + return 0; +} + +static int virDomainDiskDefFormat(virBufferPtr buf, virDomainDiskDefPtr def, int flags) @@ -8369,6 +8499,10 @@ char *virDomainDefFormat(virDomainDefPtr def, if (virDomainControllerDefFormat(&buf, def->controllers[n], flags) < 0) goto cleanup; + for (n = 0 ; n < def->nleases ; n++) + if (virDomainLeaseDefFormat(&buf, def->leases[n]) < 0) + goto cleanup; + for (n = 0 ; n < def->nfss ; n++) if (virDomainFSDefFormat(&buf, def->fss[n], flags) < 0) goto cleanup; diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index a0f820c..d93e681 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -123,6 +123,15 @@ struct _virDomainDeviceInfo { } addr; }; +typedef struct _virDomainLeaseDef virDomainLeaseDef; +typedef virDomainLeaseDef *virDomainLeaseDefPtr; +struct 
_virDomainLeaseDef { + char *lockspace; + char *key; + char *path; + unsigned long long offset; +}; + /* Two types of disk backends */ enum virDomainDiskType { @@ -815,6 +824,7 @@ enum virDomainSmbiosMode { /* Flags for the 'type' field in next struct */ enum virDomainDeviceType { VIR_DOMAIN_DEVICE_DISK, + VIR_DOMAIN_DEVICE_LEASE, VIR_DOMAIN_DEVICE_FS, VIR_DOMAIN_DEVICE_NET, VIR_DOMAIN_DEVICE_INPUT, @@ -835,6 +845,7 @@ struct _virDomainDeviceDef { union { virDomainDiskDefPtr disk; virDomainControllerDefPtr controller; + virDomainLeaseDefPtr lease; virDomainFSDefPtr fs; virDomainNetDefPtr net; virDomainInputDefPtr input; @@ -1170,6 +1181,9 @@ struct _virDomainDef { int nchannels; virDomainChrDefPtr *channels; + int nleases; + virDomainLeaseDefPtr *leases; + /* Only 1 */ virDomainChrDefPtr console; virSecurityLabelDef seclabel; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.args b/tests/qemuxml2argvdata/qemuxml2argv-lease.args new file mode 100644 index 0000000..4a347ad --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none -serial none -parallel none -usb diff --git a/tests/qemuxml2argvdata/qemuxml2argv-lease.xml b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml new file mode 100644 index 0000000..7efe1ef --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.xml @@ -0,0 +1,36 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory>219200</memory> + <currentMemory>219200</currentMemory> + <vcpu>1</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + 
<emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' unit='0'/> + </disk> + <disk type='file' device='cdrom'> + <source file='/root/boot.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' unit='0'/> + </disk> + <controller type='ide' index='0'/> + <lease> + <lockspace>somearea</lockspace> + <key>thequickbrownfoxjumpedoverthelazydog</key> + <target path='/some/lease/path' offset='1024'/> + </lease> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 5bfbcab..e74c337 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -179,6 +179,7 @@ mymain(void) DO_TEST("cputune"); DO_TEST("smp"); + DO_TEST("lease"); /* These tests generate different XML */ DO_TEST_DIFFERENT("balloon-device-auto"); -- 1.7.4.4

On 05/11/2011 03:33 AM, Daniel P. Berrange wrote:
A lock manager may operate in various modes. The direct mode of operation is to obtain locks based on the resources associated with devices in the XML. The indirect mode is where the app creating the domain provides explicit leases for each resource that needs to be locked. This XML extension allows for listing resources in the XML
<devices> ... <lease> <lockspace>somearea</lockspace> <key>thequickbrownfoxjumpsoverthelazydog</key> <target path='/some/lease/path' offset='23432'/> </lease> ... </devices>
The 'lockspace' is a unique identifier for the lockspace with which the lease is associated.
The 'key' is a unique identifier for the resource associated with the lease.
The 'target' is the file on disk where the leases are held.
Sounds reasonable on the XML front. However,
* docs/schemas/domain.rng: Add lease schema * src/conf/domain_conf.c, src/conf/domain_conf.h: parsing and formatting for leases * tests/qemuxml2argvdata/qemuxml2argv-lease.args, tests/qemuxml2argvdata/qemuxml2argv-lease.xml, tests/qemuxml2xmltest.c: Test XML handling for leases --- docs/schemas/domain.rng | 22 ++++
Patch is incomplete - you need something in docs/formatdomain.html.in before you can commit this.
+++ b/docs/schemas/domain.rng @@ -586,6 +586,27 @@ <ref name="address"/> </optional> </define> + + <define name="lease"> + <element name="lease">
<interleave>
+ <element name="lockspace"> + <text/> + </element> + <element name="key"> + <text/> + </element> + <element name="target"> + <attribute name="path"> + <text/> + </attribute> + <optional> + <attribute name="offset"> + <ref name="unsignedInt"/> + </attribute> + </optional> + </element>
</interleave>
+ </element> + </define>
+ + cur = node->children; + while (cur != NULL) { + if (cur->type == XML_ELEMENT_NODE) { + if ((key == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "key"))) { + key = (char *)xmlNodeGetContent(cur); + } else if ((lockspace == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "lockspace"))) { + lockspace = (char *)xmlNodeGetContent(cur); + } else if ((path == NULL) && + (xmlStrEqual(cur->name, BAD_CAST "target"))) { + path = virXMLPropString(cur, "path"); + offset = virXMLPropString(cur, "offset"); + }
Do we want to reject unknown elements? Here, you just silently ignore them.
+++ b/tests/qemuxml2argvdata/qemuxml2argv-lease.args @@ -0,0 +1 @@ +LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test /usr/bin/qemu -S -M pc -m 214 -smp 1 -nographic -monitor unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -hda /dev/HostVG/QEMUGuest1 -cdrom /root/boot.iso -net none -serial none -parallel none -usb
Please break this file up, using backslash-newline, so that it generally fits in 80 columns. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Define the basic framework lock manager plugins. The basic plugin API for 3rd parties to implemented is defined in src/locking/lock_driver.h This allows dlopen()able modules for alternative locking schemes, however, we do not install the header. This requires lock plugins to be in-tree allowing changing of the lock manager plugin API in future. The libvirt code for loading & calling into plugins is in src/locking/lock_manager.{c,h} * include/libvirt/virterror.h, src/util/virterror.c: Add VIR_FROM_LOCKING * src/locking/lock_driver.h: API for lock driver plugins to implement * src/locking/lock_manager.c, src/locking/lock_manager.h: Internal API for managing locking * src/Makefile.am: Add locking code --- include/libvirt/virterror.h | 1 + po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/libvirt_private.syms | 12 ++ src/locking/README | 158 ++++++++++++++++++++ src/locking/lock_driver.h | 288 +++++++++++++++++++++++++++++++++++++ src/locking/lock_manager.c | 335 +++++++++++++++++++++++++++++++++++++++++++ src/locking/lock_manager.h | 61 ++++++++ src/util/virterror.c | 3 + 9 files changed, 861 insertions(+), 1 deletions(-) create mode 100644 src/locking/README create mode 100644 src/locking/lock_driver.h create mode 100644 src/locking/lock_manager.c create mode 100644 src/locking/lock_manager.h diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 0708e02..efa4796 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -81,6 +81,7 @@ typedef enum { VIR_FROM_VMWARE = 39, /* Error from VMware driver */ VIR_FROM_EVENT = 40, /* Error from event loop impl */ VIR_FROM_LIBXL = 41, /* Error from libxenlight driver */ + VIR_FROM_LOCKING = 42, /* Error from lock manager */ } virErrorDomain; diff --git a/po/POTFILES.in b/po/POTFILES.in index 96180d5..52fee59 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_manager.c 
src/lxc/lxc_container.c src/lxc/lxc_conf.c src/lxc/lxc_controller.c diff --git a/src/Makefile.am b/src/Makefile.am index 75ece49..de83831 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -93,7 +93,8 @@ DRIVER_SOURCES = \ datatypes.c datatypes.h \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ - libvirt.c libvirt_internal.h + libvirt.c libvirt_internal.h \ + locking/lock_manager.c locking/lock_manager.h # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7d471e0..9cbcc78 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -577,6 +577,18 @@ virRegisterSecretDriver; virRegisterStorageDriver; +# locking.h +virLockManagerAcquire; +virLockManagerAddResource; +virLockManagerFree; +virLockManagerInquire; +virLockManagerNew; +virLockManagerPluginNew; +virLockManagerPluginRef; +virLockManagerPluginUnref; +virLockManagerRelease; + + # logging.h virLogDefineFilter; virLogDefineOutput; diff --git a/src/locking/README b/src/locking/README new file mode 100644 index 0000000..4fa4f89 --- /dev/null +++ b/src/locking/README @@ -0,0 +1,158 @@ + +At libvirtd startup: + + plugin = virLockManagerPluginLoad("sync-manager"); + + +At libvirtd shtudown: + + virLockManagerPluginUnload(plugin) + + +At guest startup: + + manager = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_DOMAIN, + 0); + + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + if (!virLockManagerAcquireObject(manager)) + abort.. + + run QEMU + + +At guest shutdown: + + ...send QEMU 'quit' monitor command, and/or kill(qemupid)... + + if (!virLockManagerShutdown(manager)) + kill(supervisorpid); /* XXX or leave it running ??? 
*/ + + virLockManagerFree(manager); + + + +At libvirtd restart with running guests: + + foreach still running guest + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_ATTACH); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + + +With disk hotplug: + + if (virLockManagerAcquireResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...abort hotplug attempt ... + + ...hotplug the device... + + + +With disk unhotplug: + + ...hotunplug the device... + + if (virLockManagerReleaseResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path + ..flags..)) + ...log warning ... + + + +During migration: + + 1. On source host + + if (!virLockManagerPrepareMigrate(manager, hosturi)) + ..don't start migration.. + + 2. On dest host + + manager = virLockManagerNew(driver, + VIR_LOCK_MANAGER_START_DOMAIN, + VIR_LOCK_MANAGER_NEW_MIGRATE); + virLockManagerSetParameter(manager, "id", id); + virLockManagerSetParameter(manager, "uuid", uuid); + virLockManagerSetParameter(manager, "name", name); + + foreach disk + virLockManagerRegisterResource(manager, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk.path, + ..flags...); + + char **supervisorargv; + int supervisorargc; + + supervisor = virLockManagerGetSupervisorPath(manager); + virLockManagerGetSupervisorArgs(&argv, &argc); + + cmd = qemuBuildCommandLine(supervisor, supervisorargv, supervisorargv); + + supervisorpid = virCommandExec(cmd); + + if (!virLockManagerGetChild(manager, &qemupid)) + kill(supervisorpid); /* XXX or leave it running ??? */ + + 3. Initiate migration in QEMU on source and wait for completion + + 4a. 
On failure + + 4a1 On target + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + 4a2 On source + + virLockManagerCompleteMigrateIn(manager, + VIR_LOCK_MANAGER_MIGRATE_CANCEL); + + 4b. On succcess + + + 4b1 On target + + virLockManagerCompleteMigrateIn(manager, 0); + + 42 On source + + virLockManagerCompleteMigrateIn(manager, 0); + virLockManagerShutdown(manager); + virLockManagerFree(manager); + + +Notes: + + - If a lock manager impl does just VM level leases, it can + ignore all the resource paths at startup. + + - If a lock manager impl does not support migrate + it can return an error from all migrate calls + + - If a lock manger impl does not support hotplug + it can return an error from all resource acquire/release calls diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h new file mode 100644 index 0000000..4df7262 --- /dev/null +++ b/src/locking/lock_driver.h @@ -0,0 +1,288 @@ +/* + * lock_driver.h: Defines the lock driver plugin API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_PLUGINS_LOCK_DRIVER_H__ +# define __VIR_PLUGINS_LOCK_DRIVER_H__ + +# include "internal.h" + +typedef struct _virLockManager virLockManager; +typedef virLockManager *virLockManagerPtr; + +typedef struct _virLockDriver virLockDriver; +typedef virLockDriver *virLockDriverPtr; + +typedef struct _virLockManagerParam virLockManagerParam; +typedef virLockManagerParam *virLockManagerParamPtr; + +enum { + /* The managed object is a virtual guest domain */ + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0, +} virLockManagerObjectType; + +enum { + /* The resource to be locked is a virtual disk */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0, + /* A lease against an arbitrary resource */ + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1, +} virLockManagerResourceType; + +typedef enum { + /* The resource is assigned in readonly mode */ + VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), + /* The resource is assigned in shared, writable mode */ + VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), +} virLockManagerResourceFlags; + +typedef enum { + /* Don't acquire the resources, just register the object PID */ + VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0) +} virLockManagerAcquireFlags; + +enum { + VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + VIR_LOCK_MANAGER_PARAM_TYPE_INT, + VIR_LOCK_MANAGER_PARAM_TYPE_LONG, + VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE, + VIR_LOCK_MANAGER_PARAM_TYPE_UUID, +}; + +struct _virLockManagerParam { + int type; + const char *key; + union { + int i; + long long l; + unsigned int ui; + unsigned long long ul; + double d; + char *str; + unsigned char uuid[16]; + } value; +}; + + +/* + * Changes in major version denote incompatible ABI changes + * Changes in minor version denote 
new compatible API entry points + * Changes in micro version denote new compatible flags + */ +# define VIR_LOCK_MANAGER_VERSION_MAJOR 1 +# define VIR_LOCK_MANAGER_VERSION_MINOR 0 +# define VIR_LOCK_MANAGER_VERSION_MICRO 0 + +# define VIR_LOCK_MANAGER_VERSION \ + ((VIR_LOCK_MANAGER_VERSION_MAJOR * 1000 * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MINOR * 1000) + \ + (VIR_LOCK_MANAGER_VERSION_MICRO)) + + + +/** + * virLockDriverInit: + * @version: the libvirt requested plugin ABI version + * @flags: the libvirt requested plugin optional extras + * + * Allow the plugin to validate the libvirt requested + * plugin version / flags. This allows the plugin impl + * to block its use in versions of libvirtd which are + * too old to support key features. + * + * NB: A plugin may be loaded multiple times, for different + * libvirt drivers (eg QEMU, LXC, UML) + * + * Returns -1 if the requested version/flags were inadequate + */ +typedef int (*virLockDriverInit)(unsigned int version, + unsigned int flags); + +/** + * virLockDriverDeinit: + * + * Called to release any resources prior to the plugin + * being unloaded from memory. Returns -1 to prevent + * plugin from being unloaded from memory. + */ +typedef int (*virLockDriverDeinit)(void); + +/** + * virLockManagerNew: + * @man: the lock manager context + * @type: the type of process to be supervised + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: optional flags, currently unused + * + * Initialize a new context to supervise a process, usually + * a virtual machine. The lock driver implementation can use + * the <code>privateData</code> field of <code>man</code> + * to store a pointer to any driver specific state. 
+ * + * A process of VIR_LOCK_MANAGER_START_DOMAIN will be + * given the following parameters + * + * - id: the domain unique id (unsigned int) + * - uuid: the domain uuid (uuid) + * - name: the domain name (string) + * - pid: process ID to own/owning the lock (unsigned int) + * + * Returns 0 if successful initialized a new context, -1 on error + */ +typedef int (*virLockDriverNew)(virLockManagerPtr man, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverFree: + * @manager: the lock manager context + * + * Release any resources associated with the lock manager + * context private data + */ +typedef void (*virLockDriverFree)(virLockManagerPtr man); + +/** + * virLockDriverAddResource: + * @manager: the lock manager context + * @type: the resource type virLockManagerResourceType + * @name: the resource name + * @nparams: number of metadata parameters + * @params: extra metadata parameters + * @flags: the resource access flags + * + * Assign a resource to a managed object. This will + * only be called prior to the object is being locked + * when it is inactive. eg, to set the initial boot + * time disk assignments on a VM + * The format of @name varies according to + * the resource @type. A VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK + * will have the fully qualified file path, while a resource + * of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE will have the + * unique name of the lease + * + * A resource of type VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE + * will receive at least the following extra parameters + * + * - 'path': a fully qualified path to the lockspace + * - 'lockspace': globally string identifying the lockspace name + * - 'offset': byte offset within the lease (unsigned long long) + * + * If no flags are given, the resource is assumed to be + * used in exclusive, read-write mode. Access can be + * relaxed to readonly, or shared read-write. 
+ * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAddResource)(virLockManagerPtr man, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +/** + * virLockDriverAcquire: + * @manager: the lock manager context + * @state: the current lock state + * @flags: optional flags, currently unused + * + * Start managing resources for the object. This + * must be called from the PID that represents the + * object to be managed. If the lock is lost at any + * time, the PID will be killed off by the lock manager. + * The optional state contains information about the + * locks previously held for the object. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverAcquire)(virLockManagerPtr man, + const char *state, + unsigned int flags); + +/** + * virLockDriverRelease: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags + * + * Inform the lock manager that the supervised process has + * been, or can be stopped. + * + * Returns 0 on success, or -1 on failure + */ +typedef int (*virLockDriverRelease)(virLockManagerPtr man, + char **state, + unsigned int flags); + +/** + * virLockDriverInquire: + * @manager: the lock manager context + * @state: pointer to be filled with lock state + * @flags: optional flags, currently unused + * + * Retrieve the current lock state. The returned + * lock state may be NULL if none is required. The + * caller is responsible for freeing the lock + * state string when it is no longer required + * + * Returns 0 on success, or -1 on failure. 
+ */ +typedef int (*virLockDriverInquire)(virLockManagerPtr man, + char **state, + unsigned int flags); + + +struct _virLockManager { + virLockDriverPtr driver; + void *privateData; +}; + +/** + * The plugin must export a static instance of this + * driver table, with the name 'virLockDriverImpl' + */ +struct _virLockDriver { + /** + * @version: the newest implemented plugin ABI version + * @flags: optional flags, currently unused + */ + unsigned int version; + unsigned int flags; + + virLockDriverInit drvInit; + virLockDriverDeinit drvDeinit; + + virLockDriverNew drvNew; + virLockDriverFree drvFree; + + virLockDriverAddResource drvAddResource; + + virLockDriverAcquire drvAcquire; + virLockDriverRelease drvRelease; + virLockDriverInquire drvInquire; +}; + + +#endif /* __VIR_PLUGINS_LOCK_DRIVER_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c new file mode 100644 index 0000000..17fd008 --- /dev/null +++ b/src/locking/lock_manager.c @@ -0,0 +1,335 @@ +/* + * lock_manager.c: Implements the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_manager.h" +#include "virterror_internal.h" +#include "logging.h" +#include "util.h" +#include "memory.h" +#include "uuid.h" + +#include <dlfcn.h> +#include <stdlib.h> +#include <unistd.h> + +#include "configmake.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) \ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +#define CHECK_PLUGIN(field, errret) \ + if (!plugin->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +#define CHECK_MANAGER(field, errret) \ + if (!lock->driver->field) { \ + virLockError(VIR_ERR_INTERNAL_ERROR, \ + _("Missing '%s' field in lock manager driver"), \ + #field); \ + return errret; \ + } + +struct _virLockManagerPlugin { + virLockDriverPtr driver; + void *handle; + int refs; +}; + +#define DEFAULT_LOCK_MANAGER_PLUGIN_DIR LIBDIR "/libvirt/lock-driver" + +static void virLockManagerLogParams(size_t nparams, + virLockManagerParamPtr params) +{ + int i; + char uuidstr[VIR_UUID_STRING_BUFLEN]; + for (i = 0 ; i < nparams ; i++) { + switch (params[i].type) { + case VIR_LOCK_MANAGER_PARAM_TYPE_INT: + VIR_DEBUG(" key=%s type=int value=%d", params[i].key, params[i].value.i); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UINT: + VIR_DEBUG(" key=%s type=uint value=%u", params[i].key, params[i].value.ui); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_LONG: + VIR_DEBUG(" key=%s type=long value=%lld", params[i].key, params[i].value.l); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_ULONG: + VIR_DEBUG(" key=%s type=ulong value=%llu", params[i].key, params[i].value.ul); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_DOUBLE: + 
VIR_DEBUG(" key=%s type=double value=%lf", params[i].key, params[i].value.d); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_STRING: + VIR_DEBUG(" key=%s type=string value=%s", params[i].key, params[i].value.str); + break; + case VIR_LOCK_MANAGER_PARAM_TYPE_UUID: + virUUIDFormat(params[i].value.uuid, uuidstr); + VIR_DEBUG(" key=%s type=uuid value=%s", params[i].key, uuidstr); + break; + } + } +} + + +/** + * virLockManagerPluginNew: + * @name: the name of the plugin + * @flag: optional plugin flags + * + * Attempt to load the plugin $(libdir)/libvirt/lock-driver/@name.so + * The plugin driver entry point will be resolved & invoked to obtain + * the lock manager driver. + * + * Even if the loading of the plugin succeeded, this may still + * return NULL if the plugin impl decided that we (libvirtd) + * are too old to support a feature it requires + * + * Returns a plugin object, or NULL if loading failed. + */ +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags) +{ + void *handle = NULL; + virLockDriverPtr driver; + virLockManagerPluginPtr plugin; + const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); + char *modfile = NULL; + + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + + VIR_DEBUG("Module load %s from %s", name, moddir); + + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } + + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } + + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } + + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } + + if (driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { + 
virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("plugin ABI is not compatible")); + goto cleanup; + } + + if (VIR_ALLOC(plugin) < 0) { + virReportOOMError(); + goto cleanup; + } + + plugin->driver = driver; + plugin->handle = handle; + plugin->refs = 1; + + VIR_FREE(modfile); + return plugin; + +cleanup: + VIR_FREE(modfile); + if (handle) + dlclose(handle); + return NULL; +} + + +/** + * virLockManagerPluginRef: + * @plugin: the plugin implementation to ref + * + * Acquires an additional reference on the plugin. + */ +void virLockManagerPluginRef(virLockManagerPluginPtr plugin) +{ + plugin->refs++; +} + + +/** + * virLockManagerPluginUnref: + * @plugin: the plugin implementation to unref + * + * Releases a reference on the plugin. When the last reference + * is released, it will attempt to unload the plugin from memory. + * The plugin may refuse to allow unloading if this would + * result in an unsafe scenario. + * + */ +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin) +{ + if (!plugin) + return; + + plugin->refs--; + + if (plugin->refs > 0) + return; + + if (plugin->driver->drvDeinit() >= 0) { + if (plugin->handle) + dlclose(plugin->handle); + } else { + VIR_WARN0("Unable to unload lock maanger plugin from memory"); + return; + } + + VIR_FREE(plugin); +} + + +/** + * virLockManagerNew: + * @plugin: the plugin implementation to use + * @type: the type of process to be supervised + * @flags: optional flags, currently unused + * + * Create a new context to supervise a process, usually + * a virtual machine. 
+ * + * Returns a new lock manager context + */ +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerPtr lock; + VIR_DEBUG("plugin=%p type=%u nparams=%zu params=%p flags=%u", + plugin, type, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_PLUGIN(drvNew, NULL); + + if (VIR_ALLOC(lock) < 0) { + virReportOOMError(); + return NULL; + } + + lock->driver = plugin->driver; + + if (plugin->driver->drvNew(lock, type, nparams, params, flags) < 0) { + VIR_FREE(lock); + return NULL; + } + + return lock; +} + + +int virLockManagerAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + VIR_DEBUG("lock=%p type=%u name=%s nparams=%zu params=%p flags=%u", + lock, type, name, nparams, params, flags); + virLockManagerLogParams(nparams, params); + + CHECK_MANAGER(drvAddResource, -1); + + return lock->driver->drvAddResource(lock, + type, name, + nparams, params, + flags); +} + +int virLockManagerAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state='%s' flags=%u", lock, NULLSTR(state), flags); + + CHECK_MANAGER(drvAcquire, -1); + + return lock->driver->drvAcquire(lock, state, flags); +} + + +int virLockManagerRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvRelease, -1); + + return lock->driver->drvRelease(lock, state, flags); +} + + +int virLockManagerInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + VIR_DEBUG("lock=%p state=%p flags=%u", lock, state, flags); + + CHECK_MANAGER(drvInquire, -1); + + return lock->driver->drvInquire(lock, state, flags); +} + + +int virLockManagerFree(virLockManagerPtr lock) +{ + VIR_DEBUG("lock=%p", lock); + + if (!lock) 
+ return 0; + + CHECK_MANAGER(drvFree, -1); + + lock->driver->drvFree(lock); + + VIR_FREE(lock); + + return 0; +} diff --git a/src/locking/lock_manager.h b/src/locking/lock_manager.h new file mode 100644 index 0000000..0a108c0 --- /dev/null +++ b/src/locking/lock_manager.h @@ -0,0 +1,61 @@ +/* + * lock_manager.h: Defines the internal lock manager API + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_MANAGER_H__ +# define __VIR_LOCK_MANAGER_H__ + +# include "internal.h" +# include "lock_driver.h" + +typedef struct _virLockManagerPlugin virLockManagerPlugin; +typedef virLockManagerPlugin *virLockManagerPluginPtr; + +virLockManagerPluginPtr virLockManagerPluginNew(const char *name, + unsigned int flags); +void virLockManagerPluginRef(virLockManagerPluginPtr plugin); +void virLockManagerPluginUnref(virLockManagerPluginPtr plugin); + +virLockManagerPtr virLockManagerNew(virLockManagerPluginPtr plugin, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int virLockManagerAddResource(virLockManagerPtr manager, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags); + +int 
virLockManagerAcquire(virLockManagerPtr manager, + const char *state, + unsigned int flags); +int virLockManagerRelease(virLockManagerPtr manager, + char **state, + unsigned int flags); +int virLockManagerInquire(virLockManagerPtr manager, + char **state, + unsigned int flags); + +int virLockManagerFree(virLockManagerPtr manager); + +#endif /* __VIR_LOCK_MANAGER_H__ */ diff --git a/src/util/virterror.c b/src/util/virterror.c index fbb4a45..4b1f92f 100644 --- a/src/util/virterror.c +++ b/src/util/virterror.c @@ -206,6 +206,9 @@ static const char *virErrorDomainName(virErrorDomain domain) { case VIR_FROM_EVENT: dom = "Events "; break; + case VIR_FROM_LOCKING: + dom = "Locking "; + break; } return(dom); } -- 1.7.4.4

To allow hypervisor drivers to assume that a lock driver impl will be guaranteed to exist, provide a 'nop' impl that is compiled into the library * src/Makefile.am: Add nop driver * src/locking/lock_driver_nop.c, src/locking/lock_driver_nop.h: Nop lock driver implementation * src/locking/lock_manager.c: Enable direct access of 'nop' driver, instead of dlopen()ing it. --- src/Makefile.am | 4 +- src/locking/lock_driver_nop.c | 115 +++++++++++++++++++++++++++++++++++++++++ src/locking/lock_driver_nop.h | 30 +++++++++++ src/locking/lock_manager.c | 53 ++++++++++--------- 4 files changed, 177 insertions(+), 25 deletions(-) create mode 100644 src/locking/lock_driver_nop.c create mode 100644 src/locking/lock_driver_nop.h diff --git a/src/Makefile.am b/src/Makefile.am index de83831..71320a4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -94,7 +94,9 @@ DRIVER_SOURCES = \ fdstream.c fdstream.h \ $(NODE_INFO_SOURCES) \ libvirt.c libvirt_internal.h \ - locking/lock_manager.c locking/lock_manager.h + locking/lock_manager.c locking/lock_manager.h \ + locking/lock_driver.h \ + locking/lock_driver_nop.h locking/lock_driver_nop.c # XML configuration format handling sources diff --git a/src/locking/lock_driver_nop.c b/src/locking/lock_driver_nop.c new file mode 100644 index 0000000..bada9a7 --- /dev/null +++ b/src/locking/lock_driver_nop.c @@ -0,0 +1,115 @@ +/* + * lock_driver_nop.c: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include "lock_driver_nop.h" +#include "memory.h" +#include "logging.h" +#include "uuid.h" + + +static int virLockManagerNopInit(unsigned int version, + unsigned int flags) +{ + VIR_DEBUG("version=%u flags=%u", version, flags); + + return 0; +} + +static int virLockManagerNopDeinit(void) +{ + VIR_DEBUG0(""); + + return 0; +} + + +static int virLockManagerNopNew(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int virLockManagerNopAddResource(virLockManagerPtr lock ATTRIBUTE_UNUSED, + unsigned int type ATTRIBUTE_UNUSED, + const char *name ATTRIBUTE_UNUSED, + size_t nparams ATTRIBUTE_UNUSED, + virLockManagerParamPtr params ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + + +static int virLockManagerNopAcquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + const char *state ATTRIBUTE_UNUSED, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + return 0; +} + +static int virLockManagerNopRelease(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + *state = NULL; + + return 0; +} + +static int virLockManagerNopInquire(virLockManagerPtr lock ATTRIBUTE_UNUSED, + char **state, + unsigned int flags ATTRIBUTE_UNUSED) +{ + + *state = NULL; + + return 0; +} + +static void virLockManagerNopFree(virLockManagerPtr lock ATTRIBUTE_UNUSED) +{ +} + +virLockDriver virLockDriverNop = +{ + .version = VIR_LOCK_MANAGER_VERSION, + .flags = 0, + + .drvInit = virLockManagerNopInit, + .drvDeinit = virLockManagerNopDeinit, + + .drvNew 
= virLockManagerNopNew, + .drvFree = virLockManagerNopFree, + + .drvAddResource = virLockManagerNopAddResource, + + .drvAcquire = virLockManagerNopAcquire, + .drvRelease = virLockManagerNopRelease, + + .drvInquire = virLockManagerNopInquire, +}; diff --git a/src/locking/lock_driver_nop.h b/src/locking/lock_driver_nop.h new file mode 100644 index 0000000..4be5377 --- /dev/null +++ b/src/locking/lock_driver_nop.h @@ -0,0 +1,30 @@ +/* + * lock_driver_nop.h: A lock driver which locks nothing + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_LOCK_DRIVER_NOP_H__ +# define __VIR_LOCK_DRIVER_NOP_H__ + +# include "lock_driver.h" + +extern virLockDriver virLockDriverNop; + + +#endif /* __VIR_LOCK_DRIVER_NOP_H__ */ diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c index 17fd008..d6b8ca8 100644 --- a/src/locking/lock_manager.c +++ b/src/locking/lock_manager.c @@ -22,6 +22,7 @@ #include <config.h> #include "lock_manager.h" +#include "lock_driver_nop.h" #include "virterror_internal.h" #include "logging.h" #include "util.h" @@ -122,35 +123,39 @@ virLockManagerPluginPtr virLockManagerPluginNew(const char *name, const char *moddir = getenv("LIBVIRT_LOCK_MANAGER_PLUGIN_DIR"); char *modfile = NULL; - if (moddir == NULL) - moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; + if (STREQ(name, "nop")) { + driver = &virLockDriverNop; + } else { + if (moddir == NULL) + moddir = DEFAULT_LOCK_MANAGER_PLUGIN_DIR; - VIR_DEBUG("Module load %s from %s", name, moddir); + VIR_DEBUG("Module load %s from %s", name, moddir); - if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { - virReportOOMError(); - return NULL; - } + if (virAsprintf(&modfile, "%s/%s.so", moddir, name) < 0) { + virReportOOMError(); + return NULL; + } - if (access(modfile, R_OK) < 0) { - virReportSystemError(errno, - _("Plugin %s not accessible"), - modfile); - goto cleanup; - } + if (access(modfile, R_OK) < 0) { + virReportSystemError(errno, + _("Plugin %s not accessible"), + modfile); + goto cleanup; + } - handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); - if (!handle) { - virLockError(VIR_ERR_SYSTEM_ERROR, - _("Failed to load plugin %s: %s"), - modfile, dlerror()); - goto cleanup; - } + handle = dlopen(modfile, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + virLockError(VIR_ERR_SYSTEM_ERROR, + _("Failed to 
load plugin %s: %s"), + modfile, dlerror()); + goto cleanup; + } - if (!(driver = dlsym(handle, "virLockDriverImpl"))) { - virLockError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing plugin initialization symbol 'virLockDriverImpl'")); - goto cleanup; + if (!(driver = dlsym(handle, "virLockDriverImpl"))) { + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing plugin initialization symbol 'virLockDriverImpl'")); + goto cleanup; + } } if (driver->drvInit(VIR_LOCK_MANAGER_VERSION, flags) < 0) { -- 1.7.4.4

To facilitate use of the locking plugins from hypervisor drivers, introduce a higher level API for locking virDomainObjPtr instances. It includes APIs targeted at VM startup and hotplug/unplug * src/Makefile.am: Add domain lock API * src/locking/domain_lock.c, src/locking/domain_lock.h: High level API for domain locking --- src/Makefile.am | 3 +- src/libvirt_private.syms | 11 ++ src/locking/README | 7 + src/locking/domain_lock.c | 284 +++++++++++++++++++++++++++++++++++++++++++++ src/locking/domain_lock.h | 56 +++++++++ 5 files changed, 360 insertions(+), 1 deletions(-) create mode 100644 src/locking/domain_lock.c create mode 100644 src/locking/domain_lock.h diff --git a/src/Makefile.am b/src/Makefile.am index 71320a4..347fd87 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ libvirt.c libvirt_internal.h \ locking/lock_manager.c locking/lock_manager.h \ locking/lock_driver.h \ - locking/lock_driver_nop.h locking/lock_driver_nop.c + locking/lock_driver_nop.h locking/lock_driver_nop.c \ + locking/domain_lock.h locking/domain_lock.c # XML configuration format handling sources diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 9cbcc78..7fcb700 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -396,6 +396,17 @@ virDomainEventWatchdogNewFromDom; virDomainEventWatchdogNewFromObj; +# domain_lock.h +virDomainLockProcessStart; +virDomainLockProcessInquire; +virDomainLockProcessPause; +virDomainLockProcessResume; +virDomainLockDiskAttach; +virDomainLockDiskDetach; +virDomainLockLeaseAttach; +virDomainLockLeaseDetach; + + # domain_nwfilter.h virDomainConfNWFilterInstantiate; virDomainConfNWFilterRegister; diff --git a/src/locking/README b/src/locking/README index 4fa4f89..da2a8f8 100644 --- a/src/locking/README +++ b/src/locking/README @@ -1,3 +1,10 @@ + Using the Lock Manager APIs + =========================== + +This file describes how to use the lock manager APIs. 
+All the guest lifecycle sequences here have higher +level wrappers provided by the 'domain_lock.h' API, +which simplify the usage At libvirtd startup: diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c new file mode 100644 index 0000000..3644a28 --- /dev/null +++ b/src/locking/domain_lock.c @@ -0,0 +1,284 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <intprops.h> + +#include "domain_lock.h" +#include "memory.h" +#include "uuid.h" +#include "virterror_internal.h" +#include "logging.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + + +static int virDomainLockManagerAddLease(virLockManagerPtr lock, + virDomainLeaseDefPtr lease) +{ + unsigned int leaseFlags = 0; + virLockManagerParam lparams[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "path", + .value = { .str = lease->path }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_ULONG, + .key = "offset", + .value = { .ul = lease->offset }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "lockspace", + .value = { .str = lease->lockspace }, + }, + }; + size_t nparams = ARRAY_CARDINALITY(lparams); + if (!lease->lockspace) + nparams--; + + 
VIR_DEBUG("Add lease %s", lease->path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE, + lease->key, + nparams, + lparams, + leaseFlags) < 0) { + VIR_DEBUG("Failed to add lease %s", lease->path); + return -1; + } + return 0; +} + + +static int virDomainLockManagerAddDisk(virLockManagerPtr lock, + virDomainDiskDefPtr disk) +{ + unsigned int diskFlags = 0; + if (!disk->src) + return 0; + + if (!(disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK || + disk->type == VIR_DOMAIN_DISK_TYPE_FILE || + disk->type == VIR_DOMAIN_DISK_TYPE_DIR)) + return 0; + + if (disk->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (disk->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + + VIR_DEBUG("Add disk %s", disk->src); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + disk->src, + 0, + NULL, + diskFlags) < 0) { + VIR_DEBUG("Failed add disk %s", disk->src); + return -1; + } + return 0; +} + +static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool withResources) +{ + virLockManagerPtr lock; + int i; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + VIR_DEBUG("plugin=%p dom=%p withResources=%d", + plugin, dom, withResources); + + memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + + if (!(lock = virLockManagerNew(plugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0))) + goto error; + + if (withResources) { + VIR_DEBUG0("Adding leases"); + for (i = 0 ; i < dom->def->nleases ; i++) + if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + 
goto error; + + VIR_DEBUG0("Adding disks"); + for (i = 0 ; i < dom->def->ndisks ; i++) + if (virDomainLockManagerAddDisk(lock, dom->def->disks[i]) < 0) + goto error; + } + + return lock; + +error: + virLockManagerFree(lock); + return NULL; +} + + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool incomingMigrate) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret; + if (incomingMigrate) + ret = virLockManagerAcquire(lock, NULL, VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY); + else + ret = virLockManagerAcquire(lock, NULL, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerRelease(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerAcquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, true); + int ret = virLockManagerInquire(lock, state, 0); + + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + 
virDomainDiskDefPtr disk) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddDisk(lock, disk) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} + +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease) +{ + virLockManagerPtr lock = virDomainLockManagerNew(plugin, dom, false); + int ret = -1; + + if (virDomainLockManagerAddLease(lock, lease) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + +cleanup: + virLockManagerFree(lock); + + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h new file mode 100644 index 0000000..e64ddb5 --- /dev/null +++ b/src/locking/domain_lock.h @@ -0,0 +1,56 @@ +/* + * domain_lock.c: Locking for domain lifecycle operations + * + * Copyright (C) 2010-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __VIR_DOMAIN_LOCK_H__ +# define __VIR_DOMAIN_LOCK_H__ + +# include "internal.h" +# include "domain_conf.h" +# include "lock_manager.h" + +int virDomainLockProcessStart(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + bool incomingMigrate); +int virDomainLockProcessPause(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); +int virDomainLockProcessResume(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + const char *state); +int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + char **state); + +int virDomainLockDiskAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockDiskDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); +int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainLeaseDefPtr lease); + +#endif /* __VIR_DOMAIN_LOCK_H__ */ -- 1.7.4.4

--- docs/internals/locking.html.in | 257 ++++++++++++++++++++++++++++++++++++++++ docs/sitemap.html.in | 4 + 2 files changed, 261 insertions(+), 0 deletions(-) create mode 100644 docs/internals/locking.html.in diff --git a/docs/internals/locking.html.in b/docs/internals/locking.html.in new file mode 100644 index 0000000..3790ef0 --- /dev/null +++ b/docs/internals/locking.html.in @@ -0,0 +1,257 @@ +<html> + <body> + <h1>Resource Lock Manager</h1> + + <ul id="toc"></ul> + + <p> + This page describes the design of the resource lock manager + that is used for locking disk images, to ensure exclusive + access to content. + </p> + + <h2><a name="goals">Goals</a></h2> + + <p> + The high level goal is to prevent the same disk image being + used by more than one QEMU instance at a time (unless the + disk is marked as sharable, or readonly). The scenarios + to be prevented are thus: + </p> + + <ol> + <li> + Two different guests running configured to point at the + same disk image. + </li> + <li> + One guest being started more than once on two different + machines due to admin mistake + </li> + <li> + One guest being started more than once on a single machine + due to libvirt driver bug on a single machine. + </li> + </ol> + + <h2><a name="requirement">Requirements</a></h2> + + <p> + The high level goal leads to a set of requirements + for the lock manager design + </p> + + <ol> + <li> + A lock must be held on a disk whenever a QEMU process + has the disk open + </li> + <li> + The lock scheme must allow QEMU to be configured with + readonly, shared write, or exclusive writable disks + </li> + <li> + A lock handover must be performed during the migration + process where 2 QEMU processes will have the same disk + open concurrently. + </li> + <li> + The lock manager must be able to identify and kill the + process accessing the resource if the lock is revoked. + </li> + <li> + Locks can be acquired for arbitrary VM related resources, + as determined by the management application. 
+ </li> + </ol> + + <h2><a name="design">Design</a></h2> + + <p> + Within a lock manager the following series of operations + will need to be supported. + </p> + + <ul> + <li> + <strong>Register object</strong> + Register the identity of an object against which + locks will be acquired + </li> + <li> + <strong>Add resource</strong> + Associate a resource with an object for future + lock acquisition / release + </li> + <li> + <strong>Acquire locks</strong> + Acquire the locks for all resources associated + with the object + </li> + <li> + <strong>Release locks</strong> + Release the locks for all resources associated + with the object + </li> + <li> + <strong>Inquire locks</strong> + Get a representation of the state of the locks + for all resources associated with the object + </li> + </ul> + + <h2><a name="impl">Plugin Implementations</a></h2> + + <p> + Lock manager implementations are provided as LGPLv2+ + licensed, dlopen()able library modules. The plugins + will be loadable from the following location: + </p> + + <pre> +/usr/{lib,lib64}/libvirt/lock_manager/$NAME.so +</pre> + + <p> + The lock manager plugin must export a single ELF + symbol named <code>virLockDriverImpl</code>, which is + a static instance of the <code>virLockDriver</code> + struct. The struct is defined in the header file + </p> + + <pre> + #include &lt;libvirt/plugins/lock_manager.h&gt; + </pre> + + <p> + All callbacks in the struct must be initialized + to non-NULL pointers. The semantics of each + callback are defined in the API docs embedded + in the previously mentioned header file + </p> + + <h2><a name="qemuIntegrate">QEMU Driver integration</a></h2> + + <p> + With the QEMU driver, the lock plugin will be set + in the <code>/etc/libvirt/qemu.conf</code> configuration + file by specifying the lock manager name. 
+ </p> + + <pre> + lock_manager = "sanlock" + </pre> + + <p> + By default the lock manager will be a 'no op' implementation + for backwards compatibility + </p> + + <h2><a name="usagePatterns">Lock usage patterns</a></h2> + + <p> + The following pseudo code illustrates the common + patterns of operations invoked on the lock + manager plugin callbacks. + </p> + + <h3><a name="usageLockAcquire">Lock acquisition</a></h3> + + <p> + Initial lock acquisition will be performed from the + process that is to own the lock. This is typically + the QEMU child process, in between the fork+exec + pairing. When adding further resources on the fly, + to an existing object holding locks, this will be + done from the libvirtd process. + </p> + + <pre> + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + if (virLockManagerAcquire(mgr, NULL, 0) &lt; 0) + ...abort... 
+ </pre> + + <h3><a name="usageLockAttach">Lock release</a></h3> + + <p> + The locks are all implicitly released when the process + that acquired them exits, however, a process may + voluntarily give up the lock by running + </p> + + <pre> + char *state = NULL; + virLockManagerParam params[] = { + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, + .key = "uuid", + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, + .key = "name", + .value = { .str = dom->def->name }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "id", + .value = { .i = dom->def->id }, + }, + { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, + .key = "pid", + .value = { .i = dom->pid }, + }, + }; + mgr = virLockManagerNew(lockPlugin, + VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, + ARRAY_CARDINALITY(params), + params, + 0); + + foreach (initial disks) + virLockManagerAddResource(mgr, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + $path, 0, NULL, $flags); + + virLockManagerRelease(mgr, &amp;state, 0); + </pre> + + <p> + The returned state string can be passed to the + <code>virLockManagerAcquire</code> method to + later re-acquire the exact same locks. This + state transfer is commonly used when performing + live migration of virtual machines. By validating + the state the lock manager can ensure no other + VM has re-acquired the same locks on a different + host. The state can also be obtained without + releasing the locks, by calling the + <code>virLockManagerInquire</code> method. + </p> + + </body> +</html> diff --git a/docs/sitemap.html.in b/docs/sitemap.html.in index 5650fee..0f90ce8 100644 --- a/docs/sitemap.html.in +++ b/docs/sitemap.html.in @@ -284,6 +284,10 @@ <a href="internals/command.html">Spawning commands</a> <span>Spawning commands from libvirt driver code</span> </li> + <li> + <a href="internals/locking.html">Lock managers</a> + <span>Use lock managers to protect disk content</span> + </li> </ul> </li> <li> -- 1.7.4.4

The QEMU driver integrates with the lock manager infrastructure in a number of key places * During startup, a lock is acquired in between the fork & exec * During startup, the libvirtd process acquires a lock before setting file labelling * During shutdown, the libvirtd process acquires a lock before restoring file labelling * During hotplug, unplug & media change the libvirtd process holds a lock while setting/restoring labels The main content lock is only ever held by the QEMU child process, or libvirtd during VM shutdown. The rest of the operations only require libvirtd to hold the metadata locks, relying on the active QEMU still holding the content lock. * src/qemu/qemu_conf.c, src/qemu/qemu_conf.h, src/qemu/libvirtd_qemu.aug, src/qemu/test_libvirtd_qemu.aug: Add config parameter for configuring lock managers * src/qemu/qemu_driver.c: Add calls to the lock manager --- src/qemu/libvirtd_qemu.aug | 2 + src/qemu/qemu.conf | 15 +++++ src/qemu/qemu_conf.c | 12 ++++ src/qemu/qemu_conf.h | 3 + src/qemu/qemu_domain.c | 5 ++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 23 +++++++- src/qemu/qemu_hotplug.c | 80 +++++++++++++++++++++++- src/qemu/qemu_hotplug.h | 6 ++ src/qemu/qemu_migration.c | 42 ++++++++++--- src/qemu/qemu_process.c | 126 ++++++++++++++++++++++++++++++--------- src/qemu/test_libvirtd_qemu.aug | 6 ++ 12 files changed, 278 insertions(+), 43 deletions(-) diff --git a/src/qemu/libvirtd_qemu.aug b/src/qemu/libvirtd_qemu.aug index ac30b8e..5b7a45c 100644 --- a/src/qemu/libvirtd_qemu.aug +++ b/src/qemu/libvirtd_qemu.aug @@ -48,6 +48,8 @@ module Libvirtd_qemu = | bool_entry "allow_disk_format_probing" | bool_entry "set_process_name" | int_entry "max_processes" + | str_entry "content_lock_manager" + | str_entry "metadata_lock_manager" (* Each enty in the config is one of the following three ... 
*) let entry = vnc_entry diff --git a/src/qemu/qemu.conf b/src/qemu/qemu.conf index c70050e..d67e5ce 100644 --- a/src/qemu/qemu.conf +++ b/src/qemu/qemu.conf @@ -280,3 +280,18 @@ # override default value set by host OS. # # max_processes = 0 + +# To enable strict 'fcntl' based locking of the file +# content (to prevent two VMs writing to the same +# disk), start the 'virtlockd' service, and uncomment +# this +# +# content_lock_manager = "fcntl" + + +# To enable strict 'fcntl' based locking of the file +# metadata (to prevent two libvirtd daemons on different +# hosts doing conflicting metadata changes), start the +# 'virtlockd' service, and uncomment this +# +# metadata_lock_manager = "fcntl" diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index bb5421b..2a27d12 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -115,6 +115,9 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, } #endif + if (!(driver->lockManager = + virLockManagerPluginNew("nop", 0))) + return -1; /* Just check the file is readable before opening it, otherwise * libvirt emits an error. 
@@ -428,6 +431,15 @@ int qemudLoadDriverConfig(struct qemud_driver *driver, CHECK_TYPE("max_processes", VIR_CONF_LONG); if (p) driver->maxProcesses = p->l; + p = virConfGetValue (conf, "lock_manager"); + CHECK_TYPE ("lock_manager", VIR_CONF_STRING); + if (p && p->str) { + virLockManagerPluginUnref(driver->lockManager); + if (!(driver->lockManager = + virLockManagerPluginNew(p->str, 0))) + VIR_ERROR(_("Failed to load lock manager %s"), p->str); + } + virConfFree (conf); return 0; } diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index f2bfa1e..003565b 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -43,6 +43,7 @@ # include "macvtap.h" # include "command.h" # include "threadpool.h" +# include "locking/lock_manager.h" # define QEMUD_CPUMASK_LEN CPU_SETSIZE @@ -128,6 +129,8 @@ struct qemud_driver { virBitmapPtr reservedVNCPorts; virSysinfoDefPtr hostsysinfo; + + virLockManagerPluginPtr lockManager; }; typedef struct _qemuDomainCmdlineDef qemuDomainCmdlineDef; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c61f9bf..f63a28e 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -122,6 +122,7 @@ static void qemuDomainObjPrivateFree(void *data) qemuDomainPCIAddressSetFree(priv->pciaddrs); virDomainChrSourceDefFree(priv->monConfig); VIR_FREE(priv->vcpupids); + VIR_FREE(priv->lockState); /* This should never be non-NULL if we get here, but just in case... 
*/ if (priv->mon) { @@ -178,6 +179,9 @@ static int qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </qemuCaps>\n"); } + if (priv->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", priv->lockState); + return 0; } @@ -281,6 +285,7 @@ static int qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes); + priv->lockState = virXPathString("string(./lockstate)", ctxt); return 0; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 6d24f53..0fca974 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -79,6 +79,7 @@ struct _qemuDomainObjPrivate { int persistentAddrs; virBitmapPtr qemuCaps; + char *lockState; }; struct qemuDomainWatchdogEvent diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 0b15437..7e91aae 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1,5 +1,5 @@ /* - * driver.c: core driver methods for managing qemu guests + * qemu_driver.c: core driver methods for managing qemu guests * * Copyright (C) 2006-2011 Red Hat, Inc. * Copyright (C) 2006 Daniel P. 
Berrange @@ -87,6 +87,7 @@ #include "fdstream.h" #include "configmake.h" #include "threadpool.h" +#include "locking/lock_manager.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -531,6 +532,14 @@ qemudStartup(int privileged) { } VIR_FREE(driverConf); + /* We should always at least have the 'nop' manager, so + * NULLs here are a fatal error + */ + if (!qemu_driver->lockManager) { + VIR_ERROR0(_("Missing lock manager implementation")); + goto error; + } + if (qemuSecurityInit(qemu_driver) < 0) goto error; @@ -775,6 +784,8 @@ qemudShutdown(void) { virCgroupFree(&qemu_driver->cgroup); + virLockManagerPluginUnref(qemu_driver->lockManager); + qemuDriverUnlock(qemu_driver); virMutexDestroy(&qemu_driver->lock); virThreadPoolFree(qemu_driver->workerPool); @@ -3941,6 +3952,13 @@ qemuDomainAttachDeviceLive(virDomainObjPtr vm, dev->data.controller = NULL; break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainAddLease(driver, vm, + dev->data.lease); + if (ret == 0) + dev->data.lease = NULL; + break; + case VIR_DOMAIN_DEVICE_NET: qemuDomainObjCheckNetTaint(driver, vm, dev->data.net, -1); ret = qemuDomainAttachNetDevice(dom->conn, driver, vm, @@ -4030,6 +4048,9 @@ qemuDomainDetachDeviceLive(virDomainObjPtr vm, case VIR_DOMAIN_DEVICE_CONTROLLER: ret = qemuDomainDetachDeviceControllerLive(driver, vm, dev); break; + case VIR_DOMAIN_DEVICE_LEASE: + ret = qemuDomainRemoveLease(driver, vm, dev->data.lease); + break; case VIR_DOMAIN_DEVICE_NET: ret = qemuDomainDetachNetDevice(driver, vm, dev); break; diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index dae2269..2c02eb7 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -38,6 +38,7 @@ #include "pci.h" #include "files.h" #include "qemu_cgroup.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -82,9 +83,15 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, return -1; } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if 
(virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (!(driveAlias = qemuDeviceDriveHostAlias(origdisk, priv->qemuCaps))) goto error; @@ -115,6 +122,9 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, vm, origdisk) < 0) VIR_WARN("Unable to restore security label on ejected image %s", origdisk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, origdisk) < 0) + VIR_WARN("Unable to release lock on disk %s", origdisk->src); + VIR_FREE(origdisk->src); origdisk->src = disk->src; disk->src = NULL; @@ -128,9 +138,14 @@ int qemuDomainChangeEjectableMedia(struct qemud_driver *driver, error: VIR_FREE(driveAlias); + if (virSecurityManagerRestoreImageLabel(driver->securityManager, vm, disk) < 0) VIR_WARN("Unable to restore security label on new media %s", disk->src); + + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -154,9 +169,15 @@ int qemuDomainAttachPciDiskDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } if (qemuCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) { if (qemuDomainPCIAddressEnsureAddr(priv->pciaddrs, &disk->info) < 0) @@ -228,6 +249,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -364,10 +388,15 @@ int qemuDomainAttachSCSIDisk(struct qemud_driver *driver, } } + if 
(virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } /* We should have an address already, so make sure */ if (disk->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DRIVE) { @@ -456,6 +485,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -477,10 +509,17 @@ int qemuDomainAttachUsbMassstorageDevice(struct qemud_driver *driver, } } + if (virDomainLockDiskAttach(driver->lockManager, vm, disk) < 0) + return -1; + if (virSecurityManagerSetImageLabel(driver->securityManager, - vm, disk) < 0) + vm, disk) < 0) { + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); return -1; + } + /* XXX not correct once we allow attaching a USB CDROM */ if (!disk->src) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("disk source path is missing")); @@ -538,6 +577,9 @@ error: vm, disk) < 0) VIR_WARN("Unable to restore security label on %s", disk->src); + if (virDomainLockDiskDetach(driver->lockManager, vm, disk) < 0) + VIR_WARN("Unable to release lock on %s", disk->src); + return -1; } @@ -1184,10 +1226,14 @@ int qemuDomainDetachPciDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, dev->data.disk) < 0) + VIR_WARN("Unable to release lock on %s", dev->data.disk->src); + ret = 0; cleanup: VIR_FREE(drivestr); + virCgroupFree(&cgroup); return ret; } @@ -1262,6 +1308,9 @@ int qemuDomainDetachDiskDevice(struct qemud_driver *driver, NULLSTR(dev->data.disk->src)); } + if (virDomainLockDiskDetach(driver->lockManager, vm, 
dev->data.disk) < 0) + VIR_WARN("Unable to release lock on disk %s", dev->data.disk->src); + ret = 0; cleanup: @@ -1798,3 +1847,26 @@ cleanup: return ret; } + +int qemuDomainAddLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + if (virDomainLockLeaseAttach(driver->lockManager, vm, lease) < 0) + return -1; + /* XXX update def */ + + return 0; +} + +int qemuDomainRemoveLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease) +{ + if (virDomainLockLeaseDetach(driver->lockManager, vm, lease) < 0) + return -1; + + /* XXX update def */ + + return 0; +} diff --git a/src/qemu/qemu_hotplug.h b/src/qemu/qemu_hotplug.h index d18b393..e9fe878 100644 --- a/src/qemu/qemu_hotplug.h +++ b/src/qemu/qemu_hotplug.h @@ -85,6 +85,12 @@ int qemuDomainDetachHostUsbDevice(struct qemud_driver *driver, int qemuDomainDetachHostDevice(struct qemud_driver *driver, virDomainObjPtr vm, virDomainDeviceDefPtr dev); +int qemuDomainAddLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); +int qemuDomainRemoveLease(struct qemud_driver *driver, + virDomainObjPtr vm, + virDomainLeaseDefPtr lease); #endif /* __QEMU_HOTPLUG_H__ */ diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 5413186..0bfcb34 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -41,6 +41,7 @@ #include "datatypes.h" #include "fdstream.h" #include "uuid.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -74,6 +75,8 @@ struct _qemuMigrationCookie { unsigned char uuid[VIR_UUID_BUFLEN]; char *name; + char *lockState; + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ qemuMigrationCookieGraphicsPtr graphics; }; @@ -98,6 +101,7 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) VIR_FREE(mig->hostname); VIR_FREE(mig->name); + VIR_FREE(mig->lockState); VIR_FREE(mig); } @@ -210,9 +214,11 @@ error: static qemuMigrationCookiePtr 
-qemuMigrationCookieNew(virDomainObjPtr dom) +qemuMigrationCookieNew(struct qemud_driver *driver, + virDomainObjPtr dom) { qemuMigrationCookiePtr mig = NULL; + qemuDomainObjPrivatePtr priv = dom->privateData; if (VIR_ALLOC(mig) < 0) goto no_memory; @@ -229,6 +235,15 @@ qemuMigrationCookieNew(virDomainObjPtr dom) goto error; } + if (dom->state == VIR_DOMAIN_PAUSED) { + if (priv->lockState && + !(mig->lockState = strdup(priv->lockState))) + goto no_memory; + } else { + if (virDomainLockProcessInquire(driver->lockManager, dom, &mig->lockState) < 0) + goto error; + } + return mig; no_memory: @@ -294,6 +309,8 @@ static void qemuMigrationCookieXMLFormat(virBufferPtr buf, virBufferAsprintf(buf, " <uuid>%s</uuid>\n", uuidstr); virBufferEscapeString(buf, " <hostname>%s</hostname>\n", mig->hostname); virBufferAsprintf(buf, " <hostuuid>%s</hostuuid>\n", hostuuidstr); + if (mig->lockState) + virBufferAsprintf(buf, " <lockstate>%s</lockstate>\n", mig->lockState); if (mig->flags & QEMU_MIGRATION_COOKIE_GRAPHICS) qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics); @@ -438,6 +455,10 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, } VIR_FREE(tmp); + mig->lockState = virXPathString("string(./lockstate[1])", ctxt); + if (mig->lockState && STREQ(mig->lockState, "")) + VIR_FREE(mig->lockState); + if ((flags & QEMU_MIGRATION_COOKIE_GRAPHICS) && virXPathBoolean("count(./graphics) > 0", ctxt) && (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) @@ -514,7 +535,8 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, static qemuMigrationCookiePtr -qemuMigrationEatCookie(virDomainObjPtr dom, +qemuMigrationEatCookie(struct qemud_driver *driver, + virDomainObjPtr dom, const char *cookiein, int cookieinlen, int flags) @@ -531,7 +553,7 @@ qemuMigrationEatCookie(virDomainObjPtr dom, VIR_DEBUG("cookielen=%d cookie='%s'", cookieinlen, NULLSTR(cookiein)); - if (!(mig = qemuMigrationCookieNew(dom))) + if (!(mig = qemuMigrationCookieNew(driver, dom))) return NULL; if 
(cookiein && cookieinlen && @@ -822,7 +844,7 @@ char *qemuMigrationBegin(struct qemud_driver *driver, if (!qemuMigrationIsAllowed(vm->def)) goto cleanup; - if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, NULL, 0, 0))) goto cleanup; if (qemuMigrationBakeCookie(mig, driver, vm, @@ -901,7 +923,7 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -1131,7 +1153,7 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -1226,7 +1248,7 @@ static int doNativeMigrate(struct qemud_driver *driver, unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; qemuMigrationCookiePtr mig = NULL; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -1469,7 +1491,7 @@ static int doTunnelMigrate(struct qemud_driver *driver, goto cleanup; } - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, QEMU_MIGRATION_COOKIE_GRAPHICS))) goto cleanup; @@ -2121,7 +2143,7 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) goto cleanup; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) @@ -2240,7 +2262,7 @@ int qemuMigrationConfirm(struct qemud_driver 
*driver, virDomainEventPtr event = NULL; int rv = -1; - if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1; if (!skipJob && diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index de728a2..39f5fe4 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -50,6 +50,7 @@ #include "nodeinfo.h" #include "processinfo.h" #include "domain_nwfilter.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -341,6 +342,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, virDomainObjLock(vm); if (vm->state == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); @@ -349,6 +351,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after state change", vm->def->name); @@ -410,6 +417,7 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && vm->state == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name); vm->state = VIR_DOMAIN_PAUSED; @@ -417,6 +425,11 @@ qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", 
vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Unable to save status on vm %s after watchdog event", vm->def->name); @@ -489,6 +502,7 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && vm->state == VIR_DOMAIN_RUNNING) { + qemuDomainObjPrivatePtr priv = vm->privateData; VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); vm->state = VIR_DOMAIN_PAUSED; @@ -496,6 +510,11 @@ qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); + VIR_FREE(priv->lockState); + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); } @@ -1769,11 +1788,22 @@ struct qemuProcessHookData { virConnectPtr conn; virDomainObjPtr vm; struct qemud_driver *driver; + bool incomingMigrate; }; static int qemuProcessHook(void *data) { struct qemuProcessHookData *h = data; + int ret = -1; + + /* Some later calls want pid present */ + h->vm->pid = getpid(); + + VIR_DEBUG0("Obtaining domain lock"); + if (virDomainLockProcessStart(h->driver->lockManager, + h->vm, + h->incomingMigrate) < 0) + goto cleanup; if (qemuProcessLimits(h->driver) < 0) return -1; @@ -1781,18 +1811,25 @@ static int qemuProcessHook(void *data) /* This must take place before exec(), so that all QEMU * memory allocation is on the correct NUMA node */ + VIR_DEBUG0("Moving procss to cgroup"); if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; + goto cleanup; /* This must be done after cgroup placement to avoid resetting CPU * affinity */ + VIR_DEBUG0("Setup CPU 
affinity"); if (qemuProcessInitCpuAffinity(h->vm) < 0) - return -1; + goto cleanup; + VIR_DEBUG0("Setting up security labeling"); if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; + goto cleanup; - return 0; + ret = 0; + +cleanup: + VIR_DEBUG("Hook complete ret=%d", ret); + return ret; } @@ -1821,11 +1858,22 @@ qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, int ret; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState)); + if (virDomainLockProcessResume(driver->lockManager, vm, priv->lockState) < 0) { + VIR_FREE(priv->lockState); + return -1; + } + VIR_FREE(priv->lockState); + qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorStartCPUs(priv->mon, conn); qemuDomainObjExitMonitorWithDriver(driver, vm); if (ret == 0) { vm->state = VIR_DOMAIN_RUNNING; + } else { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); } return ret; @@ -1838,13 +1886,21 @@ int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm) int oldState = vm->state; qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_FREE(priv->lockState); + vm->state = VIR_DOMAIN_PAUSED; qemuDomainObjEnterMonitorWithDriver(driver, vm); ret = qemuMonitorStopCPUs(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) { + + if (ret == 0) { + if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) + VIR_WARN("Unable to release lease on %s", vm->def->name); + VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState)); + } else { vm->state = oldState; } + return ret; } @@ -2050,29 +2106,6 @@ int qemuProcessStart(virConnectPtr conn, } qemuAuditSecurityLabel(vm, true); - VIR_DEBUG0("Generating setting domain security labels (if required)"); - if 
(virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - VIR_DEBUG0("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { - virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; - } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - /* Ensure no historical cgroup for this VM is lying around bogus * settings */ VIR_DEBUG0("Ensuring no historical cgroup is lying around"); @@ -2257,6 +2290,7 @@ int qemuProcessStart(virConnectPtr conn, virCommandNonblockingFDs(cmd); virCommandSetPidFile(cmd, pidfile); virCommandDaemonize(cmd); + virCommandRequireHandshake(cmd); ret = virCommandRun(cmd, NULL); VIR_FREE(pidfile); @@ -2287,6 +2321,42 @@ int qemuProcessStart(virConnectPtr conn, #endif } + VIR_DEBUG0("Waiting for handshake from child"); + if (virCommandHandshakeWait(cmd) < 0) { + ret = -1; + goto cleanup; + } + + VIR_DEBUG0("Setting domain security labels"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + VIR_DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + VIR_DEBUG0("Labelling done, completing handshake to child"); + if (virCommandHandshakeNotify(cmd) < 0) { + ret = -1; + goto cleanup; + } + VIR_DEBUG0("Handshake complete, child running"); + if (migrateFrom) 
start_paused = true; vm->state = start_paused ? VIR_DOMAIN_PAUSED : VIR_DOMAIN_RUNNING; diff --git a/src/qemu/test_libvirtd_qemu.aug b/src/qemu/test_libvirtd_qemu.aug index 917bd4f..90c80f0 100644 --- a/src/qemu/test_libvirtd_qemu.aug +++ b/src/qemu/test_libvirtd_qemu.aug @@ -113,6 +113,9 @@ allow_disk_format_probing = 1 vnc_auto_unix_socket = 1 max_processes = 12345 + +content_lock_manager = \"fcntl\" +metadata_lock_manager = \"fcntl\" " test Libvirtd_qemu.lns get conf = @@ -236,3 +239,6 @@ max_processes = 12345 { "vnc_auto_unix_socket" = "1" } { "#empty" } { "max_processes" = "12345" } +{ "#empty" } +{ "content_lock_manager" = "fcntl" } +{ "metadata_lock_manager" = "fcntl" } -- 1.7.4.4

Sanlock is a project that implements a disk-paxos locking algorithm. This is suitable for cluster deployments with shared storage. * src/Makefile.am: Add dlopen plugin for sanlock * src/locking/lock_driver_sanlock.c: Sanlock driver --- libvirt.spec.in | 11 + po/POTFILES.in | 1 + src/Makefile.am | 12 + src/libvirt_private.syms | 1 + src/locking/lock_driver_sanlock.c | 404 +++++++++++++++++++++++++++++++++++++ 5 files changed, 429 insertions(+), 0 deletions(-) create mode 100644 src/locking/lock_driver_sanlock.c diff --git a/libvirt.spec.in b/libvirt.spec.in index 8a6912f..f844e5a 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -76,6 +76,7 @@ %define with_audit 0%{!?_without_audit:0} %define with_dtrace 0%{!?_without_dtrace:0} %define with_cgconfig 0%{!?_without_cgconfig:0} +%define with_sanlock 0%{!?_without_sanlock:0} # Non-server/HV driver defaults which are always enabled %define with_python 0%{!?_without_python:1} @@ -179,6 +180,7 @@ %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6 %define with_dtrace 1 +%define with_sanlock 1 %endif # Pull in cgroups config system @@ -431,6 +433,9 @@ BuildRequires: audit-libs-devel # we need /usr/sbin/dtrace BuildRequires: systemtap-sdt-devel %endif +%if %{with_sanlock} +BuildRequires: sanlock-devel +%endif %if %{with_storage_fs} # For mount/umount in FS driver @@ -698,6 +703,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/*.a rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a %if %{with_network} install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/ @@ -984,6 +991,10 @@ fi %attr(0755, root, root) %{_libexecdir}/libvirt_lxc %endif +%if %{with_sanlock} +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so +%endif + %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper %attr(0755, root, root) 
%{_libexecdir}/libvirt_iohelper %attr(0755, root, root) %{_sbindir}/libvirtd diff --git a/po/POTFILES.in b/po/POTFILES.in index 52fee59..c7c8396 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -31,6 +31,7 @@ src/fdstream.c src/interface/netcf_driver.c src/internal.h src/libvirt.c +src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_container.c src/lxc/lxc_conf.c diff --git a/src/Makefile.am b/src/Makefile.am index 347fd87..291c448 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -99,6 +99,9 @@ DRIVER_SOURCES = \ locking/lock_driver_nop.h locking/lock_driver_nop.c \ locking/domain_lock.h locking/domain_lock.c +LOCK_DRIVER_SANLOCK_SOURCES = \ + locking/lock_driver_sanlock.c + # XML configuration format handling sources # Domain driver generic impl APIs @@ -1158,6 +1161,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS) libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD) EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE) + +lockdriverdir = $(libdir)/libvirt/lock-driver +lockdriver_LTLIBRARIES = sanlock.la + +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES) +sanlock_la_CFLAGS = $(AM_CLFAGS) +sanlock_la_LDFLAGS = -module -avoid-version +sanlock_la_LIBADD = -lsanlock + libexec_PROGRAMS = if WITH_LIBVIRTD diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7fcb700..798b882 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -631,6 +631,7 @@ virVMOperationTypeToString; # memory.h virAlloc; virAllocN; +virAllocVar; virExpandN; virFree; virReallocN; diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c new file mode 100644 index 0000000..9118255 --- /dev/null +++ b/src/locking/lock_driver_sanlock.c @@ -0,0 +1,404 @@ +/* + * lock_driver_sanlock.c: A lock driver for Sanlock + * + * Copyright (C) 2010-2011 Red Hat, Inc. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> + +#include <sanlock.h> +#include <sanlock_resource.h> + +#include "lock_driver.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" + +#define VIR_FROM_THIS VIR_FROM_LOCKING + +#define virLockError(code, ...) 
\ + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \ + __FUNCTION__, __LINE__, __VA_ARGS__) + +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate; +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr; + +struct _virLockManagerSanlockPrivate { + char vm_name[SANLK_NAME_LEN]; + char vm_uuid[VIR_UUID_BUFLEN]; + unsigned int vm_id; + unsigned int vm_pid; + unsigned int flags; + bool hasRWDisks; + int res_count; + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; +}; + +/* + * sanlock plugin for the libvirt virLockManager API + */ + +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED, + unsigned int flags) +{ + virCheckFlags(0, -1); + return 0; +} + +static int virLockManagerSanlockDeinit(void) +{ + virLockError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unloading sanlock plugin is forbidden")); + return -1; +} + +static int virLockManagerSanlockNew(virLockManagerPtr lock, + unsigned int type, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerParamPtr param; + virLockManagerSanlockPrivatePtr priv; + int i; + + virCheckFlags(0, -1); + + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Unsupported object type %d"), type); + return -1; + } + + if (VIR_ALLOC(priv) < 0) { + virReportOOMError(); + return -1; + } + + priv->flags = flags; + + for (i = 0; i < nparams; i++) { + param = ¶ms[i]; + + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + param->value.str, SANLK_NAME_LEN); + goto error; + } + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.ui; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } + } + + lock->privateData = priv; + return 0; + +error: 
+ VIR_FREE(priv); + return -1; +} + +static void virLockManagerSanlockFree(virLockManagerPtr lock) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int i; + + if (!priv) + return; + + for (i = 0; i < priv->res_count; i++) + VIR_FREE(priv->res_args[i]); + VIR_FREE(priv); + lock->privateData = NULL; +} + +static int virLockManagerSanlockAddResource(virLockManagerPtr lock, + unsigned int type, + const char *name, + size_t nparams, + virLockManagerParamPtr params, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_resource *res; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + + if (priv->res_count == SANLK_MAX_RESOURCES) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Too many resources %d for object"), + SANLK_MAX_RESOURCES); + return -1; + } + + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } + + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) + return 0; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Readonly leases are not supported")); + return -1; + } + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Sharable leases are not supported")); + return -1; + } + + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) { + virReportOOMError(); + return -1; + } + + res->num_disks = 1; + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource name '%s' exceeds %d characters"), + name, SANLK_NAME_LEN); + goto error; + } + + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "path")) { + if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Lease path '%s' exceeds %d characters"), + 
params[i].value.str, SANLK_PATH_LEN); + goto error; + } + } else if (STREQ(params[i].key, "offset")) { + res->disks[0].offset = params[i].value.ul; + } else if (STREQ(params[i].key, "lockspace")) { + if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Resource lockspace '%s' exceeds %d characters"), + params[i].value.str, SANLK_NAME_LEN); + goto error; + } + } + } + + priv->res_args[priv->res_count] = res; + priv->res_count++; + return 0; + +error: + VIR_FREE(res); + return -1; +} + +static int virLockManagerSanlockAcquire(virLockManagerPtr lock, + const char *state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + struct sanlk_options *opt; + struct sanlk_resource **res_args; + int res_count; + bool res_free = false; + int sock = -1; + int rv; + int i; + + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); + + if (priv->res_count == 0 && + priv->hasRWDisks) { + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + if (VIR_ALLOC(opt) < 0) { + virReportOOMError(); + return -1; + } + + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) { + virLockError(VIR_ERR_INTERNAL_ERROR, + _("Domain name '%s' exceeded %d characters"), + priv->vm_name, SANLK_NAME_LEN); + goto error; + } + + if (state && STRNEQ(state, "") && 0) { + if ((rv = sanlock_state_to_args((char *)state, + &res_count, + &res_args)) < 0) { + virReportSystemError(-rv, + _("Unable to parse lock state %s"), + state); + goto error; + } + res_free = true; + } else { + res_args = priv->res_args; + res_count = priv->res_count; + } + + VIR_DEBUG0("Register sanlock"); + /* We only initialize 'sock' if we are in the real + * child process and we need it to be inherited + * + * If sock==-1, then sanlock auto-open/closes a + * temporary sock + */ + if (priv->vm_pid == getpid() && + (sock = 
sanlock_register()) < 0) { + virReportSystemError(-sock, "%s", + _("Failed to open socket to sanlock daemon")); + goto error; + } + + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + VIR_DEBUG("Acquiring object %u", priv->res_count); + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0, + priv->res_count, priv->res_args, + opt)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to acquire lock")); + goto error; + } + } + + VIR_FREE(opt); + + /* + * We are *intentionally* "leaking" sock file descriptor + * because we want it to be inherited by QEMU. When the + * sock FD finally closes upon QEMU exit (or crash) then + * sanlock will notice EOF and release the lock + */ + if (sock != -1 && + virSetInherit(sock, true) < 0) + goto error; + + VIR_DEBUG("Acquire completed fd=%d", sock); + + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + + return 0; + +error: + if (res_free) { + for (i = 0 ; i < res_count ; i++) { + VIR_FREE(res_args[i]); + } + VIR_FREE(res_args); + } + VIR_FREE(opt); + VIR_FORCE_CLOSE(sock); + return -1; +} + + +static int virLockManagerSanlockRelease(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int res_count; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) { + virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + return 0; +} + +static int virLockManagerSanlockInquire(virLockManagerPtr lock, + char **state, + unsigned int flags) +{ + virLockManagerSanlockPrivatePtr priv = lock->privateData; + int rv; + + virCheckFlags(0, -1); + + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, NULL, state)) < 0) { + 
virReportSystemError(-rv, "%s", + _("Failed to release lock")); + return -1; + } + + if (STREQ(*state, "")) + VIR_FREE(*state); + + return 0; +} + +virLockDriver virLockDriverImpl = +{ + .version = VIR_LOCK_MANAGER_VERSION, + + .drvInit = virLockManagerSanlockInit, + .drvDeinit = virLockManagerSanlockDeinit, + + .drvNew = virLockManagerSanlockNew, + .drvFree = virLockManagerSanlockFree, + + .drvAddResource = virLockManagerSanlockAddResource, + + .drvAcquire = virLockManagerSanlockAcquire, + .drvRelease = virLockManagerSanlockRelease, + .drvInquire = virLockManagerSanlockInquire, +}; -- 1.7.4.4

On Wed, May 11, 2011 at 10:33:02AM +0100, Daniel P. Berrange wrote:
This is an update to
http://www.redhat.com/archives/libvir-list/2011-January/msg00952.html
Changes in this series
- Lock manager plugin API is dramatically simpler - Lock manager only protects disk content, not disk metadata (eg file ownership/selinux label changes) - Migration state transfer integrated - Updated for latest sanlock API - Locks are released upon VM pause and reacquired upon resume - Updated documentation
NB, as before, the lock manager plugin API is currently *internal* only, so out-of-tree 3rd party plugin impls will not be supported. This restriction may be relaxed in the future, once we have determined that the current plugin API is suitable for long term ABI guarantees.
My intention is to merge this initial series with the nop and sanlock plugin impls, and then provide a fcntl based impl later.
BTW, this series only applies on top of the migration v3 series Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (2)
-
Daniel P. Berrange
-
Eric Blake