On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote:
Sanlock is a project that implements a disk-paxos locking
algorithm. This is suitable for cluster deployments with
shared storage.
* src/Makefile.am: Add dlopen plugin for sanlock
* src/locking/lock_driver_sanlock.c: Sanlock driver
---
libvirt.spec.in | 11 +
po/POTFILES.in | 1 +
src/Makefile.am | 12 +
src/libvirt_private.syms | 1 +
src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++
5 files changed, 438 insertions(+), 0 deletions(-)
create mode 100644 src/locking/lock_driver_sanlock.c
diff --git a/libvirt.spec.in b/libvirt.spec.in
index e85f68f..73213ea 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -77,6 +77,7 @@
%define with_dtrace 0%{!?_without_dtrace:0}
%define with_cgconfig 0%{!?_without_cgconfig:0}
%define with_referential 0%{!?_without_referential:1}
+%define with_sanlock 0%{!?_without_sanlock:0}
# Non-server/HV driver defaults which are always enabled
%define with_python 0%{!?_without_python:1}
@@ -180,6 +181,7 @@
%if 0%{?fedora} >= 13 || 0%{?rhel} >= 6
%define with_dtrace 1
+%define with_sanlock 1
%endif
# Pull in cgroups config system
@@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel
%if %{with_referential}
BuildRequires: referential-devel
%endif
+%if %{with_sanlock}
+BuildRequires: sanlock-devel
+%endif
Hum ... weird
[root@paphio ~]# yum install sanlock-devel
..
No package sanlock-devel available.
Error: Nothing to do
[root@paphio ~]# cat /etc/fedora-release
Fedora release 14 (Laughlin)
[root@paphio ~]#
Are you sure about this dependency?
%if %{with_storage_fs}
# For mount/umount in FS driver
@@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
rm -f $RPM_BUILD_ROOT%{_libdir}/*.a
rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la
rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a
+rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la
+rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
%if %{with_network}
install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/
@@ -1004,6 +1011,10 @@ fi
%attr(0755, root, root) %{_libexecdir}/libvirt_lxc
%endif
+%if %{with_sanlock}
+%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so
+%endif
+
%attr(0755, root, root) %{_libexecdir}/libvirt_parthelper
%attr(0755, root, root) %{_libexecdir}/libvirt_iohelper
%attr(0755, root, root) %{_sbindir}/libvirtd
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 9c3d287..c3b45f9 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -31,6 +31,7 @@ src/fdstream.c
src/interface/netcf_driver.c
src/internal.h
src/libvirt.c
+src/locking/lock_driver_sanlock.c
src/locking/lock_manager.c
src/lxc/lxc_container.c
src/lxc/lxc_conf.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e5a72e..edf017d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -99,6 +99,9 @@ DRIVER_SOURCES = \
locking/lock_driver_nop.h locking/lock_driver_nop.c \
locking/domain_lock.h locking/domain_lock.c
+LOCK_DRIVER_SANLOCK_SOURCES = \
+ locking/lock_driver_sanlock.c
+
# XML configuration format handling sources
# Domain driver generic impl APIs
@@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS)
libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD)
EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
+
+# sanlock lock driver, built as a dlopen plugin module
+lockdriverdir = $(libdir)/libvirt/lock-driver
+lockdriver_LTLIBRARIES = sanlock.la
+
+sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES)
+sanlock_la_CFLAGS = $(AM_CFLAGS)
+sanlock_la_LDFLAGS = -module -avoid-version
+sanlock_la_LIBADD = -lsanlock
+
libexec_PROGRAMS =
if WITH_LIBVIRTD
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a3fe2f1..e61ea13 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -650,6 +650,7 @@ virVMOperationTypeToString;
# memory.h
virAlloc;
virAllocN;
+virAllocVar;
virExpandN;
virFree;
virReallocN;
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c
new file mode 100644
index 0000000..6a31fdf
--- /dev/null
+++ b/src/locking/lock_driver_sanlock.c
@@ -0,0 +1,413 @@
+/*
+ * lock_driver_sanlock.c: A lock driver for Sanlock
+ *
+ * Copyright (C) 2010-2011 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Author: Daniel P. Berrange <berrange(a)redhat.com>
+ */
+
+#include <config.h>
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <sanlock.h>
+#include <sanlock_resource.h>
+
+#include "lock_driver.h"
+#include "logging.h"
+#include "virterror_internal.h"
+#include "memory.h"
+#include "util.h"
+#include "files.h"
+
+#define VIR_FROM_THIS VIR_FROM_LOCKING /* first argument handed to virReportErrorHelper by virLockError */
+
+#define virLockError(code, ...) \
+ virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \
+ __FUNCTION__, __LINE__, __VA_ARGS__) /* reports 'code' plus a printf-style message, tagged with the caller's file, function and line */
+
+typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate;
+typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr;
+
+struct _virLockManagerSanlockPrivate { /* per-object state stored in virLockManagerPtr->privateData */
+ char vm_name[SANLK_NAME_LEN]; /* copied from the "name" parameter in virLockManagerSanlockNew */
+ char vm_uuid[VIR_UUID_BUFLEN]; /* 16 bytes copied from the "uuid" parameter */
+ unsigned int vm_id; /* value of the "id" parameter */
+ unsigned int vm_pid; /* value of the "pid" parameter; passed as the pid to the sanlock calls */
+ unsigned int flags; /* flags given to virLockManagerSanlockNew */
+ bool hasRWDisks; /* set when a disk resource that is neither shared nor readonly is added */
+ int res_count; /* number of valid entries in res_args */
+ struct sanlk_resource *res_args[SANLK_MAX_RESOURCES]; /* lease resources recorded by AddResource; freed in virLockManagerSanlockFree */
+};
+
+/*
+ * sanlock plugin for the libvirt virLockManager API
+ */
+
+/*
+ * Driver initialisation callback (installed as .drvInit).
+ *
+ * @version is not used. @flags is checked with virCheckFlags() against
+ * an empty set of supported flags; nothing else is set up here.
+ *
+ * Returns 0 once the flag check has passed.
+ */
+static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED,
+                                     unsigned int flags)
+{
+    virCheckFlags(0, -1);
+
+    return 0;
+}
+
+/*
+ * Driver teardown callback (installed as .drvDeinit).
+ *
+ * Unloading is refused unconditionally: an internal error is reported
+ * and -1 is returned on every call.
+ */
+static int virLockManagerSanlockDeinit(void)
+{
+    virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unloading sanlock plugin is forbidden"));
+
+    return -1;
+}
+
+/*
+ * Allocate and populate the sanlock private data for a lock manager object.
+ *
+ * @lock:    object whose privateData is set on success
+ * @type:    must be VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN
+ * @nparams: number of entries in @params
+ * @params:  key/value parameters; "uuid", "name", "pid" and "id" are
+ *           recorded, any other key is ignored
+ * @flags:   checked with virCheckFlags() against an empty supported set
+ *
+ * Returns 0 on success, or -1 after reporting an error (unsupported
+ * object type, allocation failure, or a domain name that virStrcpy()
+ * cannot fit into SANLK_NAME_LEN bytes). On failure nothing is stored
+ * in @lock.
+ */
+static int virLockManagerSanlockNew(virLockManagerPtr lock,
+                                    unsigned int type,
+                                    size_t nparams,
+                                    virLockManagerParamPtr params,
+                                    unsigned int flags)
+{
+    virLockManagerParamPtr param;
+    virLockManagerSanlockPrivatePtr priv;
+    size_t i;
+
+    virCheckFlags(0, -1);
+
+    if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Unsupported object type %d"), type);
+        return -1;
+    }
+
+    if (VIR_ALLOC(priv) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    priv->flags = flags;
+
+    for (i = 0; i < nparams; i++) {
+        param = &params[i];
+
+        if (STREQ(param->key, "uuid")) {
+            /* fixed-size copy of 16 bytes from the parameter */
+            memcpy(priv->vm_uuid, param->value.uuid, 16);
+        } else if (STREQ(param->key, "name")) {
+            if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Domain name '%s' exceeded %d characters"),
+                             param->value.str, SANLK_NAME_LEN);
+                goto error;
+            }
+        } else if (STREQ(param->key, "pid")) {
+            priv->vm_pid = param->value.ui;
+        } else if (STREQ(param->key, "id")) {
+            priv->vm_id = param->value.ui;
+        }
+    }
+
+    lock->privateData = priv;
+    return 0;
+
+error:
+    VIR_FREE(priv);
+    return -1;
+}
+
+/*
+ * Release the private data attached to @lock (installed as .drvFree).
+ *
+ * Every resource recorded in res_args[0 .. res_count-1] is freed, then
+ * the private structure itself, and lock->privateData is cleared.
+ * Does nothing when no private data is attached.
+ */
+static void virLockManagerSanlockFree(virLockManagerPtr lock)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    int idx = 0;
+
+    if (priv == NULL)
+        return;
+
+    while (idx < priv->res_count) {
+        VIR_FREE(priv->res_args[idx]);
+        idx++;
+    }
+
+    VIR_FREE(priv);
+    lock->privateData = NULL;
+}
+
+/*
+ * Record a resource against a lock manager object (.drvAddResource).
+ *
+ * @lock:    object created by virLockManagerSanlockNew
+ * @type:    resource type; only DISK and LEASE are acted upon
+ * @name:    resource name, copied into the sanlock resource for leases
+ * @nparams: number of entries in @params
+ * @params:  for leases the "path", "offset" and "lockspace" keys are
+ *           used; other keys are ignored
+ * @flags:   VIR_LOCK_MANAGER_RESOURCE_READONLY and/or
+ *           VIR_LOCK_MANAGER_RESOURCE_SHARED
+ *
+ * Behaviour by type:
+ *  - DISK:  nothing is registered with sanlock; the object is only
+ *           marked as having read/write disks when the disk is neither
+ *           shared nor readonly.
+ *  - LEASE: a sanlk_resource with a single disk is allocated, filled in
+ *           and appended to priv->res_args. Readonly and shared leases
+ *           are rejected.
+ *  - other: ignored.
+ *
+ * Note that the SANLK_MAX_RESOURCES limit is checked before the type is
+ * examined, so it applies to every call once the table is full.
+ *
+ * Returns 0 on success or when the resource is ignored, -1 after
+ * reporting an error.
+ */
+static int virLockManagerSanlockAddResource(virLockManagerPtr lock,
+                                            unsigned int type,
+                                            const char *name,
+                                            size_t nparams,
+                                            virLockManagerParamPtr params,
+                                            unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    struct sanlk_resource *res;
+    size_t i;
+
+    virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
+                  VIR_LOCK_MANAGER_RESOURCE_SHARED, -1);
+
+    if (priv->res_count == SANLK_MAX_RESOURCES) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Too many resources %d for object"),
+                     SANLK_MAX_RESOURCES);
+        return -1;
+    }
+
+    if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) {
+        if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED |
+                       VIR_LOCK_MANAGER_RESOURCE_READONLY)))
+            priv->hasRWDisks = true;
+        return 0;
+    }
+
+    if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE)
+        return 0;
+
+    if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Readonly leases are not supported"));
+        return -1;
+    }
+    if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Sharable leases are not supported"));
+        return -1;
+    }
+
+    /* One sanlk_resource followed by room for a single sanlk_disk */
+    if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    res->num_disks = 1;
+    if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Resource name '%s' exceeds %d characters"),
+                     name, SANLK_NAME_LEN);
+        goto error;
+    }
+
+    for (i = 0; i < nparams; i++) {
+        if (STREQ(params[i].key, "path")) {
+            if (!virStrcpy(res->disks[0].path, params[i].value.str,
+                           SANLK_PATH_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Lease path '%s' exceeds %d characters"),
+                             params[i].value.str, SANLK_PATH_LEN);
+                goto error;
+            }
+        } else if (STREQ(params[i].key, "offset")) {
+            res->disks[0].offset = params[i].value.ul;
+        } else if (STREQ(params[i].key, "lockspace")) {
+            if (!virStrcpy(res->lockspace_name, params[i].value.str,
+                           SANLK_NAME_LEN)) {
+                virLockError(VIR_ERR_INTERNAL_ERROR,
+                             _("Resource lockspace '%s' exceeds %d characters"),
+                             params[i].value.str, SANLK_NAME_LEN);
+                goto error;
+            }
+        }
+    }
+
+    priv->res_args[priv->res_count] = res;
+    priv->res_count++;
+    return 0;
+
+error:
+    VIR_FREE(res);
+    return -1;
+}
+
+/*
+ * Register with sanlock and, unless told otherwise, acquire the leases
+ * recorded for this object (.drvAcquire).
+ *
+ * @lock:  object populated by virLockManagerSanlockNew / AddResource
+ * @state: previously saved lock state; currently never parsed because
+ *         the branch that would do so is disabled with "&& 0"
+ * @flags: VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY skips the acquire step
+ *
+ * Fails up front when read/write disks were added but no leases were.
+ * sanlock_register() is only called when the recorded pid is the
+ * calling process; otherwise sock stays -1. On success the socket is
+ * marked inheritable and deliberately left open.
+ *
+ * Returns 0 on success, -1 after reporting an error; on the error path
+ * the socket is closed and temporary allocations are released.
+ */
+static int virLockManagerSanlockAcquire(virLockManagerPtr lock,
+                                        const char *state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    struct sanlk_options *opt;
+    struct sanlk_resource **res_args;
+    int res_count;
+    bool res_free = false;
+    int sock = -1;
+    int rv;
+    int i;
+
+    virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
+
+    if (priv->res_count == 0 &&
+        priv->hasRWDisks) {
+        virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                     _("Read/write, exclusive access, disks were present, but no leases specified"));
+        return -1;
+    }
+
+    if (VIR_ALLOC(opt) < 0) {
+        virReportOOMError();
+        return -1;
+    }
+
+    if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) {
+        virLockError(VIR_ERR_INTERNAL_ERROR,
+                     _("Domain name '%s' exceeded %d characters"),
+                     priv->vm_name, SANLK_NAME_LEN);
+        goto error;
+    }
+
+    /* The trailing "&& 0" disables state parsing, so the else branch
+     * is always taken and res_free stays false. */
+    if (state && STRNEQ(state, "") && 0) {
+        if ((rv = sanlock_state_to_args((char *)state,
+                                        &res_count,
+                                        &res_args)) < 0) {
+            virReportSystemError(-rv,
+                                 _("Unable to parse lock state %s"),
+                                 state);
+            goto error;
+        }
+        res_free = true;
+    } else {
+        res_args = priv->res_args;
+        res_count = priv->res_count;
+    }
+
+    VIR_DEBUG("Register sanlock %d", flags);
+    /* We only initialize 'sock' if we are in the real
+     * child process and we need it to be inherited
+     *
+     * If sock==-1, then sanlock auto-open/closes a
+     * temporary sock
+     */
+    if (priv->vm_pid == getpid() &&
+        (sock = sanlock_register()) < 0) {
+        virReportSystemError(-sock, "%s",
+                             _("Failed to open socket to sanlock daemon"));
+        goto error;
+    }
+
+    if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
+        VIR_DEBUG("Acquiring object %u", priv->res_count);
+        /* NOTE(review): this passes priv->res_args rather than the local
+         * res_args; the two are the same while the state branch above
+         * is disabled - confirm before enabling that branch. */
+        if ((rv = sanlock_acquire(sock, priv->vm_pid, 0,
+                                  priv->res_count, priv->res_args,
+                                  opt)) < 0) {
Hum ...
+#if 1
+            virReportSystemError(-rv, "%s",
+                                 _("Failed to acquire lock"));
+#else
+            virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
+                         _("Failed to acquire lock"));
+#endif
this probably is worth some kind of comment or cleaned up
+            goto error;
+        }
+    }
+
+    VIR_FREE(opt);
+
+    /*
+     * We are *intentionally* "leaking" sock file descriptor
+     * because we want it to be inherited by QEMU. When the
+     * sock FD finally closes upon QEMU exit (or crash) then
+     * sanlock will notice EOF and release the lock
+     */
+    if (sock != -1 &&
+        virSetInherit(sock, true) < 0)
+        goto error;
+
+    VIR_DEBUG("Acquire completed fd=%d", sock);
+
+    if (res_free) {
+        for (i = 0 ; i < res_count ; i++) {
+            VIR_FREE(res_args[i]);
+        }
+        VIR_FREE(res_args);
+    }
+
+    return 0;
+
+error:
+    if (res_free) {
+        for (i = 0 ; i < res_count ; i++) {
+            VIR_FREE(res_args[i]);
+        }
+        VIR_FREE(res_args);
+    }
+    VIR_FREE(opt);
+    VIR_FORCE_CLOSE(sock);
+    return -1;
+}
+
+
+/*
+ * Release every lease held for the object's pid (.drvRelease).
+ *
+ * @lock:  object populated by virLockManagerSanlockNew
+ * @state: if non-NULL, receives the state string reported by
+ *         sanlock_inquire() before the release; it is set to NULL when
+ *         sanlock reports an empty string. If NULL, the inquire step
+ *         is skipped.
+ * @flags: checked with virCheckFlags() against an empty supported set
+ *
+ * Returns 0 on success, -1 after reporting an error. If the release
+ * itself fails, any state string already stored in *state is left for
+ * the caller.
+ */
+static int virLockManagerSanlockRelease(virLockManagerPtr lock,
+                                        char **state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    int res_count;
+    int rv;
+
+    virCheckFlags(0, -1);
+
+    if (state) {
+        if ((rv = sanlock_inquire(-1, priv->vm_pid, 0,
+                                  &res_count, state)) < 0) {
+            virReportSystemError(-rv, "%s",
+                                 _("Failed to inquire lock"));
+            return -1;
+        }
+
+        /* Guard against a NULL result before comparing */
+        if (*state && STREQ(*state, ""))
+            VIR_FREE(*state);
+    }
+
+    if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0) {
+        virReportSystemError(-rv, "%s",
+                             _("Failed to release lock"));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Query sanlock for the lock state of the object's pid (.drvInquire).
+ *
+ * @lock:  object populated by virLockManagerSanlockNew
+ * @state: receives the state string from sanlock_inquire(); it is set
+ *         to NULL when sanlock reports an empty string
+ * @flags: checked with virCheckFlags() against an empty supported set
+ *
+ * Returns 0 on success, -1 after reporting an error.
+ */
+static int virLockManagerSanlockInquire(virLockManagerPtr lock,
+                                        char **state,
+                                        unsigned int flags)
+{
+    virLockManagerSanlockPrivatePtr priv = lock->privateData;
+    int rv, res_count;
+
+    virCheckFlags(0, -1);
+
+    /* vm_pid is unsigned int, so print it with %u */
+    VIR_DEBUG("pid=%u", priv->vm_pid);
+
+    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
+        virReportSystemError(-rv, "%s",
+                             _("Failed to inquire lock"));
+        return -1;
+    }
+
+    /* Guard against a NULL result before comparing */
+    if (*state && STREQ(*state, ""))
+        VIR_FREE(*state);
+
+    return 0;
+}
+
+virLockDriver virLockDriverImpl = /* the only non-static symbol in this file: the driver table wiring up the callbacks above */
+{
+ .version = VIR_LOCK_MANAGER_VERSION,
+
+ .flags = VIR_LOCK_MANAGER_USES_STATE, /* NOTE(review): presumably advertises use of the state strings handled by Acquire/Release/Inquire - confirm in lock_driver.h */
+
+ .drvInit = virLockManagerSanlockInit,
+ .drvDeinit = virLockManagerSanlockDeinit, /* always fails: unloading is refused */
+
+ .drvNew = virLockManagerSanlockNew,
+ .drvFree = virLockManagerSanlockFree,
+
+ .drvAddResource = virLockManagerSanlockAddResource,
+
+ .drvAcquire = virLockManagerSanlockAcquire,
+ .drvRelease = virLockManagerSanlockRelease,
+ .drvInquire = virLockManagerSanlockInquire,
+};
I'm a bit puzzled by the new dependency, and this might prevent me
from building rc1 of 0.9.2 if pushed as-is,
but ACK in principle.
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit
http://xmlsoft.org/
daniel(a)veillard.com | Rpmfind RPM search engine
http://rpmfind.net/
http://veillard.com/ | virtualization library
http://libvirt.org/