On Fri, May 27, 2011 at 05:37:51PM +0800, Daniel Veillard wrote:
On Thu, May 19, 2011 at 07:24:25AM -0400, Daniel P. Berrange wrote:
> Sanlock is a project that implements a disk-paxos locking
> algorithm. This is suitable for cluster deployments with
> shared storage.
>
> * src/Makefile.am: Add dlopen plugin for sanlock
> * src/locking/lock_driver_sanlock.c: Sanlock driver
> ---
> libvirt.spec.in | 11 +
> po/POTFILES.in | 1 +
> src/Makefile.am | 12 +
> src/libvirt_private.syms | 1 +
> src/locking/lock_driver_sanlock.c | 413 +++++++++++++++++++++++++++++++++++++
> 5 files changed, 438 insertions(+), 0 deletions(-)
> create mode 100644 src/locking/lock_driver_sanlock.c
>
> diff --git a/libvirt.spec.in b/libvirt.spec.in
> index e85f68f..73213ea 100644
> --- a/libvirt.spec.in
> +++ b/libvirt.spec.in
> @@ -77,6 +77,7 @@
> %define with_dtrace 0%{!?_without_dtrace:0}
> %define with_cgconfig 0%{!?_without_cgconfig:0}
> %define with_referential 0%{!?_without_referential:1}
> +%define with_sanlock 0%{!?_without_sanlock:0}
>
> # Non-server/HV driver defaults which are always enabled
> %define with_python 0%{!?_without_python:1}
> @@ -180,6 +181,7 @@
>
> %if 0%{?fedora} >= 13 || 0%{?rhel} >= 6
> %define with_dtrace 1
> +%define with_sanlock 1
> %endif
>
> # Pull in cgroups config system
> @@ -435,6 +437,9 @@ BuildRequires: systemtap-sdt-devel
> %if %{with_referential}
> BuildRequires: referential-devel
> %endif
> +%if %{with_sanlock}
> +BuildRequires: sanlock-devel
> +%endif
Hum ... weird
[root@paphio ~]# yum install sanlock-devel
..
No package sanlock-devel available.
Error: Nothing to do
[root@paphio ~]# cat /etc/fedora-release
Fedora release 14 (Laughlin)
[root@paphio ~]#
are you sure about the dep ?
> %if %{with_storage_fs}
> # For mount/umount in FS driver
> @@ -718,6 +723,8 @@ rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
> rm -f $RPM_BUILD_ROOT%{_libdir}/*.a
> rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.la
> rm -f $RPM_BUILD_ROOT%{_libdir}/python*/site-packages/*.a
> +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.la
> +rm -f $RPM_BUILD_ROOT%{_libdir}/libvirt/lock-driver/*.a
>
> %if %{with_network}
> install -d -m 0755 $RPM_BUILD_ROOT%{_datadir}/lib/libvirt/dnsmasq/
> @@ -1004,6 +1011,10 @@ fi
> %attr(0755, root, root) %{_libexecdir}/libvirt_lxc
> %endif
>
> +%if %{with_sanlock}
> +%attr(0755, root, root) %{_libdir}/libvirt/lock-driver/sanlock.so
> +%endif
> +
> %attr(0755, root, root) %{_libexecdir}/libvirt_parthelper
> %attr(0755, root, root) %{_libexecdir}/libvirt_iohelper
> %attr(0755, root, root) %{_sbindir}/libvirtd
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index 9c3d287..c3b45f9 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -31,6 +31,7 @@ src/fdstream.c
> src/interface/netcf_driver.c
> src/internal.h
> src/libvirt.c
> +src/locking/lock_driver_sanlock.c
> src/locking/lock_manager.c
> src/lxc/lxc_container.c
> src/lxc/lxc_conf.c
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 1e5a72e..edf017d 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -99,6 +99,9 @@ DRIVER_SOURCES = \
> locking/lock_driver_nop.h locking/lock_driver_nop.c \
> locking/domain_lock.h locking/domain_lock.c
>
> +LOCK_DRIVER_SANLOCK_SOURCES = \
> + locking/lock_driver_sanlock.c
> +
>
> # XML configuration format handling sources
> # Domain driver generic impl APIs
> @@ -1159,6 +1162,15 @@ libvirt_qemu_la_CFLAGS = $(AM_CFLAGS)
> libvirt_qemu_la_LIBADD = libvirt.la $(CYGWIN_EXTRA_LIBADD)
> EXTRA_DIST += $(LIBVIRT_QEMU_SYMBOL_FILE)
>
> +
> +lockdriverdir = $(libdir)/libvirt/lock-driver
> +lockdriver_LTLIBRARIES = sanlock.la
> +
> +sanlock_la_SOURCES = $(LOCK_DRIVER_SANLOCK_SOURCES)
> +sanlock_la_CFLAGS = $(AM_CLFAGS)
> +sanlock_la_LDFLAGS = -module -avoid-version
> +sanlock_la_LIBADD = -lsanlock
> +
> libexec_PROGRAMS =
>
> if WITH_LIBVIRTD
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index a3fe2f1..e61ea13 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -650,6 +650,7 @@ virVMOperationTypeToString;
> # memory.h
> virAlloc;
> virAllocN;
> +virAllocVar;
> virExpandN;
> virFree;
> virReallocN;
> diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c
> new file mode 100644
> index 0000000..6a31fdf
> --- /dev/null
> +++ b/src/locking/lock_driver_sanlock.c
> @@ -0,0 +1,413 @@
> +/*
> + * lock_driver_sanlock.c: A lock driver for Sanlock
> + *
> + * Copyright (C) 2010-2011 Red Hat, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* Author: Daniel P. Berrange <berrange(a)redhat.com>
> + */
> +
> +#include <config.h>
> +
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <errno.h>
> +#include <sys/types.h>
> +
> +#include <sanlock.h>
> +#include <sanlock_resource.h>
> +
> +#include "lock_driver.h"
> +#include "logging.h"
> +#include "virterror_internal.h"
> +#include "memory.h"
> +#include "util.h"
> +#include "files.h"
> +
> +#define VIR_FROM_THIS VIR_FROM_LOCKING
> +
> +#define virLockError(code, ...) \
> + virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \
> + __FUNCTION__, __LINE__, __VA_ARGS__)
> +
> +typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate;
> +typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr;
> +
> +struct _virLockManagerSanlockPrivate {
> + char vm_name[SANLK_NAME_LEN];
> + char vm_uuid[VIR_UUID_BUFLEN];
> + unsigned int vm_id;
> + unsigned int vm_pid;
> + unsigned int flags;
> + bool hasRWDisks;
> + int res_count;
> + struct sanlk_resource *res_args[SANLK_MAX_RESOURCES];
> +};
> +
> +/*
> + * sanlock plugin for the libvirt virLockManager API
> + */
> +
> +static int virLockManagerSanlockInit(unsigned int version ATTRIBUTE_UNUSED,
> + unsigned int flags)
> +{
> + virCheckFlags(0, -1);
> + return 0;
> +}
> +
> +static int virLockManagerSanlockDeinit(void)
> +{
> + virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Unloading sanlock plugin is forbidden"));
> + return -1;
> +}
> +
> +static int virLockManagerSanlockNew(virLockManagerPtr lock,
> + unsigned int type,
> + size_t nparams,
> + virLockManagerParamPtr params,
> + unsigned int flags)
> +{
> + virLockManagerParamPtr param;
> + virLockManagerSanlockPrivatePtr priv;
> + int i;
> +
> + virCheckFlags(0, -1);
> +
> + if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Unsupported object type %d"), type);
> + return -1;
> + }
> +
> + if (VIR_ALLOC(priv) < 0) {
> + virReportOOMError();
> + return -1;
> + }
> +
> + priv->flags = flags;
> +
> + for (i = 0; i < nparams; i++) {
> + param = ¶ms[i];
> +
> + if (STREQ(param->key, "uuid")) {
> + memcpy(priv->vm_uuid, param->value.uuid, 16);
> + } else if (STREQ(param->key, "name")) {
> + if (!virStrcpy(priv->vm_name, param->value.str, SANLK_NAME_LEN))
{
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Domain name '%s' exceeded %d
characters"),
> + param->value.str, SANLK_NAME_LEN);
> + goto error;
> + }
> + } else if (STREQ(param->key, "pid")) {
> + priv->vm_pid = param->value.ui;
> + } else if (STREQ(param->key, "id")) {
> + priv->vm_id = param->value.ui;
> + }
> + }
> +
> + lock->privateData = priv;
> + return 0;
> +
> +error:
> + VIR_FREE(priv);
> + return -1;
> +}
> +
> +static void virLockManagerSanlockFree(virLockManagerPtr lock)
> +{
> + virLockManagerSanlockPrivatePtr priv = lock->privateData;
> + int i;
> +
> + if (!priv)
> + return;
> +
> + for (i = 0; i < priv->res_count; i++)
> + VIR_FREE(priv->res_args[i]);
> + VIR_FREE(priv);
> + lock->privateData = NULL;
> +}
> +
> +static int virLockManagerSanlockAddResource(virLockManagerPtr lock,
> + unsigned int type,
> + const char *name,
> + size_t nparams,
> + virLockManagerParamPtr params,
> + unsigned int flags)
> +{
> + virLockManagerSanlockPrivatePtr priv = lock->privateData;
> + struct sanlk_resource *res;
> + int i;
> +
> + virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
> + VIR_LOCK_MANAGER_RESOURCE_SHARED, -1);
> +
> + if (priv->res_count == SANLK_MAX_RESOURCES) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Too many resources %d for object"),
> + SANLK_MAX_RESOURCES);
> + return -1;
> + }
> +
> + if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK) {
> + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED |
> + VIR_LOCK_MANAGER_RESOURCE_READONLY)))
> + priv->hasRWDisks = true;
> + return 0;
> + }
> +
> + if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE)
> + return 0;
> +
> + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) {
> + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Readonly leases are not supported"));
> + return -1;
> + }
> + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) {
> + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Sharable leases are not supported"));
> + return -1;
> + }
> +
> + if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0) {
> + virReportOOMError();
> + return -1;
> + }
> +
> + res->num_disks = 1;
> + if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Resource name '%s' exceeds %d
characters"),
> + name, SANLK_NAME_LEN);
> + goto error;
> + }
> +
> + for (i = 0; i < nparams; i++) {
> + if (STREQ(params[i].key, "path")) {
> + if (!virStrcpy(res->disks[0].path, params[i].value.str,
SANLK_PATH_LEN)) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Lease path '%s' exceeds %d
characters"),
> + params[i].value.str, SANLK_PATH_LEN);
> + goto error;
> + }
> + } else if (STREQ(params[i].key, "offset")) {
> + res->disks[0].offset = params[i].value.ul;
> + } else if (STREQ(params[i].key, "lockspace")) {
> + if (!virStrcpy(res->lockspace_name, params[i].value.str,
SANLK_NAME_LEN)) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Resource lockspace '%s' exceeds %d
characters"),
> + params[i].value.str, SANLK_NAME_LEN);
> + goto error;
> + }
> + }
> + }
> +
> + priv->res_args[priv->res_count] = res;
> + priv->res_count++;
> + return 0;
> +
> +error:
> + VIR_FREE(res);
> + return -1;
> +}
> +
> +static int virLockManagerSanlockAcquire(virLockManagerPtr lock,
> + const char *state,
> + unsigned int flags)
> +{
> + virLockManagerSanlockPrivatePtr priv = lock->privateData;
> + struct sanlk_options *opt;
> + struct sanlk_resource **res_args;
> + int res_count;
> + bool res_free = false;
> + int sock = -1;
> + int rv;
> + int i;
> +
> + virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
> +
> + if (priv->res_count == 0 &&
> + priv->hasRWDisks) {
> + virLockError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Read/write, exclusive access, disks were present, but
no leases specified"));
> + return -1;
> + }
> +
> + if (VIR_ALLOC(opt) < 0) {
> + virReportOOMError();
> + return -1;
> + }
> +
> + if (!virStrcpy(opt->owner_name, priv->vm_name, SANLK_NAME_LEN)) {
> + virLockError(VIR_ERR_INTERNAL_ERROR,
> + _("Domain name '%s' exceeded %d
characters"),
> + priv->vm_name, SANLK_NAME_LEN);
> + goto error;
> + }
> +
> + if (state && STRNEQ(state, "") && 0) {
> + if ((rv = sanlock_state_to_args((char *)state,
> + &res_count,
> + &res_args)) < 0) {
> + virReportSystemError(-rv,
> + _("Unable to parse lock state %s"),
> + state);
> + goto error;
> + }
> + res_free = true;
> + } else {
> + res_args = priv->res_args;
> + res_count = priv->res_count;
> + }
> +
> + VIR_DEBUG("Register sanlock %d", flags);
> + /* We only initialize 'sock' if we are in the real
> + * child process and we need it to be inherited
> + *
> + * If sock==-1, then sanlock auto-open/closes a
> + * temporary sock
> + */
> + if (priv->vm_pid == getpid() &&
> + (sock = sanlock_register()) < 0) {
> + virReportSystemError(-sock, "%s",
> + _("Failed to open socket to sanlock
daemon"));
> + goto error;
> + }
> +
> + if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
> + VIR_DEBUG("Acquiring object %u", priv->res_count);
> + if ((rv = sanlock_acquire(sock, priv->vm_pid, 0,
> + priv->res_count, priv->res_args,
> + opt)) < 0) {
Hum ...
> +#if 1
> + virReportSystemError(-rv, "%s",
> + _("Failed to acquire lock"));
> +#else
> + virLockError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Failed to acquire lock"));
> +#endif
this probably is worth some kind of comment or cleaned up
> + goto error;
> + }
> + }
> +
> + VIR_FREE(opt);
> +
> + /*
> + * We are *intentionally* "leaking" sock file descriptor
> + * because we want it to be inherited by QEMU. When the
> + * sock FD finally closes upon QEMU exit (or crash) then
> + * sanlock will notice EOF and release the lock
> + */
> + if (sock != -1 &&
> + virSetInherit(sock, true) < 0)
> + goto error;
> +
> + VIR_DEBUG("Acquire completed fd=%d", sock);
> +
> + if (res_free) {
> + for (i = 0 ; i < res_count ; i++) {
> + VIR_FREE(res_args[i]);
> + }
> + VIR_FREE(res_args);
> + }
> +
> + return 0;
> +
> +error:
> + if (res_free) {
> + for (i = 0 ; i < res_count ; i++) {
> + VIR_FREE(res_args[i]);
> + }
> + VIR_FREE(res_args);
> + }
> + VIR_FREE(opt);
> + VIR_FORCE_CLOSE(sock);
> + return -1;
> +}
> +
> +
> +static int virLockManagerSanlockRelease(virLockManagerPtr lock,
> + char **state,
> + unsigned int flags)
> +{
> + virLockManagerSanlockPrivatePtr priv = lock->privateData;
> + int res_count;
> + int rv;
> +
> + virCheckFlags(0, -1);
> +
> + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) <
0) {
> + virReportSystemError(-rv, "%s",
> + _("Failed to release lock"));
> + return -1;
> + }
> +
> + if (STREQ(*state, ""))
> + VIR_FREE(*state);
> +
> + if ((rv = sanlock_release(-1, priv->vm_pid, SANLK_REL_ALL, 0, NULL)) < 0)
{
> + virReportSystemError(-rv, "%s",
> + _("Failed to release lock"));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int virLockManagerSanlockInquire(virLockManagerPtr lock,
> + char **state,
> + unsigned int flags)
> +{
> + virLockManagerSanlockPrivatePtr priv = lock->privateData;
> + int rv, res_count;
> +
> + virCheckFlags(0, -1);
> +
> + VIR_DEBUG("pid=%d", priv->vm_pid);
> +
> + if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) <
0) {
> + virReportSystemError(-rv, "%s",
> + _("Failed to inquire lock"));
> + return -1;
> + }
> +
> + if (STREQ(*state, ""))
> + VIR_FREE(*state);
> +
> + return 0;
> +}
> +
> +virLockDriver virLockDriverImpl =
> +{
> + .version = VIR_LOCK_MANAGER_VERSION,
> +
> + .flags = VIR_LOCK_MANAGER_USES_STATE,
> +
> + .drvInit = virLockManagerSanlockInit,
> + .drvDeinit = virLockManagerSanlockDeinit,
> +
> + .drvNew = virLockManagerSanlockNew,
> + .drvFree = virLockManagerSanlockFree,
> +
> + .drvAddResource = virLockManagerSanlockAddResource,
> +
> + .drvAcquire = virLockManagerSanlockAcquire,
> + .drvRelease = virLockManagerSanlockRelease,
> + .drvInquire = virLockManagerSanlockInquire,
> +};
I'm a bit puzzled by the new dependancy, and this might prevent me
from building rc1 of 0.9.2 if pushed as-is,
but ACK in principle.
This is last weeks v4 posting. There is a v5 I posted this week which
addresses the things you mention here.
Regards,
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|