Am Donnerstag, 25. Juli 2013, 11:29:37 schrieb Daniel P. Berrange:
On Thu, Jul 25, 2013 at 12:07:44PM +0200, David Weber wrote:
> Thank you for your quick response!
>
> Am Donnerstag, 25. Juli 2013, 10:31:40 schrieb Daniel P. Berrange:
> > On Thu, Jul 25, 2013 at 08:23:24AM +0000, David Weber wrote:
> > > Hi,
> > >
> > > we are interested in using virtlockd on an OCFS2 shared filesystem.
> > > We are now facing the problem that virtlockd uses fcntl() locks which
> > > aren't supported by OCFS2 with the o2cb cluster stack and we want
> > > to avoid using indirect leases.
> > >
> > > OCFS2 instead supports flock() which is quite similar to fcntl(). I
> > > attached a patch which makes libvirt use flock() *instead* of fcntl()
> > > and it seems to work.
> > >
> > > NFS on the contrast only supports fcntl() so it should be configurable
> > > which lock type to use.
> > >
> > > I'm not very experienced with locking, so would such a patch be
> > > acceptable or do you see possible problems with it?
> >
> > We definitely can't use flock() unconditionally like that, as it is
> > less widely supported than fcntl() locking and is known broken on
> > most network filesystems (flock() will only lock wrt the local node,
> > not network-wide).
>
> flock() locks are cluster aware in recent versions of OCFS2 and I would
> try to make it configurable which lock type to use.
>
> > I'm pretty surprised that you can fcntl() is not supported in ocfs2.
> > Are you really sure about that ?
> >
> > This mail message suggests it is supported
> >
> >
https://oss.oracle.com/pipermail/ocfs2-users/2009-June/003572.html
> >
> > "Support for fcntl locking aka file-range locking aka posix locking is
> >
> > provided by vfs for all file systems. However, that support is
> > appropriate
> > only for local file systems.
> >
> > In ocfs2, we have added support for cluster-aware fcntl locking via
> > the userspace clustering framework that allows one to use ocfs2 with
> > different cluster-stacks."
>
> OCFS2 supports fcntl() only with the userspace cluster stacks (pacemaker
> and cman) which we want to avoid.
Looking again at flock() I see it cannot support locking of ranges, only
the entire file. This makes it unsuitable as a replacement for libvirt's
use of fcntl() I'm afraid. I can only sugggest you configure OCFS2 so
that it supports fcntl(), or setup virtlockd to use separate indirect
leases on a diffrent shared filesystem, or perhaps try sanlock instead
which doesn't require any special filesystem support.
It's true that flock() doesn't support locking of ranges but I can't see how
this is necessary.
I attached a patch with extends virtlockd so it can either use flock() *or*
fcntl(). Configurable in the configuration file.
> SUSE advises to use indirect leases which we also want to avoid:
>
https://www.suse.com/documentation//sled11/book_kvm/?page=/documentation//
> sled11/book_kvm/data/sec_libvirt_storage_locking.html
>
> "virtlockd's default configuration does not allow you to lock disk files
> placed on a shared file system (for example NFS or OCFS2). Locking on
> these file systems requires you to specify a lockspace directory on the
> VM Host Server by setting"
>
> Although that's not completely correct because NFS supports fcntl()
That's just badly written explanation for that config setting. It should
really be saying that the default configuration does not provide protection
across multiple hosts for file paths which are not visible via a shared
filesystem. eg a SAN LUN in /dev/sdNNN won't be protected in the default
config.
Daniel
From 2a91cc505b4fb5b467d2add416b2ebf4baa53311 Mon Sep 17 00:00:00 2001
From: David Weber <wb(a)munzinger.de>
Date: Thu, 25 Jul 2013 14:10:23 +0200
Subject: [PATCH] Add flock() support for locking
Configurable in the virtlockd config
---
src/locking/libvirt_lockd.aug | 1 +
src/locking/lock_daemon_dispatch.c | 6 ++-
src/locking/lock_driver_lockd.c | 14 ++++++-
src/locking/lock_protocol.x | 3 +-
src/locking/lockd.conf | 7 ++++
src/util/virfile.c | 59 +++++++++++++++++++++++++++-
src/util/virfile.h | 4 ++
src/util/virlockspace.c | 80
++++++++++++++++++++++++++++----------
src/util/virlockspace.h | 1 +
9 files changed, 148 insertions(+), 27 deletions(-)
diff --git a/src/locking/libvirt_lockd.aug b/src/locking/libvirt_lockd.aug
index 6a3bcba..7df715f 100644
--- a/src/locking/libvirt_lockd.aug
+++ b/src/locking/libvirt_lockd.aug
@@ -19,6 +19,7 @@ module Libvirt_lockd =
(* Each enty in the config is one of the following three ... *)
let entry = bool_entry "auto_disk_leases"
| bool_entry "require_lease_for_disks"
+ | bool_entry "useFlock"
| str_entry "file_lockspace_dir"
| str_entry "lvm_lockspace_dir"
| str_entry "scsi_lockspace_dir"
diff --git a/src/locking/lock_daemon_dispatch.c
b/src/locking/lock_daemon_dispatch.c
index c2e1000..930018c 100644
--- a/src/locking/lock_daemon_dispatch.c
+++ b/src/locking/lock_daemon_dispatch.c
@@ -50,7 +50,8 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr
server ATTRIBUTE_UNU
virMutexLock(&priv->lock);
virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
- VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE,
cleanup);
+ VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE |
+ VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_USE_FLOCK,
cleanup);
if (priv->restricted) {
virReportError(VIR_ERR_OPERATION_DENIED, "%s",
@@ -76,7 +77,8 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr
server ATTRIBUTE_UNU
newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED;
if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE)
newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE;
-
+ if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_USE_FLOCK)
+ newFlags |= VIR_LOCK_SPACE_ACQUIRE_FLOCK;
if (virLockSpaceAcquireResource(lockspace,
args->name,
priv->ownerPid,
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index b115799..1e07162 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -71,6 +71,7 @@ struct _virLockManagerLockDaemonPrivate {
struct _virLockManagerLockDaemonDriver {
bool autoDiskLease;
bool requireLeaseForDisks;
+ bool useFlock;
char *fileLockSpaceDir;
char *lvmLockSpaceDir;
@@ -125,6 +126,10 @@ static int virLockManagerLockDaemonLoadConfig(const char
*configFile)
CHECK_TYPE("auto_disk_leases", VIR_CONF_LONG);
if (p) driver->autoDiskLease = p->l;
+ p = virConfGetValue(conf, "use_flock");
+ CHECK_TYPE("use_flock", VIR_CONF_LONG);
+ if (p) driver->useFlock = p->l;
+
p = virConfGetValue(conf, "file_lockspace_dir");
CHECK_TYPE("file_lockspace_dir", VIR_CONF_STRING);
if (p && p->str) {
@@ -379,6 +384,7 @@ static int virLockManagerLockDaemonInit(unsigned int
version,
driver->requireLeaseForDisks = true;
driver->autoDiskLease = true;
+ driver->useFlock = false;
if (virLockManagerLockDaemonLoadConfig(configFile) < 0)
goto error;
@@ -724,6 +730,8 @@ static int
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
args.name = priv->resources[i].name;
args.flags = priv->resources[i].flags;
+ if (driver->useFlock)
+ args.flags |=
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_USE_FLOCK;
if (virNetClientProgramCall(program,
client,
counter++,
@@ -780,10 +788,12 @@ static int
virLockManagerLockDaemonRelease(virLockManagerPtr lock,
args.name = priv->resources[i].name;
args.flags = priv->resources[i].flags;
- args.flags &=
- ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
+ args.flags &=~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE);
+ if (driver->useFlock)
+ args.flags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_USE_FLOCK;
+
if (virNetClientProgramCall(program,
client,
counter++,
diff --git a/src/locking/lock_protocol.x b/src/locking/lock_protocol.x
index 354d51a..7828674 100644
--- a/src/locking/lock_protocol.x
+++ b/src/locking/lock_protocol.x
@@ -52,7 +52,8 @@ struct virLockSpaceProtocolDeleteResourceArgs {
enum virLockSpaceProtocolAcquireResourceFlags {
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = 1,
- VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = 2
+ VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = 2,
+ VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_USE_FLOCK = 4
};
struct virLockSpaceProtocolAcquireResourceArgs {
diff --git a/src/locking/lockd.conf b/src/locking/lockd.conf
index 85edb91..944c808 100644
--- a/src/locking/lockd.conf
+++ b/src/locking/lockd.conf
@@ -17,6 +17,13 @@
#
#require_lease_for_disks = 1
+#
+# The default lockd behaviour is to acquire locks via their
+# the fnctl(2) syscall. This isn't aviable on all filesystems.
+# In this case you can use this flag to use flock(2) instead.
+#
+#use_flock = 0
+
#
# The default lockd behaviour is to use the "direct"
diff --git a/src/util/virfile.c b/src/util/virfile.c
index 8f0eec3..56900f0 100644
--- a/src/util/virfile.c
+++ b/src/util/virfile.c
@@ -378,7 +378,7 @@ int virFileLock(int fd, bool shared, off_t start, off_t len)
* @start: byte offset to start unlock
* @len: length of lock (0 to release entire remaining file from @start)
*
- * Release a lock previously acquired with virFileUnlock().
+ * Release a lock previously acquired with virFileLock().
* NB the lock will also be released if any open file descriptor
* pointing to the same file as @fd is closed
*
@@ -398,6 +398,63 @@ int virFileUnlock(int fd, off_t start, off_t len)
return 0;
}
+
+/**
+ * virFileLockFlock:
+ * @fd: file descriptor to acquire the lock on
+ * @shared: type of lock to acquire
+ *
+ * Attempt to acquire a lock on the file @fd. If @shared
+ * is true, then a shared lock will be acquired,
+ * otherwise an exclusive lock will be acquired. If
+ * the lock cannot be acquired, an error will be
+ * returned. This will not wait to acquire the lock if
+ * another process already holds it.
+ *
+ * The lock will be released when @fd is closed. The lock
+ * will also be released if *any* other open file descriptor
+ * pointing to the same underlying file is closed. As such
+ * this function should not be relied on in multi-threaded
+ * apps where other threads can be opening/closing arbitrary
+ * files.
+ *
+ * Returns 0 on success, or -errno otherwise
+ */
+int virFileLockFlock(int fd, bool shared)
+{
+ if (shared) {
+ if (flock(fd, LOCK_SH | LOCK_NB) < 0)
+ return -errno;
+ } else {
+ if (flock(fd, LOCK_EX | LOCK_NB) < 0)
+ return -errno;
+ }
+
+ return 0;
+}
+
+
+/**
+ * virFileUnlockFlock:
+ * @fd: file descriptor to release the lock on
+ * @start: byte offset to start unlock
+ *
+ * Release a lock previously acquired with virFileLockFlock().
+ * NB the lock will also be released if any open file descriptor
+ * pointing to the same file as @fd is closed
+ *
+ * Returns 0 on succcess, or -errno on error
+ */
+int virFileUnlockFlock(int fd)
+{
+ if (flock(fd, LOCK_UN) < 0)
+ return -errno;
+
+ return 0;
+}
+
+
+
#else
int virFileLock(int fd ATTRIBUTE_UNUSED,
bool shared ATTRIBUTE_UNUSED,
diff --git a/src/util/virfile.h b/src/util/virfile.h
index 72d35ce..e8ced5c 100644
--- a/src/util/virfile.h
+++ b/src/util/virfile.h
@@ -100,6 +100,10 @@ void virFileWrapperFdFree(virFileWrapperFdPtr dfd);
int virFileLock(int fd, bool shared, off_t start, off_t len);
int virFileUnlock(int fd, off_t start, off_t len);
+int virFileLockFlock(int fd, bool shared);
+int virFileUnlockFlock(int fd);
+
+
typedef int (*virFileRewriteFunc)(int fd, void *opaque);
int virFileRewrite(const char *path,
mode_t mode,
diff --git a/src/util/virlockspace.c b/src/util/virlockspace.c
index afb1abb..ef95672 100644
--- a/src/util/virlockspace.c
+++ b/src/util/virlockspace.c
@@ -154,17 +154,32 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace,
goto error;
}
- if (virFileLock(res->fd, shared, 0, 1) < 0) {
- if (errno == EACCES || errno == EAGAIN) {
- virReportError(VIR_ERR_RESOURCE_BUSY,
- _("Lockspace resource '%s' is
locked"),
- resname);
- } else {
- virReportSystemError(errno,
- _("Unable to acquire lock on
'%s'"),
- res->path);
+ if (flags & VIR_LOCK_SPACE_ACQUIRE_FLOCK) {
+ if (virFileLockFlock(res->fd, shared) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ virReportError(VIR_ERR_RESOURCE_BUSY,
+ _("Lockspace resource '%s' is
locked"),
+ resname);
+ } else {
+ virReportSystemError(errno,
+ _("Unable to acquire lock on
'%s'"),
+ res->path);
+ }
+ goto error;
+ }
+ } else {
+ if (virFileLock(res->fd, shared, 0, 1) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ virReportError(VIR_ERR_RESOURCE_BUSY,
+ _("Lockspace resource '%s' is
locked"),
+ resname);
+ } else {
+ virReportSystemError(errno,
+ _("Unable to acquire lock on
'%s'"),
+ res->path);
+ }
+ goto error;
}
- goto error;
}
/* Now make sure the pidfile we locked is the same
@@ -201,17 +216,32 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace,
goto error;
}
- if (virFileLock(res->fd, shared, 0, 1) < 0) {
- if (errno == EACCES || errno == EAGAIN) {
- virReportError(VIR_ERR_RESOURCE_BUSY,
- _("Lockspace resource '%s' is locked"),
- resname);
- } else {
- virReportSystemError(errno,
- _("Unable to acquire lock on
'%s'"),
- res->path);
+ if (flags & VIR_LOCK_SPACE_ACQUIRE_FLOCK) {
+ if (virFileLockFlock(res->fd, shared) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ virReportError(VIR_ERR_RESOURCE_BUSY,
+ _("Lockspace resource '%s' is
locked"),
+ resname);
+ } else {
+ virReportSystemError(errno,
+ _("Unable to acquire lock on
'%s'"),
+ res->path);
+ }
+ goto error;
+ }
+ } else {
+ if (virFileLock(res->fd, shared, 0, 1) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ virReportError(VIR_ERR_RESOURCE_BUSY,
+ _("Lockspace resource '%s' is
locked"),
+ resname);
+ } else {
+ virReportSystemError(errno,
+ _("Unable to acquire lock on
'%s'"),
+ res->path);
+ }
+ goto error;
}
- goto error;
}
}
res->lockHeld = true;
@@ -616,9 +646,17 @@ int virLockSpaceAcquireResource(virLockSpacePtr
lockspace,
VIR_DEBUG("lockspace=%p resname=%s flags=%x owner=%lld",
lockspace, resname, flags, (unsigned long long)owner);
+
+ if (flags & VIR_LOCK_SPACE_ACQUIRE_SHARED)
+ VIR_DEBUG("David: shared");
+ if (flags & VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE)
+ VIR_DEBUG("David: autocreate");
+ if (flags & VIR_LOCK_SPACE_ACQUIRE_FLOCK)
+ VIR_DEBUG("David: flock");
virCheckFlags(VIR_LOCK_SPACE_ACQUIRE_SHARED |
- VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE, -1);
+ VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE |
+ VIR_LOCK_SPACE_ACQUIRE_FLOCK, -1);
virMutexLock(&lockspace->lock);
diff --git a/src/util/virlockspace.h b/src/util/virlockspace.h
index 041cf20..a6ba3dd 100644
--- a/src/util/virlockspace.h
+++ b/src/util/virlockspace.h
@@ -45,6 +45,7 @@ int virLockSpaceDeleteResource(virLockSpacePtr lockspace,
typedef enum {
VIR_LOCK_SPACE_ACQUIRE_SHARED = (1 << 0),
VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE = (1 << 1),
+ VIR_LOCK_SPACE_ACQUIRE_FLOCK = (1 << 2),
} virLockSpaceAcquireFlags;
int virLockSpaceAcquireResource(virLockSpacePtr lockspace,
--
1.8.1.2