[libvirt] [PATCH v2 0/7] Introduce metadata locking

v2 of: https://www.redhat.com/archives/libvir-list/2018-August/msg00482.html diff to v1: - 1/6 from original patch set is replaced with different approach. As Dan suggested, virLockSpace accepts range to lock through its API and has new flag that tells it to wait for the lock to be acquired. Michal Prívozník (7): virlockspace: Allow caller to specify start and length offset in virLockSpaceAcquireResource virlockspace: Introduce VIR_LOCK_SPACE_ACQUIRE_WAIT lock_driver.h: Introduce metadata flag lockd_driver_lockd: Implement metadata flag lock_driver_sanlock: Handle metadata flag gracefully domain_lock: Implement metadata locking qemu_security: Lock metadata while relabelling src/libvirt_private.syms | 8 + src/locking/domain_lock.c | 304 ++++++++++++++++++++++++++++++++++--- src/locking/domain_lock.h | 28 ++++ src/locking/lock_daemon_dispatch.c | 13 +- src/locking/lock_driver.h | 2 + src/locking/lock_driver_lockd.c | 31 ++-- src/locking/lock_driver_lockd.h | 1 + src/locking/lock_driver_sanlock.c | 25 ++- src/qemu/qemu_security.c | 107 +++++++++++++ src/util/virlockspace.c | 25 ++- src/util/virlockspace.h | 5 + tests/virlockspacetest.c | 29 +++- 12 files changed, 525 insertions(+), 53 deletions(-) -- 2.16.4

So far the virLockSpaceAcquireResource() locks the first byte in the underlying file. But caller might want to lock other range. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 3 +++ src/util/virlockspace.c | 15 ++++++++++----- src/util/virlockspace.h | 4 ++++ tests/virlockspacetest.c | 29 ++++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 1b479db55d..10248ec0b5 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -50,6 +50,8 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; + off_t start = 0; + off_t len = 1; virMutexLock(&priv->lock); @@ -84,6 +86,7 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU if (virLockSpaceAcquireResource(lockspace, args->name, priv->ownerPid, + start, len, newFlags) < 0) goto cleanup; diff --git a/src/util/virlockspace.c b/src/util/virlockspace.c index 3364c843aa..60bfef4c5f 100644 --- a/src/util/virlockspace.c +++ b/src/util/virlockspace.c @@ -115,8 +115,10 @@ static void virLockSpaceResourceFree(virLockSpaceResourcePtr res) static virLockSpaceResourcePtr virLockSpaceResourceNew(virLockSpacePtr lockspace, const char *resname, - unsigned int flags, - pid_t owner) + pid_t owner, + off_t start, + off_t len, + unsigned int flags) { virLockSpaceResourcePtr res; bool shared = !!(flags & VIR_LOCK_SPACE_ACQUIRE_SHARED); @@ -157,7 +159,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, 0, 1, false) < 0) { + if (virFileLock(res->fd, shared, start, len, false) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -204,7 +206,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, 0, 1, false) < 0) { + if (virFileLock(res->fd, shared, start, len, false) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -612,6 +614,8 @@ int virLockSpaceDeleteResource(virLockSpacePtr lockspace, int virLockSpaceAcquireResource(virLockSpacePtr lockspace, const char *resname, pid_t owner, + off_t start, + off_t len, unsigned int flags) { int ret = -1; @@ -641,7 +645,8 @@ int virLockSpaceAcquireResource(virLockSpacePtr lockspace, goto cleanup; } - if (!(res = virLockSpaceResourceNew(lockspace, resname, flags, owner))) + if (!(res = virLockSpaceResourceNew(lockspace, resname, + owner, start, len, flags))) goto cleanup; if (virHashAddEntry(lockspace->resources, resname, res) < 0) { diff --git a/src/util/virlockspace.h b/src/util/virlockspace.h index 041cf20396..24f2c89be6 100644 --- a/src/util/virlockspace.h +++ b/src/util/virlockspace.h @@ -22,6 +22,8 @@ #ifndef __VIR_LOCK_SPACE_H__ # define __VIR_LOCK_SPACE_H__ +# include <sys/types.h> + # include "internal.h" # include "virjson.h" @@ -50,6 +52,8 @@ typedef enum { int virLockSpaceAcquireResource(virLockSpacePtr lockspace, const char *resname, pid_t owner, + off_t start, + off_t len, unsigned int flags); int virLockSpaceReleaseResource(virLockSpacePtr lockspace, diff --git a/tests/virlockspacetest.c b/tests/virlockspacetest.c index 75ad98a02c..2409809353 100644 --- a/tests/virlockspacetest.c +++ b/tests/virlockspacetest.c @@ -99,6 +99,8 @@ static int testLockSpaceResourceLockExcl(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; + const off_t start = 0; + const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -111,13 +113,13 @@ static int testLockSpaceResourceLockExcl(const void *args ATTRIBUTE_UNUSED) if (virLockSpaceCreateResource(lockspace, "foo") < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) < 0) + if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), start, len, 0) < 0) goto cleanup; if (!virFileExists(LOCKSPACE_DIR "/foo")) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) == 0) + if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), start, len, 0) == 0) goto cleanup; if (virLockSpaceDeleteResource(lockspace, "foo") == 0) @@ -145,6 +147,8 @@ static int testLockSpaceResourceLockExclAuto(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; + const off_t start = 0; + const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -158,6 +162,7 @@ static int testLockSpaceResourceLockExclAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -183,6 +188,8 @@ static int testLockSpaceResourceLockShr(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; + const off_t start = 0; + const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -196,13 +203,16 @@ static int testLockSpaceResourceLockShr(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED) < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) == 0) + if (virLockSpaceAcquireResource(lockspace, "foo", + geteuid(), start, len, 0) == 0) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED) < 0) goto cleanup; @@ -237,6 +247,8 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; + const off_t start = 0; + const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -250,6 +262,7 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED | VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -258,6 +271,7 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) == 0) goto cleanup; @@ -265,6 +279,7 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), + start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED | VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -297,6 +312,8 @@ static int testLockSpaceResourceLockPath(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; + const off_t start = 0; + const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -309,13 +326,15 @@ static int testLockSpaceResourceLockPath(const void *args ATTRIBUTE_UNUSED) if (virLockSpaceCreateResource(lockspace, LOCKSPACE_DIR "/foo") < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", geteuid(), 0) < 0) + if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", + geteuid(), start, len, 0) < 0) goto cleanup; if (!virFileExists(LOCKSPACE_DIR "/foo")) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", geteuid(), 0) == 0) + if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", + geteuid(), start, len, 0) == 0) goto cleanup; if (virLockSpaceDeleteResource(lockspace, LOCKSPACE_DIR "/foo") == 0) -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
So far the virLockSpaceAcquireResource() locks the first byte in the underlying file. But caller might want to lock other range.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 3 +++ src/util/virlockspace.c | 15 ++++++++++----- src/util/virlockspace.h | 4 ++++ tests/virlockspacetest.c | 29 ++++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 10 deletions(-)
Reviewed-by: John Ferlan <jferlan@redhat.com> John

On Tue, Aug 14, 2018 at 01:19:37PM +0200, Michal Privoznik wrote:
So far the virLockSpaceAcquireResource() locks the first byte in the underlying file. But caller might want to lock other range.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 3 +++ src/util/virlockspace.c | 15 ++++++++++----- src/util/virlockspace.h | 4 ++++ tests/virlockspacetest.c | 29 ++++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 10 deletions(-)
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

This flag modifies the way the lock is acquired. It waits for the lock to be set instead of usual set-or-fail logic that happens without this flag. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virlockspace.c | 14 ++++++++++---- src/util/virlockspace.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/util/virlockspace.c b/src/util/virlockspace.c index 60bfef4c5f..d75a6d4a6e 100644 --- a/src/util/virlockspace.c +++ b/src/util/virlockspace.c @@ -122,6 +122,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, { virLockSpaceResourcePtr res; bool shared = !!(flags & VIR_LOCK_SPACE_ACQUIRE_SHARED); + bool waitForLock = !!(flags & VIR_LOCK_SPACE_ACQUIRE_WAIT); if (VIR_ALLOC(res) < 0) return NULL; @@ -159,7 +160,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, start, len, false) < 0) { + if (virFileLock(res->fd, shared, start, len, waitForLock) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -206,7 +207,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, start, len, false) < 0) { + if (virFileLock(res->fd, shared, start, len, waitForLock) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -625,11 +626,16 @@ int virLockSpaceAcquireResource(virLockSpacePtr lockspace, lockspace, resname, flags, (unsigned long long)owner); virCheckFlags(VIR_LOCK_SPACE_ACQUIRE_SHARED | - VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE, -1); + VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE | + VIR_LOCK_SPACE_ACQUIRE_WAIT, -1); + + VIR_EXCLUSIVE_FLAGS_RET(VIR_LOCK_SPACE_ACQUIRE_WAIT, + VIR_LOCK_SPACE_ACQUIRE_SHARED, -1); virMutexLock(&lockspace->lock); - if ((res = virHashLookup(lockspace->resources, resname))) { + if (!(flags & VIR_LOCK_SPACE_ACQUIRE_WAIT) && + (res = virHashLookup(lockspace->resources, resname))) { if ((res->flags & VIR_LOCK_SPACE_ACQUIRE_SHARED) && (flags & VIR_LOCK_SPACE_ACQUIRE_SHARED)) { diff --git a/src/util/virlockspace.h b/src/util/virlockspace.h index 24f2c89be6..bfaed34d5b 100644 --- a/src/util/virlockspace.h +++ b/src/util/virlockspace.h @@ -47,6 +47,7 @@ int virLockSpaceDeleteResource(virLockSpacePtr lockspace, typedef enum { VIR_LOCK_SPACE_ACQUIRE_SHARED = (1 << 0), VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE = (1 << 1), + VIR_LOCK_SPACE_ACQUIRE_WAIT = (1 << 2), } virLockSpaceAcquireFlags; int virLockSpaceAcquireResource(virLockSpacePtr lockspace, -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
This flag modifies the way the lock is acquired. It waits for the lock to be set instead of usual set-or-fail logic that happens without this flag.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virlockspace.c | 14 ++++++++++---- src/util/virlockspace.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-)
Reviewed-by: John Ferlan <jferlan@redhat.com> John

On Tue, Aug 14, 2018 at 01:19:38PM +0200, Michal Privoznik wrote:
This flag modifies the way the lock is acquired. It waits for the lock to be set instead of usual set-or-fail logic that happens without this flag.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virlockspace.c | 14 ++++++++++---- src/util/virlockspace.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-)
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 8b7cccc521..7c8f79520a 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -56,6 +56,8 @@ typedef enum { VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), /* The resource is assigned in shared, writable mode */ VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), + /* The resource is locked for metadata change */ + VIR_LOCK_MANAGER_RESOURCE_METADATA = (1 << 2), } virLockManagerResourceFlags; typedef enum { -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 8b7cccc521..7c8f79520a 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -56,6 +56,8 @@ typedef enum { VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), /* The resource is assigned in shared, writable mode */ VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), + /* The resource is locked for metadata change */ + VIR_LOCK_MANAGER_RESOURCE_METADATA = (1 << 2),
Does this work or make sense for lease type? Of course this adds something that would "conceivably" be available such that we may want to document it on https://libvirt.org/internals/locking.html, but then again that's so sparse it would take a bit of spelunking in the code to figure it out. Anyway, since the name itself seems reasonable to me, Reviewed-by: John Ferlan <jferlan@redhat.com> if you add a few more details around it related to disk vs. lease that'd be nice. If you update the docs eventually with all that you've learned, then that'd be great too! John
} virLockManagerResourceFlags;
typedef enum {

On 08/17/2018 12:24 AM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 8b7cccc521..7c8f79520a 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -56,6 +56,8 @@ typedef enum { VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), /* The resource is assigned in shared, writable mode */ VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), + /* The resource is locked for metadata change */ + VIR_LOCK_MANAGER_RESOURCE_METADATA = (1 << 2),
Does this work or make sense for lease type?
That's the thing. You're right, it doesn't make sense for lease type. But on the level of RPC of virtlockd both leases and disks kind of blend together. I mean, virtlockd is merely told to lock this or that file.
Of course this adds something that would "conceivably" be available such that we may want to document it on https://libvirt.org/internals/locking.html, but then again that's so sparse it would take a bit of spelunking in the code to figure it out.
Anyway, since the name itself seems reasonable to me,
Reviewed-by: John Ferlan <jferlan@redhat.com>
if you add a few more details around it related to disk vs. lease that'd be nice. If you update the docs eventually with all that you've learned, then that'd be great too!
I'm gonna save that to a follow up patch. Thanks! Michal

On Mon, Aug 20, 2018 at 09:25:18AM +0200, Michal Prívozník wrote:
On 08/17/2018 12:24 AM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 8b7cccc521..7c8f79520a 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -56,6 +56,8 @@ typedef enum { VIR_LOCK_MANAGER_RESOURCE_READONLY = (1 << 0), /* The resource is assigned in shared, writable mode */ VIR_LOCK_MANAGER_RESOURCE_SHARED = (1 << 1), + /* The resource is locked for metadata change */ + VIR_LOCK_MANAGER_RESOURCE_METADATA = (1 << 2),
Does this work or make sense for lease type?
That's the thing. You're right, it doesn't make sense for lease type. But on the level of RPC of virtlockd both leases and disks kind of blend together. I mean, virtlockd is merely told to lock this or that file.
That's ok. The way I look at it the domain XML describes various resources, disks, leases, etc. The virtlockd daemon provides a generic mechanism for acquiring locks. We just happen to map leases onto the default lock type. Here we're adding a 2nd lock type and do not require any mapping from leases. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 12 ++++++++++-- src/locking/lock_driver_lockd.c | 31 +++++++++++++++++++++---------- src/locking/lock_driver_lockd.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 10248ec0b5..c24571ceac 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -37,6 +37,9 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch"); #include "lock_daemon_dispatch_stubs.h" +#define DEFAULT_OFFSET 0 +#define METADATA_OFFSET 1 + static int virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNUSED, virNetServerClientPtr client, @@ -50,13 +53,14 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; - off_t start = 0; + off_t start = DEFAULT_OFFSET; off_t len = 1; virMutexLock(&priv->lock); virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE, cleanup); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA, cleanup); if (priv->restricted) { virReportError(VIR_ERR_OPERATION_DENIED, "%s", @@ -82,6 +86,10 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED; if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE) newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE; + if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA) { + start = METADATA_OFFSET; + newFlags |= VIR_LOCK_SPACE_ACQUIRE_WAIT; + } if (virLockSpaceAcquireResource(lockspace, args->name, diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 957a963a7b..bd14ed8930 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -475,9 +475,11 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, bool autoCreate = false; virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1); - if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY && + !(flags & VIR_LOCK_MANAGER_RESOURCE_METADATA)) return 0; switch (type) { @@ -489,7 +491,8 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, } if (!driver->autoDiskLease) { if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | - VIR_LOCK_MANAGER_RESOURCE_READONLY))) + VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_METADATA))) priv->hasRWDisks = true; return 0; } @@ -602,6 +605,10 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, priv->resources[priv->nresources-1].flags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE; + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + priv->resources[priv->nresources-1].flags |= + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA; + return 0; error: @@ -626,12 +633,15 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY | VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1); - if (priv->nresources == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->nresources == 0) { + if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + return 0; } if (!(client = virLockManagerLockDaemonConnect(lock, &program, &counter))) @@ -711,7 +721,8 @@ static int virLockManagerLockDaemonRelease(virLockManagerPtr lock, args.flags &= ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA); if (virNetClientProgramCall(program, client, diff --git a/src/locking/lock_driver_lockd.h b/src/locking/lock_driver_lockd.h index 6931fe7425..9882793260 100644 --- a/src/locking/lock_driver_lockd.h +++ b/src/locking/lock_driver_lockd.h @@ -25,6 +25,7 @@ enum virLockSpaceProtocolAcquireResourceFlags { VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = (1 << 0), VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = (1 << 1), + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA = (1 << 2), }; #endif /* __VIR_LOCK_DRIVER_LOCKD_H__ */ -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 12 ++++++++++-- src/locking/lock_driver_lockd.c | 31 +++++++++++++++++++++---------- src/locking/lock_driver_lockd.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 10248ec0b5..c24571ceac 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -37,6 +37,9 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch");
#include "lock_daemon_dispatch_stubs.h"
+#define DEFAULT_OFFSET 0 +#define METADATA_OFFSET 1 +
Curious, does this lead to the prospect of being able to acquire/use offset==0 length==1 and offset==1 length==1 at least conceptually and granularity wise? Related or not, there's a strange NFSv3 collision between QEMU and NFS related to some sort of fcntl locking granularity. More details that you possibly want to read at: https://bugzilla.redhat.com/show_bug.cgi?id=1592582 https://bugzilla.redhat.com/show_bug.cgi?id=1547095 I only note it because well it was on my 'short term history' radar and suffice to say it's an ugly and not fun issue dealing with these locks.
static int virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNUSED, virNetServerClientPtr client, @@ -50,13 +53,14 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; - off_t start = 0; + off_t start = DEFAULT_OFFSET; off_t len = 1;
virMutexLock(&priv->lock);
virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE, cleanup); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA, cleanup);
if (priv->restricted) { virReportError(VIR_ERR_OPERATION_DENIED, "%s", @@ -82,6 +86,10 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED; if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE) newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE; + if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA) { + start = METADATA_OFFSET; + newFlags |= VIR_LOCK_SPACE_ACQUIRE_WAIT; + }
If this is documented in more detail, then it should be noted that a METADATA lock forces usage of the wait API's. I can only imagine someone, some day will ask for a wait w/ timeout to avoid waiting too long.
if (virLockSpaceAcquireResource(lockspace, args->name, diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 957a963a7b..bd14ed8930 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -475,9 +475,11 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, bool autoCreate = false;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
- if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY && + !(flags & VIR_LOCK_MANAGER_RESOURCE_METADATA)) return 0;
Could someone pass READONLY & METADATA and not return 0 here? Yes, doesn't make sense, but combining them leads to the logic matrix question.
switch (type) { @@ -489,7 +491,8 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, } if (!driver->autoDiskLease) { if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | - VIR_LOCK_MANAGER_RESOURCE_READONLY))) + VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_METADATA))) priv->hasRWDisks = true; return 0; } @@ -602,6 +605,10 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, priv->resources[priv->nresources-1].flags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE;
+ if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + priv->resources[priv->nresources-1].flags |= + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA; + return 0;
error: @@ -626,12 +633,15 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY | VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1);
- if (priv->nresources == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->nresources == 0) {
I'm assuming there's a relationship between metadata and nresources == 0 which really isn't apparent especially since the subsequent error message is about leases. Do we need to check for resource type? Or in the top level API do we need to only allow METADATA for DISK type?
+ if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + return 0; }
if (!(client = virLockManagerLockDaemonConnect(lock, &program, &counter))) @@ -711,7 +721,8 @@ static int virLockManagerLockDaemonRelease(virLockManagerPtr lock,
args.flags &= ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA);
if (virNetClientProgramCall(program, client, diff --git a/src/locking/lock_driver_lockd.h b/src/locking/lock_driver_lockd.h index 6931fe7425..9882793260 100644 --- a/src/locking/lock_driver_lockd.h +++ b/src/locking/lock_driver_lockd.h @@ -25,6 +25,7 @@ enum virLockSpaceProtocolAcquireResourceFlags { VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = (1 << 0), VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = (1 << 1), + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA = (1 << 2),
Use of RESOURCE_METADATA would be more consistent wouldn't it? John I'll continue looking tomorrow...
};
#endif /* __VIR_LOCK_DRIVER_LOCKD_H__ */

On 08/17/2018 01:53 AM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 12 ++++++++++-- src/locking/lock_driver_lockd.c | 31 +++++++++++++++++++++---------- src/locking/lock_driver_lockd.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 10248ec0b5..c24571ceac 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -37,6 +37,9 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch");
#include "lock_daemon_dispatch_stubs.h"
+#define DEFAULT_OFFSET 0 +#define METADATA_OFFSET 1 +
Curious, does this lead to the prospect of being able to acquire/use offset==0 length==1 and offset==1 length==1 at least conceptually and granularity wise?
Yes, this is exactly the purpose. I've taken inspiration from qemu. They lock bytes from 100 to 100 + N, depending on what operations they want the disk for (checkout raw_apply_lock_bytes() form block/file-posix.c). And my idea was for virtlockd to do the same. Currently, it locks 0th byte of given file and with metadata flag it would lock the first one.
Related or not, there's a strange NFSv3 collision between QEMU and NFS related to some sort of fcntl locking granularity. More details that you possibly want to read at:
https://bugzilla.redhat.com/show_bug.cgi?id=1592582 https://bugzilla.redhat.com/show_bug.cgi?id=1547095
I only note it because well it was on my 'short term history' radar and suffice to say it's an ugly and not fun issue dealing with these locks.
Oh yeah, NFSv3 and file locks. Well, I'm not making our situation any worse here. Qemu is already locking disks, and it is doing so for the while lifetime of the domain whereas my patches would lock for only a fraction of a second (well, as long as it takes for chown() and setfilecon_raw() to run). It is very unlikely that connection to the NFSv3 server will be lost right when metadata lock is held.
static int virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNUSED, virNetServerClientPtr client, @@ -50,13 +53,14 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; - off_t start = 0; + off_t start = DEFAULT_OFFSET; off_t len = 1;
virMutexLock(&priv->lock);
virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE, cleanup); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA, cleanup);
if (priv->restricted) { virReportError(VIR_ERR_OPERATION_DENIED, "%s", @@ -82,6 +86,10 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED; if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE) newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE; + if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA) { + start = METADATA_OFFSET; + newFlags |= VIR_LOCK_SPACE_ACQUIRE_WAIT; + }
If this is documented in more detail, then it should be noted that a METADATA lock forces usage of the wait API's. I can only imagine someone, some day will ask for a wait w/ timeout to avoid waiting too long.
That should never happen (TM). The metadata lock is acquired only for time that seclabels are set on some domain paths (and as we will see in 6/7 we are not going to lock all the paths). As soon as they are set the lock is released.
if (virLockSpaceAcquireResource(lockspace, args->name, diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 957a963a7b..bd14ed8930 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -475,9 +475,11 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, bool autoCreate = false;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
- if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) + if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY && + !(flags & VIR_LOCK_MANAGER_RESOURCE_METADATA)) return 0;
Could someone pass READONLY & METADATA and not return 0 here? Yes, doesn't make sense, but combining them leads to the logic matrix question.
So far there isn't such caller, but sure I can add an explicit check that errors out if both READONLY and METADATA are set at the same time.
switch (type) { @@ -489,7 +491,8 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, } if (!driver->autoDiskLease) { if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | - VIR_LOCK_MANAGER_RESOURCE_READONLY))) + VIR_LOCK_MANAGER_RESOURCE_READONLY | + VIR_LOCK_MANAGER_RESOURCE_METADATA))) priv->hasRWDisks = true; return 0; } @@ -602,6 +605,10 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, priv->resources[priv->nresources-1].flags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE;
+ if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + priv->resources[priv->nresources-1].flags |= + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA; + return 0;
error: @@ -626,12 +633,15 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY | VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1);
- if (priv->nresources == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->nresources == 0) {
I'm assuming there's a relationship between metadata and nresources == 0 which really isn't apparent especially since the subsequent error message is about leases. Do we need to check for resource type?
Or in the top level API do we need to only allow METADATA for DISK type?
Well, nresources == 0 can happen regardless of METADATA flag if autoDiskLease is turned off. In that case no domain disk is added into @resources and thus nresources can be zero. This is the situation where we require users to put <lease/> into domain XML which will then protect them. By the same token, when locking for METADATA, we can not ignore autoDiskLease setting (because if user has turned off disk leases they probably did not set any lockspace dir and certainly did not put it onto shared NFS, as required by our docs https://libvirt.org/locking-lockd.html#lockdplugin) and we can't lock files automatically. We must rely on <lease/>. However, we shouldn't fail because from METADATA POV we don't care if a disk is RO or RW. Long story short, if users want to rely on <lease/> solely we should honour that.
+ if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + return 0; }
if (!(client = virLockManagerLockDaemonConnect(lock, &program, &counter))) @@ -711,7 +721,8 @@ static int virLockManagerLockDaemonRelease(virLockManagerPtr lock,
args.flags &= ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA);
if (virNetClientProgramCall(program, client, diff --git a/src/locking/lock_driver_lockd.h b/src/locking/lock_driver_lockd.h index 6931fe7425..9882793260 100644 --- a/src/locking/lock_driver_lockd.h +++ b/src/locking/lock_driver_lockd.h @@ -25,6 +25,7 @@ enum virLockSpaceProtocolAcquireResourceFlags { VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = (1 << 0), VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = (1 << 1), + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_METADATA = (1 << 2),
Use of RESOURCE_METADATA would be more consistent wouldn't it?
Oh, okay. Michal

On Mon, Aug 20, 2018 at 09:25:13AM +0200, Michal Prívozník wrote:
On 08/17/2018 01:53 AM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 12 ++++++++++-- src/locking/lock_driver_lockd.c | 31 +++++++++++++++++++++---------- src/locking/lock_driver_lockd.h | 1 + 3 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 10248ec0b5..c24571ceac 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -37,6 +37,9 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch");
#include "lock_daemon_dispatch_stubs.h"
+#define DEFAULT_OFFSET 0 +#define METADATA_OFFSET 1 +
Curious, does this lead to the prospect of being able to acquire/use offset==0 length==1 and offset==1 length==1 at least conceptually and granularity wise?
Yes, this is exactly the purpose. I've taken inspiration from qemu. They lock bytes from 100 to 100 + N, depending on what operations they want the disk for (checkout raw_apply_lock_bytes() form block/file-posix.c). And my idea was for virtlockd to do the same. Currently, it locks 0th byte of given file and with metadata flag it would lock the first one.
Agreed, NFSv3 is already doomed, and this barely makes it worse because of the short lock time. NFSv4 is the fix for anyone who really cares. Better to have stale NFSv3 locks due to dead clients not releasing locks than to loose all your VM disk due to corruption. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

No real support implemented here. But hey, at least we will not fail. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData; virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1); if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0; + /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; + switch (type) { case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: if (driver->autoDiskLease) { @@ -953,12 +959,17 @@ static int virLockManagerSanlockAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_RESTRICT | VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1); - if (priv->res_count == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->res_count == 0) { + if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + /* We are not handling METADATA flag yet. So no resources + * case is no-op for now. */ + return 0; } /* We only initialize 'sock' if we are in the real -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA? Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't. I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
switch (type) { case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: if (driver->autoDiskLease) { @@ -953,12 +959,17 @@ static int virLockManagerSanlockAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_RESTRICT | VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
- if (priv->res_count == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->res_count == 0) { + if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + /* We are not handling METADATA flag yet. So no resources + * case is no-op for now. */ + return 0;
Similar comment to patch4 w/r/t resource type (e.g. disk or lease), but now at least it's more obvious to me that hasRWDisks means lock for disk vs. not. Still it's odd to think that returning 0, but not actually getting the lock is the "right" thing to do. "Theoretically", if AddResource failed, then would Acquire ever be called w/ this flag?
}
/* We only initialize 'sock' if we are in the real
BTW: Now that I think about them together... - lock_driver_lockd.h - extract from patch4 and merge to patch3. I suppose it could be separate too to keep mgmt vs. space separate, but they're related enough to keep them together. Perhaps this is where a few more "words" about usage expectations as part of the commit message. - lock_daemon_dispatch.c - extract from patch4 and whether it goes before or after is one of those chicken/egg type quandaries. Keeping it with the fcntl/lockd as opposed to the sanlock implementation doesn't feel right (even though sanlock doesn't use any LOCK_SPACE symbols). I can be swayed otherwise... maybe someone else will pipe in. John

On 08/17/2018 04:49 PM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Okay, I'll provide some implementation. Even though sanlock is not really my cup of coffee :-)
Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't.
I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
No we must not do this. Metadata locking is not something users can turn on or off. Okay, they can turn it off when they disable all the locking (i.e. comment out lock_manager in qemu.conf). But if they would have sanlock turned on and wanted to start a domain the virCheckFlags() would be triggered right away and starting a domain would be denied.
switch (type) { case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: if (driver->autoDiskLease) { @@ -953,12 +959,17 @@ static int virLockManagerSanlockAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_RESTRICT | VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
- if (priv->res_count == 0 && - priv->hasRWDisks && - driver->requireLeaseForDisks) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("Read/write, exclusive access, disks were present, but no leases specified")); - return -1; + if (priv->res_count == 0) { + if (priv->hasRWDisks && + driver->requireLeaseForDisks) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("Read/write, exclusive access, disks were present, but no leases specified")); + return -1; + } + + /* We are not handling METADATA flag yet. So no resources + * case is no-op for now. */ + return 0;
Similar comment to patch4 w/r/t resource type (e.g. disk or lease), but now at least it's more obvious to me that hasRWDisks means lock for disk vs. not.
Still it's odd to think that returning 0, but not actually getting the lock is the "right" thing to do. "Theoretically", if AddResource failed, then would Acquire ever be called w/ this flag?
Sure it would. Look at virDomainLockProcessStart() for instance. It calls virDomainLockManagerNew() which adds all the resources via AddResource() and then calls Acquire().
}
/* We only initialize 'sock' if we are in the real
BTW:
Now that I think about them together...
- lock_driver_lockd.h - extract from patch4 and merge to patch3. I suppose it could be separate too to keep mgmt vs. space separate, but they're related enough to keep them together. Perhaps this is where a few more "words" about usage expectations as part of the commit message.> - lock_daemon_dispatch.c - extract from patch4 and whether it goes before or after is one of those chicken/egg type quandaries. Keeping it with the fcntl/lockd as opposed to the sanlock implementation doesn't feel right (even though sanlock doesn't use any LOCK_SPACE symbols).
I rather keep the patches as they are now. It makes more sense to me this way. Michal

On 08/20/2018 09:25 AM, Michal Prívozník wrote:
On 08/17/2018 04:49 PM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Okay, I'll provide some implementation. Even though sanlock is not really my cup of coffee :-)
Actually, sanlock is doomed. It allows us to set the initial offset (with posing some weird restrictions on it) and it doesn't allow us to specify length we want to lock (it looks like it locks whole disk sector). Therefore we can't lock offsets 0 and 1 independently. D'oh! Maybe we could lock offsets 0 and 1MB but I'm unsure how sanlock behaves when offset is outside of the file (we can't safely assume that every file we will try to lock is at least 1MB big, can we? No we can't! OVMF_VARS is only 128KB long). https://linux.die.net/man/8/sanlock and scroll down to offsets. Therefore I rather stick with my dummy implementation and have somebody else look at this. Somebody who understands how sanlock works. Michal

On Fri, Aug 17, 2018 at 10:49:12AM -0400, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't.
I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
Yeah, this doesn't feel right to me. I think we need to treat the metadata locking as completely independant of the content locking. This implies we should have a separate configuration for metadata locking, where the only valid options are "lockd" or "nop". eg our available matrix looks like METADATA | nop lockd sanlock ---------+-------------------- nop | Y Y N CONTENT lockd | Y Y N sanlock | Y Y N Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Mon, Aug 20, 2018 at 03:59:23PM +0100, Daniel P. Berrangé wrote:
On Fri, Aug 17, 2018 at 10:49:12AM -0400, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't.
I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
Yeah, this doesn't feel right to me. I think we need to treat the metadata locking as completely independant of the content locking. This implies we should have a separate configuration for metadata locking, where the only valid options are "lockd" or "nop".
eg our available matrix looks like
METADATA | nop lockd sanlock ---------+-------------------- nop | Y Y N CONTENT lockd | Y Y N sanlock | Y Y N
Oh and even for virtlockd we need to consider the config separately for content vs metdata. For content we have the possibility to lock out of band files as a proxy for the main file. This is primarily done for protecting shared blocks devices as locking the device node doesn't apply across hosts. For metadata locking, we only ever care about the local device node as changes to labelling/ownership don't propagate across hosts. IOW, we should always directly lock the file in question for metadata locking, and never use an out of band proxy for locking. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/20/2018 05:04 PM, Daniel P. Berrangé wrote:
On Mon, Aug 20, 2018 at 03:59:23PM +0100, Daniel P. Berrangé wrote:
On Fri, Aug 17, 2018 at 10:49:12AM -0400, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't.
I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
Yeah, this doesn't feel right to me. I think we need to treat the metadata locking as completely independant of the content locking. This implies we should have a separate configuration for metadata locking, where the only valid options are "lockd" or "nop".
eg our available matrix looks like
METADATA | nop lockd sanlock ---------+-------------------- nop | Y Y N CONTENT lockd | Y Y N sanlock | Y Y N
Having some troubles parsing the table. Do you mean: | Content | Metadata ---------+------------------- nop | Y | Y lockd | Y | Y sanlock | Y | N Where Y says the respective type (content/metadata) can/cannot be locked by lock driver in question? I.e. we would have 'metadata_lock_manager' config value in qemu.conf and it'd accept only 'nop' and 'lockd'. Then we can have 'metadata_lockspace_dir' in /etc/libvirt/qemu-lockd.conf where the lockspace would be created. Anyway, I'm gonna state the obvious because it might not be that obvious to somebody else: we can change the RPC between libvirtd and virtlockd safely, because we require both to be the same version. I'm saying that just in case I need to alter the RPC a bit (found some unused procs there, btw but they might come handy someday).
Oh and even for virtlockd we need to consider the config separately for content vs metdata.
Do you mean like completely new config file? qemu-lockd-metadata.conf? Okay. So far we only need one config option (metadata_lockspace_dir) but it might turn out we need more and it would make sens to keep options separated from content locking options.
For content we have the possibility to lock out of band files as a proxy for the main file. This is primarily done for protecting shared blocks devices as locking the device node doesn't apply across hosts.
For metadata locking, we only ever care about the local device node as changes to labelling/ownership don't propagate across hosts. IOW, we should always directly lock the file in question for metadata locking, and never use an out of band proxy for locking.
Oh right. From this POV locking from secdriver seems like even better idea. Michal

On Mon, Aug 20, 2018 at 07:29:30PM +0200, Michal Prívozník wrote:
On 08/20/2018 05:04 PM, Daniel P. Berrangé wrote:
On Mon, Aug 20, 2018 at 03:59:23PM +0100, Daniel P. Berrangé wrote:
On Fri, Aug 17, 2018 at 10:49:12AM -0400, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
No real support implemented here. But hey, at least we will not fail.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_sanlock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3e5f0e37b0..c1996fb937 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -791,7 +791,8 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, virLockManagerSanlockPrivatePtr priv = lock->privateData;
virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | - VIR_LOCK_MANAGER_RESOURCE_SHARED, -1); + VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_METADATA, -1);
if (priv->res_count == SANLK_MAX_RESOURCES) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -804,6 +805,11 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0;
+ /* No metadata locking support for now. + * TODO: implement it. */ + if (flags & VIR_LOCK_MANAGER_RESOURCE_METADATA) + return 0; +
Doesn't this give someone the false impression that their resource is locked if they choose METADATA?
Something doesn't feel right about that - giving the impression that it's supported and the consumer is protected, but when push comes to shove they aren't.
I'd be inclined to believe that we may want to do nothing with/for sanlock allowing the virCheckFlags above take care of the consumer.
Yeah, this doesn't feel right to me. I think we need to treat the metadata locking as completely independant of the content locking. This implies we should have a separate configuration for metadata locking, where the only valid options are "lockd" or "nop".
eg our available matrix looks like
METADATA | nop lockd sanlock ---------+-------------------- nop | Y Y N CONTENT lockd | Y Y N sanlock | Y Y N
Having some troubles parsing the table. Do you mean:
| Content | Metadata ---------+------------------- nop | Y | Y lockd | Y | Y sanlock | Y | N
Where Y says the respective type (content/metadata) can/cannot be locked by lock driver in question? I.e. we would have 'metadata_lock_manager' config value in qemu.conf and it'd accept only 'nop' and 'lockd'.
Heh, ok, yes, that is a simpler table :-)
Then we can have 'metadata_lockspace_dir' in /etc/libvirt/qemu-lockd.conf where the lockspace would be created.
No need for a metadata_lockspace_dir, since we should always be locking the resource itself, never an out of band proxy file.
Oh and even for virtlockd we need to consider the config separately for content vs metdata.
Do you mean like completely new config file? qemu-lockd-metadata.conf? Okay. So far we only need one config option (metadata_lockspace_dir) but it might turn out we need more and it would make sens to keep options separated from content locking options.
I don't think we need a config file for now. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs). Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 8 ++ src/locking/domain_lock.c | 304 ++++++++++++++++++++++++++++++++++++++++++---- src/locking/domain_lock.h | 28 +++++ 3 files changed, 317 insertions(+), 23 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ca4a192a4a..720ae12301 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1284,6 +1284,14 @@ virDomainLockImageAttach; virDomainLockImageDetach; virDomainLockLeaseAttach; virDomainLockLeaseDetach; +virDomainLockMetadataDiskLock; +virDomainLockMetadataDiskUnlock; +virDomainLockMetadataImageLock; +virDomainLockMetadataImageUnlock; +virDomainLockMetadataLock; +virDomainLockMetadataMemLock; +virDomainLockMetadataMemUnlock; +virDomainLockMetadataUnlock; virDomainLockProcessInquire; virDomainLockProcessPause; virDomainLockProcessResume; diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c index 705b132457..19a097fb25 100644 --- a/src/locking/domain_lock.c +++ b/src/locking/domain_lock.c @@ -69,7 +69,8 @@ static int virDomainLockManagerAddLease(virLockManagerPtr lock, static int virDomainLockManagerAddImage(virLockManagerPtr lock, - virStorageSourcePtr src) + virStorageSourcePtr src, + bool metadataOnly) { unsigned int diskFlags = 0; int type = virStorageSourceGetActualType(src); @@ -82,10 +83,14 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, type == VIR_STORAGE_TYPE_DIR)) return 0; - if (src->readonly) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; - if (src->shared) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + if (metadataOnly) { + diskFlags = VIR_LOCK_MANAGER_RESOURCE_METADATA; + } else { + if (src->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (src->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + } VIR_DEBUG("Add disk %s", src->path); if (virLockManagerAddResource(lock, @@ -101,13 +106,68 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, } +static int +virDomainLockManagerAddMemory(virLockManagerPtr lock, + const virDomainMemoryDef *mem) +{ + const char *path = NULL; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + path = mem->nvdimmPath; + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + break; + } + + if (!path) + return 0; + + VIR_DEBUG("Adding memory %s", path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + path, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + +static int +virDomainLockManagerAddFile(virLockManagerPtr lock, + const char *file) +{ + if (!file) + return 0; + + VIR_DEBUG("Adding file %s", file); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + file, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, const char *uri, virDomainObjPtr dom, bool withResources, + bool metadataOnly, unsigned int flags) { virLockManagerPtr lock; + const virDomainDef *def = dom->def; size_t i; virLockManagerParam params[] = { { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, @@ -115,11 +175,11 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, .key = "name", - .value = { .str = dom->def->name }, + .value = { .str = def->name }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "id", - .value = { .iv = dom->def->id }, + .value = { .iv = def->id }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "pid", @@ -133,7 +193,7 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p withResources=%d", plugin, dom, withResources); - memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + memcpy(params[0].value.uuid, def->uuid, VIR_UUID_BUFLEN); if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(plugin), VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, @@ -144,19 +204,46 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, if (withResources) { VIR_DEBUG("Adding leases"); - for (i = 0; i < dom->def->nleases; i++) - if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + for (i = 0; i < def->nleases; i++) + if (virDomainLockManagerAddLease(lock, def->leases[i]) < 0) goto error; + } + if (withResources || metadataOnly) { VIR_DEBUG("Adding disks"); - for (i = 0; i < dom->def->ndisks; i++) { - virDomainDiskDefPtr disk = dom->def->disks[i]; + for (i = 0; i < def->ndisks; i++) { + virDomainDiskDefPtr disk = def->disks[i]; - if (virDomainLockManagerAddImage(lock, disk->src) < 0) + if (virDomainLockManagerAddImage(lock, disk->src, metadataOnly) < 0) goto error; } } + if (metadataOnly) { + for (i = 0; i < def->nmems; i++) { + virDomainMemoryDefPtr mem = def->mems[i]; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto error; + } + + if (def->os.loader && + virDomainLockManagerAddFile(lock, def->os.loader->nvram) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.kernel) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.initrd) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.dtb) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.slic_table) < 0) + goto error; + } + return lock; error: @@ -178,7 +265,7 @@ int virDomainLockProcessStart(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p paused=%d fd=%p", plugin, dom, paused, fd); - if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, VIR_LOCK_MANAGER_NEW_STARTED))) return -1; @@ -203,7 +290,7 @@ int virDomainLockProcessPause(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state); - if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1; ret = virLockManagerRelease(lock, state, 0); @@ -223,7 +310,7 @@ int virDomainLockProcessResume(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%s", plugin, dom, NULLSTR(state)); - if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, 0))) return -1; ret = virLockManagerAcquire(lock, state, 0, dom->def->onLockFailure, NULL); @@ -242,7 +329,7 @@ int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state); - if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1; ret = virLockManagerInquire(lock, state, 0); @@ -262,10 +349,10 @@ int virDomainLockImageAttach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); - if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1; - if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup; if (virLockManagerAcquire(lock, NULL, 0, @@ -299,10 +386,10 @@ int virDomainLockImageDetach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); - if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1; - if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup; if (virLockManagerRelease(lock, NULL, 0) < 0) @@ -336,7 +423,7 @@ int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease); - if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1; if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -364,7 +451,7 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease); - if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1; if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -380,3 +467,174 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, return ret; } + + +int +virDomainLockMetadataLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0))) + return -1; + + if (virLockManagerAcquire(lock, NULL, flags, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0))) + return -1; + + if (virLockManagerRelease(lock, NULL, flags) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataImageLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddImage(lock, src, true) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataImageUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddImage(lock, src, true) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataDiskLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + return virDomainLockMetadataImageLock(plugin, dom, disk->src); +} + + +int +virDomainLockMetadataDiskUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + return virDomainLockMetadataImageUnlock(plugin, dom, disk->src); +} + + +int +virDomainLockMetadataMemLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p mem=%p", plugin, dom, mem); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataMemUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p mem=%p", plugin, dom, mem); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h index fb4910230c..fbd3ee1d4e 100644 --- a/src/locking/domain_lock.h +++ b/src/locking/domain_lock.h @@ -66,4 +66,32 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, virDomainObjPtr dom, virDomainLeaseDefPtr lease); +int virDomainLockMetadataLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom); + +int virDomainLockMetadataUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom); + +int virDomainLockMetadataDiskLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockMetadataDiskUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockMetadataImageLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src); + +int virDomainLockMetadataImageUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src); + +int virDomainLockMetadataMemLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem); +int virDomainLockMetadataMemUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem); + #endif /* __VIR_DOMAIN_LOCK_H__ */ -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 8 ++ src/locking/domain_lock.c | 304 ++++++++++++++++++++++++++++++++++++++++++---- src/locking/domain_lock.h | 28 +++++ 3 files changed, 317 insertions(+), 23 deletions(-)
There seems to be more than just what's described going on in this patch which could be split out.... 1. Use @def in virDomainLockManagerNew 2. Add @metadataonly to virDomainLockManagerNew and virDomainLockManagerAddImage 3. Introduce virDomainLockManagerAddMemory and virDomainLockManagerAddFile along with changes to virDomainLockManagerNew along with perhaps a few words why those files were chosen and what decisions led to their selection. If someone adds something in the future how would they know to add them to the list? 4. Various virDomainLockMetadata* API's.
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ca4a192a4a..720ae12301 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1284,6 +1284,14 @@ virDomainLockImageAttach; virDomainLockImageDetach; virDomainLockLeaseAttach; virDomainLockLeaseDetach; +virDomainLockMetadataDiskLock; +virDomainLockMetadataDiskUnlock; +virDomainLockMetadataImageLock; +virDomainLockMetadataImageUnlock; +virDomainLockMetadataLock; +virDomainLockMetadataMemLock; +virDomainLockMetadataMemUnlock; +virDomainLockMetadataUnlock; virDomainLockProcessInquire; virDomainLockProcessPause; virDomainLockProcessResume; diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c index 705b132457..19a097fb25 100644 --- a/src/locking/domain_lock.c +++ b/src/locking/domain_lock.c @@ -69,7 +69,8 @@ static int virDomainLockManagerAddLease(virLockManagerPtr lock,
static int virDomainLockManagerAddImage(virLockManagerPtr lock, - virStorageSourcePtr src) + virStorageSourcePtr src, + bool metadataOnly) { unsigned int diskFlags = 0; int type = virStorageSourceGetActualType(src); @@ -82,10 +83,14 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, type == VIR_STORAGE_TYPE_DIR)) return 0;
- if (src->readonly) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; - if (src->shared) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + if (metadataOnly) { + diskFlags = VIR_LOCK_MANAGER_RESOURCE_METADATA; + } else { + if (src->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (src->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + }
VIR_DEBUG("Add disk %s", src->path); if (virLockManagerAddResource(lock, @@ -101,13 +106,68 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, }
+static int +virDomainLockManagerAddMemory(virLockManagerPtr lock, + const virDomainMemoryDef *mem) +{ + const char *path = NULL; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + path = mem->nvdimmPath;
Why not flip flop the order of new helpers and just call/return virDomainLockManagerAddFile directly with mem->nvdimmPath
+ break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + break; + } + + if (!path) + return 0;
And then just return 0 here. or just call it here since it returns 0 if @!file anyway.
+ + VIR_DEBUG("Adding memory %s", path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + path, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + +static int +virDomainLockManagerAddFile(virLockManagerPtr lock, + const char *file) +{ + if (!file) + return 0; + + VIR_DEBUG("Adding file %s", file); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + file, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, const char *uri, virDomainObjPtr dom, bool withResources, + bool metadataOnly, unsigned int flags) { virLockManagerPtr lock; + const virDomainDef *def = dom->def; size_t i; virLockManagerParam params[] = { { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, @@ -115,11 +175,11 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, .key = "name", - .value = { .str = dom->def->name }, + .value = { .str = def->name }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "id", - .value = { .iv = dom->def->id }, + .value = { .iv = def->id }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "pid", @@ -133,7 +193,7 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p withResources=%d", plugin, dom, withResources);
- memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + memcpy(params[0].value.uuid, def->uuid, VIR_UUID_BUFLEN);
if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(plugin), VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, @@ -144,19 +204,46 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin,
if (withResources) { VIR_DEBUG("Adding leases"); - for (i = 0; i < dom->def->nleases; i++) - if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + for (i = 0; i < def->nleases; i++) + if (virDomainLockManagerAddLease(lock, def->leases[i]) < 0) goto error; + }
+ if (withResources || metadataOnly) {
So this would seemingly answer my question long ago - one could get one or both locks (@0 w/ length==1 and @1 w/ length==1)
VIR_DEBUG("Adding disks"); - for (i = 0; i < dom->def->ndisks; i++) { - virDomainDiskDefPtr disk = dom->def->disks[i]; + for (i = 0; i < def->ndisks; i++) { + virDomainDiskDefPtr disk = def->disks[i];
- if (virDomainLockManagerAddImage(lock, disk->src) < 0) + if (virDomainLockManagerAddImage(lock, disk->src, metadataOnly) < 0) goto error; } }
+ if (metadataOnly) { + for (i = 0; i < def->nmems; i++) { + virDomainMemoryDefPtr mem = def->mems[i]; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto error; + } + + if (def->os.loader && + virDomainLockManagerAddFile(lock, def->os.loader->nvram) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.kernel) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.initrd) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.dtb) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.slic_table) < 0) + goto error; + } + return lock;
error: @@ -178,7 +265,7 @@ int virDomainLockProcessStart(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p paused=%d fd=%p", plugin, dom, paused, fd);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, VIR_LOCK_MANAGER_NEW_STARTED))) return -1;
@@ -203,7 +290,7 @@ int virDomainLockProcessPause(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1;
ret = virLockManagerRelease(lock, state, 0); @@ -223,7 +310,7 @@ int virDomainLockProcessResume(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%s", plugin, dom, NULLSTR(state));
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, 0))) return -1;
ret = virLockManagerAcquire(lock, state, 0, dom->def->onLockFailure, NULL); @@ -242,7 +329,7 @@ int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1;
ret = virLockManagerInquire(lock, state, 0); @@ -262,10 +349,10 @@ int virDomainLockImageAttach(virLockManagerPluginPtr plugin,
VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1;
- if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup;
if (virLockManagerAcquire(lock, NULL, 0, @@ -299,10 +386,10 @@ int virDomainLockImageDetach(virLockManagerPluginPtr plugin,
VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1;
- if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup;
if (virLockManagerRelease(lock, NULL, 0) < 0) @@ -336,7 +423,7 @@ int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1;
if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -364,7 +451,7 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1;
if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -380,3 +467,174 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin,
return ret; } + + +int +virDomainLockMetadataLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0;
We never change this from 0 to something else (not even the next patch)... Not that it matters, just noting it considering that other new API's just pass 0 and not flags which is only ever set to 0.
+ int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0))) + return -1; + + if (virLockManagerAcquire(lock, NULL, flags, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0)
Strange to see a longer second line, although I understand seeing NULL along leads to why not just put it on the previous line type review comment. FWIW: Seeing how @action is set here led me down the path of seeing how it's used. I wonder if perhaps that could be used in the lockd/sanlock code in order to determine how/whether to fail rather than just returning 0 in sanlock when METADATA isn't supported. If it really matters that is. It's not 100% clear to me what/how the expected action usage should be.
+ goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0;
Similar @flags commentary.
+ int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0)))
Since MetadataLock/Unlock are the only ones passing true for the @metadataOnly bool in virDomainLockManagerNew, I'm "conflicted" over whether there should a helper for both in lieu of the bool, but then the bool does the trick and it's following @withResources which was there since inception, so I guess it's fine albeit odd w/r/t a *New API doing more than just allocating memory or setting defaults. Not an issue, but an observation, in case you were wondering. Trying to learn a bit about this code while doing this review... John
+ return -1; + + if (virLockManagerRelease(lock, NULL, flags) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataImageLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddImage(lock, src, true) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataImageUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddImage(lock, src, true) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataDiskLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + return virDomainLockMetadataImageLock(plugin, dom, disk->src); +} + + +int +virDomainLockMetadataDiskUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk) +{ + return virDomainLockMetadataImageUnlock(plugin, dom, disk->src); +} + + +int +virDomainLockMetadataMemLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p mem=%p", plugin, dom, mem); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto cleanup; + + if (virLockManagerAcquire(lock, NULL, 0, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataMemUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem) +{ + virLockManagerPtr lock; + int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p mem=%p", plugin, dom, mem); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) + return -1; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto cleanup; + + if (virLockManagerRelease(lock, NULL, 0) < 0) + goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} diff --git a/src/locking/domain_lock.h b/src/locking/domain_lock.h index fb4910230c..fbd3ee1d4e 100644 --- a/src/locking/domain_lock.h +++ b/src/locking/domain_lock.h @@ -66,4 +66,32 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, virDomainObjPtr dom, virDomainLeaseDefPtr lease);
+int virDomainLockMetadataLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom); + +int virDomainLockMetadataUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom); + +int virDomainLockMetadataDiskLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); +int virDomainLockMetadataDiskUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainDiskDefPtr disk); + +int virDomainLockMetadataImageLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src); + +int virDomainLockMetadataImageUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virStorageSourcePtr src); + +int virDomainLockMetadataMemLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem); +int virDomainLockMetadataMemUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom, + virDomainMemoryDefPtr mem); + #endif /* __VIR_DOMAIN_LOCK_H__ */

On 08/17/2018 07:58 PM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 8 ++ src/locking/domain_lock.c | 304 ++++++++++++++++++++++++++++++++++++++++++---- src/locking/domain_lock.h | 28 +++++ 3 files changed, 317 insertions(+), 23 deletions(-)
There seems to be more than just what's described going on in this patch which could be split out....
1. Use @def in virDomainLockManagerNew
2. Add @metadataonly to virDomainLockManagerNew and virDomainLockManagerAddImage
3. Introduce virDomainLockManagerAddMemory and virDomainLockManagerAddFile along with changes to virDomainLockManagerNew along with perhaps a few words why those files were chosen and what decisions led to their selection. If someone adds something in the future how would they know to add them to the list?
4. Various virDomainLockMetadata* API's.
D'oh! Okay, I'll split this.
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ca4a192a4a..720ae12301 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1284,6 +1284,14 @@ virDomainLockImageAttach; virDomainLockImageDetach; virDomainLockLeaseAttach; virDomainLockLeaseDetach; +virDomainLockMetadataDiskLock; +virDomainLockMetadataDiskUnlock; +virDomainLockMetadataImageLock; +virDomainLockMetadataImageUnlock; +virDomainLockMetadataLock; +virDomainLockMetadataMemLock; +virDomainLockMetadataMemUnlock; +virDomainLockMetadataUnlock; virDomainLockProcessInquire; virDomainLockProcessPause; virDomainLockProcessResume; diff --git a/src/locking/domain_lock.c b/src/locking/domain_lock.c index 705b132457..19a097fb25 100644 --- a/src/locking/domain_lock.c +++ b/src/locking/domain_lock.c @@ -69,7 +69,8 @@ static int virDomainLockManagerAddLease(virLockManagerPtr lock,
static int virDomainLockManagerAddImage(virLockManagerPtr lock, - virStorageSourcePtr src) + virStorageSourcePtr src, + bool metadataOnly) { unsigned int diskFlags = 0; int type = virStorageSourceGetActualType(src); @@ -82,10 +83,14 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, type == VIR_STORAGE_TYPE_DIR)) return 0;
- if (src->readonly) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; - if (src->shared) - diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + if (metadataOnly) { + diskFlags = VIR_LOCK_MANAGER_RESOURCE_METADATA; + } else { + if (src->readonly) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_READONLY; + if (src->shared) + diskFlags |= VIR_LOCK_MANAGER_RESOURCE_SHARED; + }
VIR_DEBUG("Add disk %s", src->path); if (virLockManagerAddResource(lock, @@ -101,13 +106,68 @@ static int virDomainLockManagerAddImage(virLockManagerPtr lock, }
+static int +virDomainLockManagerAddMemory(virLockManagerPtr lock, + const virDomainMemoryDef *mem) +{ + const char *path = NULL; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + path = mem->nvdimmPath;
Why not flip flop the order of new helpers and just call/return virDomainLockManagerAddFile directly with mem->nvdimmPath
+ break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + break; + } + + if (!path) + return 0;
And then just return 0 here. or just call it here since it returns 0 if @!file anyway.
Well, I think it's just a matter of preference though. My preference was to have this future extensible. I mean, if we ever introduce new model that we need to lock, this switch() would just get its path and the rest is already handled. Whereas if I'd move AddResource() call right into the switch() the call would be duplicated then.
+ + VIR_DEBUG("Adding memory %s", path); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + path, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + +static int +virDomainLockManagerAddFile(virLockManagerPtr lock, + const char *file) +{ + if (!file) + return 0; + + VIR_DEBUG("Adding file %s", file); + if (virLockManagerAddResource(lock, + VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK, + file, + 0, + NULL, + VIR_LOCK_MANAGER_RESOURCE_METADATA) < 0) + return -1; + + return 0; +} + + static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, const char *uri, virDomainObjPtr dom, bool withResources, + bool metadataOnly, unsigned int flags) { virLockManagerPtr lock; + const virDomainDef *def = dom->def; size_t i; virLockManagerParam params[] = { { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, @@ -115,11 +175,11 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, .key = "name", - .value = { .str = dom->def->name }, + .value = { .str = def->name }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "id", - .value = { .iv = dom->def->id }, + .value = { .iv = def->id }, }, { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, .key = "pid", @@ -133,7 +193,7 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p withResources=%d", plugin, dom, withResources);
- memcpy(params[0].value.uuid, dom->def->uuid, VIR_UUID_BUFLEN); + memcpy(params[0].value.uuid, def->uuid, VIR_UUID_BUFLEN);
if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(plugin), VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN, @@ -144,19 +204,46 @@ static virLockManagerPtr virDomainLockManagerNew(virLockManagerPluginPtr plugin,
if (withResources) { VIR_DEBUG("Adding leases"); - for (i = 0; i < dom->def->nleases; i++) - if (virDomainLockManagerAddLease(lock, dom->def->leases[i]) < 0) + for (i = 0; i < def->nleases; i++) + if (virDomainLockManagerAddLease(lock, def->leases[i]) < 0) goto error; + }
+ if (withResources || metadataOnly) {
So this would seemingly answer my question long ago - one could get one or both locks (@0 w/ length==1 and @1 w/ length==1)
Yes. It was discussed here: https://www.redhat.com/archives/libvir-list/2018-July/msg01880.html
VIR_DEBUG("Adding disks"); - for (i = 0; i < dom->def->ndisks; i++) { - virDomainDiskDefPtr disk = dom->def->disks[i]; + for (i = 0; i < def->ndisks; i++) { + virDomainDiskDefPtr disk = def->disks[i];
- if (virDomainLockManagerAddImage(lock, disk->src) < 0) + if (virDomainLockManagerAddImage(lock, disk->src, metadataOnly) < 0) goto error; } }
+ if (metadataOnly) { + for (i = 0; i < def->nmems; i++) { + virDomainMemoryDefPtr mem = def->mems[i]; + + if (virDomainLockManagerAddMemory(lock, mem) < 0) + goto error; + } + + if (def->os.loader && + virDomainLockManagerAddFile(lock, def->os.loader->nvram) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.kernel) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.initrd) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.dtb) < 0) + goto error; + + if (virDomainLockManagerAddFile(lock, def->os.slic_table) < 0) + goto error; + } + return lock;
error: @@ -178,7 +265,7 @@ int virDomainLockProcessStart(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p paused=%d fd=%p", plugin, dom, paused, fd);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, VIR_LOCK_MANAGER_NEW_STARTED))) return -1;
@@ -203,7 +290,7 @@ int virDomainLockProcessPause(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1;
ret = virLockManagerRelease(lock, state, 0); @@ -223,7 +310,7 @@ int virDomainLockProcessResume(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%s", plugin, dom, NULLSTR(state));
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, true, false, 0))) return -1;
ret = virLockManagerAcquire(lock, state, 0, dom->def->onLockFailure, NULL); @@ -242,7 +329,7 @@ int virDomainLockProcessInquire(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p state=%p", plugin, dom, state);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, true, false, 0))) return -1;
ret = virLockManagerInquire(lock, state, 0); @@ -262,10 +349,10 @@ int virDomainLockImageAttach(virLockManagerPluginPtr plugin,
VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1;
- if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup;
if (virLockManagerAcquire(lock, NULL, 0, @@ -299,10 +386,10 @@ int virDomainLockImageDetach(virLockManagerPluginPtr plugin,
VIR_DEBUG("plugin=%p dom=%p src=%p", plugin, dom, src);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1;
- if (virDomainLockManagerAddImage(lock, src) < 0) + if (virDomainLockManagerAddImage(lock, src, false) < 0) goto cleanup;
if (virLockManagerRelease(lock, NULL, 0) < 0) @@ -336,7 +423,7 @@ int virDomainLockLeaseAttach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease);
- if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, uri, dom, false, false, 0))) return -1;
if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -364,7 +451,7 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin, VIR_DEBUG("plugin=%p dom=%p lease=%p", plugin, dom, lease);
- if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, 0))) + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, false, 0))) return -1;
if (virDomainLockManagerAddLease(lock, lease) < 0) @@ -380,3 +467,174 @@ int virDomainLockLeaseDetach(virLockManagerPluginPtr plugin,
return ret; } + + +int +virDomainLockMetadataLock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0;
We never change this from 0 to something else (not even the next patch)... Not that it matters, just noting it considering that other new API's just pass 0 and not flags which is only ever set to 0.
I prefer avoiding magical constants. For example: func(x, NULL, 0, 0); vs void *resources = NULL; size_t nresources = 0; uint flags = 0; func(x, resources, nresources, flags);
+ int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0))) + return -1; + + if (virLockManagerAcquire(lock, NULL, flags, + VIR_DOMAIN_LOCK_FAILURE_DEFAULT, NULL) < 0)
Strange to see a longer second line, although I understand seeing NULL along leads to why not just put it on the previous line type review comment.
FWIW: Seeing how @action is set here led me down the path of seeing how it's used.
I wonder if perhaps that could be used in the lockd/sanlock code in order to determine how/whether to fail rather than just returning 0 in sanlock when METADATA isn't supported. If it really matters that is. It's not 100% clear to me what/how the expected action usage should be.
+ goto cleanup; + + ret = 0; + cleanup: + virLockManagerFree(lock); + return ret; +} + + +int +virDomainLockMetadataUnlock(virLockManagerPluginPtr plugin, + virDomainObjPtr dom) +{ + virLockManagerPtr lock; + const unsigned int flags = 0;
Similar @flags commentary.
+ int ret = -1; + + VIR_DEBUG("plugin=%p dom=%p", plugin, dom); + + if (!(lock = virDomainLockManagerNew(plugin, NULL, dom, false, true, 0)))
Since MetadataLock/Unlock are the only ones passing true for the @metadataOnly bool in virDomainLockManagerNew, I'm "conflicted" over whether there should a helper for both in lieu of the bool, but then the bool does the trick and it's following @withResources which was there since inception, so I guess it's fine albeit odd w/r/t a *New API doing more than just allocating memory or setting defaults.
Not an issue, but an observation, in case you were wondering. Trying to learn a bit about this code while doing this review...
John
Michal

On Tue, Aug 14, 2018 at 01:19:42PM +0200, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
I'm not sure I understand the rationale behind saying we only care about resources on network filesystems. If I have 2 locally running guests, and both have a serial port backed by a physical serial port, eg <serial type="dev"> <source path="/dev/ttyS0"/> <target port="1"/> </serial> we *do* care about locking /dev/ttyS0, as libvirtd isn't doing mutual exclusion checks anywhere else for the /dev/ttyS0 device node. In general I think we need to lock every single file resource that is labelled for a guest, regardless of whether its local or remote. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On Mon, Aug 20, 2018 at 04:07:28PM +0100, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:42PM +0200, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
I'm not sure I understand the rationale behind saying we only care about resources on network filesystems.
If I have 2 locally running guests, and both have a serial port backed by a physical serial port, eg
<serial type="dev"> <source path="/dev/ttyS0"/> <target port="1"/> </serial>
we *do* care about locking /dev/ttyS0, as libvirtd isn't doing mutual exclusion checks anywhere else for the /dev/ttyS0 device node.
In general I think we need to lock every single file resource that is labelled for a guest, regardless of whether its local or remote.
In the next patch I propose integration into the security manager that would avoid the need to touch this domain lock abstraction at all. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/20/2018 05:07 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:42PM +0200, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
I'm not sure I understand the rationale behind saying we only care about resources on network filesystems.
If I have 2 locally running guests, and both have a serial port backed by a physical serial port, eg
<serial type="dev"> <source path="/dev/ttyS0"/> <target port="1"/> </serial>
we *do* care about locking /dev/ttyS0, as libvirtd isn't doing mutual exclusion checks anywhere else for the /dev/ttyS0 device node.
Ah you mean that the system wide daemon and session daemon could clash when relabeling /dev/ttyS0? Well, we don't do relabeling for session daemons and running two system daemons is not supported (you're gonna hit more serious problems when trying that anyway).
In general I think we need to lock every single file resource that is labelled for a guest, regardless of whether its local or remote.
Well this might be feasible iff locking would be done from security driver. Michal

On Mon, Aug 20, 2018 at 07:13:46PM +0200, Michal Prívozník wrote:
On 08/20/2018 05:07 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:42PM +0200, Michal Privoznik wrote:
In order for our drivers to lock resources for metadata change we need set of new APIs. Fortunately, we don't have to care about every possible device a domain can have. We care only about those which can live on a network filesystem and hence can be accessed by multiple daemons at the same time. These devices are covered in virDomainLockMetadataLock() and only a small fraction of those can be hotplugged (covered in the rest of the introduced APIs).
I'm not sure I understand the rationale behind saying we only care about resources on network filesystems.
If I have 2 locally running guests, and both have a serial port backed by a physical serial port, eg
<serial type="dev"> <source path="/dev/ttyS0"/> <target port="1"/> </serial>
we *do* care about locking /dev/ttyS0, as libvirtd isn't doing mutual exclusion checks anywhere else for the /dev/ttyS0 device node.
Ah you mean that the system wide daemon and session daemon could clash when relabeling /dev/ttyS0? Well, we don't do relabeling for session daemons and running two system daemons is not supported (you're gonna hit more serious problems when trying that anyway).
We certainly *do* have relabelling for session daemons, for SELinux: $ ls -alZ ~/VirtualMachines/demo.img -rw-r--r--. 1 berrange berrange unconfined_u:object_r:virt_home_t:s0 1073741824 Aug 21 09:25 /home/berrange/VirtualMachines/demo.img $ virsh uri qemu:///session $ virsh start QEMUGuest1 Domain QEMUGuest1 started $ ls -alZ ~/VirtualMachines/demo.img -rw-r--r--. 1 berrange berrange unconfined_u:object_r:svirt_image_t:s0:c310,c831 1073741824 Aug 21 09:25 /home/berrange/VirtualMachines/demo.img but I was more thinking about the future where we have the libvirt shim concept that allows starting & managing of QEMU processes independantly, and libvirtd is merely there for aggregated data reporting. In that case you'd have many instances of the security driver operating in parallel. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index af3be42854..527563947c 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_security.h" #include "virlog.h" +#include "locking/domain_lock.h" #define VIR_FROM_THIS VIR_FROM_QEMU @@ -39,6 +40,12 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, { int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + bool locked = false; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -55,9 +62,17 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); return ret; } @@ -68,6 +83,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, bool migrated) { qemuDomainObjPrivatePtr priv = vm->privateData; + bool unlock = true; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + unlock = false; /* In contrast to qemuSecuritySetAllLabel, do not use * secdriver transactions here. This function is called from @@ -79,6 +98,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, vm->def, migrated, priv->chardevStdioLogd); + + if (unlock && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); } @@ -88,6 +111,12 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -103,9 +132,17 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; } @@ -116,6 +153,12 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -131,9 +174,17 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; } @@ -144,6 +195,12 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -159,9 +216,17 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; } @@ -172,6 +237,12 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -187,9 +258,17 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; } @@ -258,6 +337,12 @@ qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, virDomainMemoryDefPtr mem) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataMemLock(driver->lockManager, vm, mem) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -273,9 +358,17 @@ qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + VIR_WARN("unable to release memory metadata lock"); return ret; } @@ -286,6 +379,12 @@ qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, virDomainMemoryDefPtr mem) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataMemLock(driver->lockManager, vm, mem) < 0) + goto cleanup; + + locked = true; if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -301,9 +400,17 @@ qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup; + locked = false; + + if (virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + VIR_WARN("unable to release memory metadata lock"); return ret; } -- 2.16.4

On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Kind of sparse... Maybe a few words related to what benefit locking the metadata provides. There is a danger in all this too if the unlock does fail since metadata locks are wait type locks any subsequent lock will wait.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index af3be42854..527563947c 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_security.h" #include "virlog.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -39,6 +40,12 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, { int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + bool locked = false; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -55,9 +62,17 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false;
Technically not true yet. Also , I think two such attempts with the second eliciting the VIR_WARN is perfectly reasonable (IOW: on failure of the following goto cleanup with locked == true, try again, and spew that message). This repeats a few times, but I'll note just once.
+ + if (virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock");
Should we add the @stdin_path and @vm->pid and/or vm->name here? It may help some day. Similar comments for other new VIR_WARN's, but the parameters change...
return ret; }
@@ -68,6 +83,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, bool migrated) { qemuDomainObjPrivatePtr priv = vm->privateData; + bool unlock = true; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + unlock = false;
/* In contrast to qemuSecuritySetAllLabel, do not use * secdriver transactions here. This function is called from @@ -79,6 +98,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, vm->def, migrated, priv->chardevStdioLogd); + + if (unlock && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); }
@@ -88,6 +111,12 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -103,9 +132,17 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; }
@@ -116,6 +153,12 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -131,9 +174,17 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; }
@@ -144,6 +195,12 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -159,9 +216,17 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; }
@@ -172,6 +237,12 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -187,9 +258,17 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; }
Just below here is HostdevLabel processing... For a SCSI host device that would seem to cover the type of resource described in the cover letter to patch6 - at least for iSCSI LUNs (and of course vHBA LUNs). Need to look at the hostdev->mode and hostdev->source.subsys.type and dig into the subsys.u.scsi to get to the src->path. Other than that nothing else sticks out to me. John
@@ -258,6 +337,12 @@ qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, virDomainMemoryDefPtr mem) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataMemLock(driver->lockManager, vm, mem) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -273,9 +358,17 @@ qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + VIR_WARN("unable to release memory metadata lock"); return ret; }
@@ -286,6 +379,12 @@ qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, virDomainMemoryDefPtr mem) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataMemLock(driver->lockManager, vm, mem) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -301,9 +400,17 @@ qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataMemUnlock(driver->lockManager, vm, mem) < 0) + VIR_WARN("unable to release memory metadata lock"); return ret; }

On 08/17/2018 08:38 PM, John Ferlan wrote:
On 08/14/2018 07:19 AM, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Kind of sparse... Maybe a few words related to what benefit locking the metadata provides. There is a danger in all this too if the unlock does fail since metadata locks are wait type locks any subsequent lock will wait.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index af3be42854..527563947c 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_security.h" #include "virlog.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -39,6 +40,12 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, { int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + bool locked = false; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -55,9 +62,17 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false;
Technically not true yet. Also , I think two such attempts with the second eliciting the VIR_WARN is perfectly reasonable (IOW: on failure of the following goto cleanup with locked == true, try again, and spew that message).
This repeats a few times, but I'll note just once.
I don't think it is safe to unlock something twice if we've locked it just once.
+ + if (virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock");
Should we add the @stdin_path and @vm->pid and/or vm->name here? It may help some day. Similar comments for other new VIR_WARN's, but the parameters change...
Ah, good point. Okay.
return ret; }
@@ -68,6 +83,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, bool migrated) { qemuDomainObjPrivatePtr priv = vm->privateData; + bool unlock = true; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + unlock = false;
/* In contrast to qemuSecuritySetAllLabel, do not use * secdriver transactions here. This function is called from @@ -79,6 +98,10 @@ qemuSecurityRestoreAllLabel(virQEMUDriverPtr driver, vm->def, migrated, priv->chardevStdioLogd); + + if (unlock && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); }
@@ -88,6 +111,12 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -103,9 +132,17 @@ qemuSecuritySetDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; }
@@ -116,6 +153,12 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, virDomainDiskDefPtr disk) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataDiskLock(driver->lockManager, vm, disk) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -131,9 +174,17 @@ qemuSecurityRestoreDiskLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataDiskUnlock(driver->lockManager, vm, disk) < 0) + VIR_WARN("unable to release disk metadata lock"); return ret; }
@@ -144,6 +195,12 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -159,9 +216,17 @@ qemuSecuritySetImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; }
@@ -172,6 +237,12 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, virStorageSourcePtr src) { int ret = -1; + bool locked = false; + + if (virDomainLockMetadataImageLock(driver->lockManager, vm, src) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -187,9 +258,17 @@ qemuSecurityRestoreImageLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataImageUnlock(driver->lockManager, vm, src) < 0) + VIR_WARN("unable to release image metadata lock"); return ret; }
Just below here is HostdevLabel processing... For a SCSI host device that would seem to cover the type of resource described in the cover letter to patch6 - at least for iSCSI LUNs (and of course vHBA LUNs).
Need to look at the hostdev->mode and hostdev->source.subsys.type and dig into the subsys.u.scsi to get to the src->path.
Oh, this was the reason I made VIR_WARN() so sparse. I did not wanted to get into business of getting paths from all the complicated structs we have. I'll just put vm->def->name and vm->pid into the warn message. Michal

On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index af3be42854..527563947c 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_security.h" #include "virlog.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -39,6 +40,12 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, { int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + bool locked = false; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -55,9 +62,17 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); return ret; }
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on. eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index af3be42854..527563947c 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -26,6 +26,7 @@ #include "qemu_domain.h" #include "qemu_security.h" #include "virlog.h" +#include "locking/domain_lock.h"
#define VIR_FROM_THIS VIR_FROM_QEMU
@@ -39,6 +40,12 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, { int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; + bool locked = false; + + if (virDomainLockMetadataLock(driver->lockManager, vm) < 0) + goto cleanup; + + locked = true;
if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && virSecurityManagerTransactionStart(driver->securityManager) < 0) @@ -55,9 +62,17 @@ qemuSecuritySetAllLabel(virQEMUDriverPtr driver, vm->pid) < 0) goto cleanup;
+ locked = false; + + if (virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + goto cleanup; + ret = 0; cleanup: virSecurityManagerTransactionAbort(driver->securityManager); + if (locked && + virDomainLockMetadataUnlock(driver->lockManager, vm) < 0) + VIR_WARN("unable to release metadata lock"); return ret; }
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on.
eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes.
Yeah, I vaguely recall writing code like this (when I was trying to solve this some time ago). Okay, let me see if that's feasible. But boy, this is getting hairy. Michal

On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on.
eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes.
Yeah, I vaguely recall writing code like this (when I was trying to solve this some time ago). Okay, let me see if that's feasible.
But boy, this is getting hairy.
So as I'm writing these patches I came to realize couple of things: a) instead of domain PID we should pass libvirtd PID because we want to release the locks if libvirtd dies not domain. b) that, however, leads to a problem because virLockManagerLockDaemonAcquire() closes the socket to virtlockd causing it to kill the owner of the lock, i.e. it kills libvirtd immediately. c) even if I hack around b) so that we connect only once for each lock+unlock pair call, we would still connect dozens of times when starting a domain (all the paths we label times all active secdrivers). So we should share connection here too. Now question is how do do this effectively and cleanly (code-wise). For solving b) we can have a new flag, say VIR_LOCK_MANAGER_KEEP_CONNECT that would cause virLockManagerLockDaemonAcquire() to save the (connection, program, counter) tuple somewhere into lock driver private data so that virLockManagerLockDaemonRelease() called with the same flag can re-use the data. For c) I guess we will need to open the connection in virLockManagerLockDaemonNew(), put the socket FD into event loop so that EOF is caught and initiate reopen in that case. However, I'm not sure if this is desirable - to be constantly connected to virtlockd. Michal

On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on.
eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes.
Yeah, I vaguely recall writing code like this (when I was trying to solve this some time ago). Okay, let me see if that's feasible.
But boy, this is getting hairy.
So as I'm writing these patches I came to realize couple of things:
a) instead of domain PID we should pass libvirtd PID because we want to release the locks if libvirtd dies not domain.
b) that, however, leads to a problem because virLockManagerLockDaemonAcquire() closes the socket to virtlockd causing it to kill the owner of the lock, i.e. it kills libvirtd immediately.
This is fine ;-P
c) even if I hack around b) so that we connect only once for each lock+unlock pair call, we would still connect dozens of times when starting a domain (all the paths we label times all active secdrivers). So we should share connection here too.
Yeah, makes sense.
Now question is how do do this effectively and cleanly (code-wise). For solving b) we can have a new flag, say VIR_LOCK_MANAGER_KEEP_CONNECT that would cause virLockManagerLockDaemonAcquire() to save the (connection, program, counter) tuple somewhere into lock driver private data so that virLockManagerLockDaemonRelease() called with the same flag can re-use the data.
For c) I guess we will need to open the connection in virLockManagerLockDaemonNew(), put the socket FD into event loop so that EOF is caught and initiate reopen in that case. However, I'm not sure if this is desirable - to be constantly connected to virtlockd.
Can we use a model similar to what I did for the shared secondary driver connections. By default a call to virGetConnectNetwork() will open a new connection. To avoid opening & closing 100's of connections though, some places will call virSetConnectNetwork() to store a pre-opened connection in a thread local. That stays open until virSetConnectNetwork is called again passing in a NULL. We would put such a cache around any bits of code that triggers many connection calls to virlockd. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/23/2018 05:01 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on.
eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes.
Yeah, I vaguely recall writing code like this (when I was trying to solve this some time ago). Okay, let me see if that's feasible.
But boy, this is getting hairy.
So as I'm writing these patches I came to realize couple of things:
a) instead of domain PID we should pass libvirtd PID because we want to release the locks if libvirtd dies not domain.
b) that, however, leads to a problem because virLockManagerLockDaemonAcquire() closes the socket to virtlockd causing it to kill the owner of the lock, i.e. it kills libvirtd immediately.
This is fine ;-P
c) even if I hack around b) so that we connect only once for each lock+unlock pair call, we would still connect dozens of times when starting a domain (all the paths we label times all active secdrivers). So we should share connection here too.
Yeah, makes sense.
Now question is how do do this effectively and cleanly (code-wise). For solving b) we can have a new flag, say VIR_LOCK_MANAGER_KEEP_CONNECT that would cause virLockManagerLockDaemonAcquire() to save the (connection, program, counter) tuple somewhere into lock driver private data so that virLockManagerLockDaemonRelease() called with the same flag can re-use the data.
For c) I guess we will need to open the connection in virLockManagerLockDaemonNew(), put the socket FD into event loop so that EOF is caught and initiate reopen in that case. However, I'm not sure if this is desirable - to be constantly connected to virtlockd.
Can we use a model similar to what I did for the shared secondary driver connections.
By default a call to virGetConnectNetwork() will open a new connection.
To avoid opening & closing 100's of connections though, some places will call virSetConnectNetwork() to store a pre-opened connection in a thread local. That stays open until virSetConnectNetwork is called again passing in a NULL.
We would put such a cache around any bits of code that triggers many connection calls to virlockd.
Actually, would sharing connection for case c) work? Consider the following scenario: two threads A and B starting two different domains, both of them which want to relabel /dev/blah. Now, say thread A gets to lock the path first. It does so, and proceed to chown(). Then thread B wants to lock the same path. It tries to do so, but has to wait until A unlocks it. However, at this point virtlockd is stuck in virFileLock() call (it waits for the lock to be released). Because virtlockd runs single threaded it doesn't matter anymore that A will try to unlock the path eventually. virtlockd has deadlocked. I don't see any way around this :( Michal

On Thu, Aug 23, 2018 at 05:45:42PM +0200, Michal Privoznik wrote:
On 08/23/2018 05:01 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote:
Fortunately, we have qemu wrappers so it's sufficient to put lock/unlock call only there.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+)
I'm wondering if this is really the right level to plug in the metadata locking ? What about if we just pass a virLockManagerPtr to the security drivers and let them lock each resource at the time they need to modify its metadata. This will trivially guarantee that we always lock the exact set of files that we'll be changing metadata on.
eg in SELinux driver the virSecuritySELinuxSetFileconHelper method would directly call virLockManagerAcquire & virLockManagerRelease, avoiding the entire virDomainLock abstraction which was really focused around managing the content locks around lifecycle state changes.
Yeah, I vaguely recall writing code like this (when I was trying to solve this some time ago). Okay, let me see if that's feasible.
But boy, this is getting hairy.
So as I'm writing these patches I came to realize couple of things:
a) instead of domain PID we should pass libvirtd PID because we want to release the locks if libvirtd dies not domain.
b) that, however, leads to a problem because virLockManagerLockDaemonAcquire() closes the socket to virtlockd causing it to kill the owner of the lock, i.e. it kills libvirtd immediately.
This is fine ;-P
c) even if I hack around b) so that we connect only once for each lock+unlock pair call, we would still connect dozens of times when starting a domain (all the paths we label times all active secdrivers). So we should share connection here too.
Yeah, makes sense.
Now question is how do do this effectively and cleanly (code-wise). For solving b) we can have a new flag, say VIR_LOCK_MANAGER_KEEP_CONNECT that would cause virLockManagerLockDaemonAcquire() to save the (connection, program, counter) tuple somewhere into lock driver private data so that virLockManagerLockDaemonRelease() called with the same flag can re-use the data.
For c) I guess we will need to open the connection in virLockManagerLockDaemonNew(), put the socket FD into event loop so that EOF is caught and initiate reopen in that case. However, I'm not sure if this is desirable - to be constantly connected to virtlockd.
Can we use a model similar to what I did for the shared secondary driver connections.
By default a call to virGetConnectNetwork() will open a new connection.
To avoid opening & closing 100's of connections though, some places will call virSetConnectNetwork() to store a pre-opened connection in a thread local. That stays open until virSetConnectNetwork is called again passing in a NULL.
We would put such a cache around any bits of code that triggers many connection calls to virlockd.
Actually, would sharing connection for case c) work?
Consider the following scenario: two threads A and B starting two different domains, both of them which want to relabel /dev/blah.
Now, say thread A gets to lock the path first. It does so, and proceed to chown().
Then thread B wants to lock the same path. It tries to do so, but has to wait until A unlocks it. However, at this point virtlockd is stuck in virFileLock() call (it waits for the lock to be released).
Because virtlockd runs single threaded it doesn't matter anymore that A will try to unlock the path eventually. virtlockd has deadlocked.
I don't see any way around this :(
Oh right, yes, that kills the idea of using a WAIT flag for lockspaces, unless we want to introduce threads ingto virtlockd, but we can't do that because Linux has totally fubard locking across execve() :-( Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/23/2018 05:51 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 05:45:42PM +0200, Michal Privoznik wrote:
On 08/23/2018 05:01 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote:
On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote: > Fortunately, we have qemu wrappers so it's sufficient to put > lock/unlock call only there. > > Signed-off-by: Michal Privoznik <mprivozn@redhat.com> > --- > src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 107 insertions(+) >
Oh right, yes, that kills the idea of using a WAIT flag for lockspaces, unless we want to introduce threads ingto virtlockd, but we can't do that because Linux has totally fubard locking across execve() :-(
But we need WAIT. I guess we do not want to do: lockForMetadata(const char *path) { int retries; while (retries) { rc = try_lock(path, wait=false); if (!rc) break; if (errno = EAGAIN && retries--) {sleep(.1); continue;} errorOut(); } } However, if we make sure that virtlockd never forks() nor execs() we have nothing to fear about. Or am I missing something? And to be 100% sure we can always provide dummy impl for fork() + execve() in src/locking/lock_daemon.c which fails everytime. Michal

On 08/23/2018 06:14 PM, Michal Privoznik wrote:
On 08/23/2018 05:51 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 05:45:42PM +0200, Michal Privoznik wrote:
On 08/23/2018 05:01 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote:
On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote: > On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote: >> Fortunately, we have qemu wrappers so it's sufficient to put >> lock/unlock call only there. >> >> Signed-off-by: Michal Privoznik <mprivozn@redhat.com> >> --- >> src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 107 insertions(+) >>
Oh right, yes, that kills the idea of using a WAIT flag for lockspaces, unless we want to introduce threads ingto virtlockd, but we can't do that because Linux has totally fubard locking across execve() :-(
Actually, it's problem with open() + close() not execve(). And no dumy implementation (as I'm suggesting below) will not help us with that.
But we need WAIT. I guess we do not want to do:
lockForMetadata(const char *path) { int retries;
while (retries) { rc = try_lock(path, wait=false);
if (!rc) break; if (errno = EAGAIN && retries--) {sleep(.1); continue;}
errorOut(); } }
However, if we make sure that virtlockd never forks() nor execs() we have nothing to fear about. Or am I missing something? And to be 100% sure we can always provide dummy impl for fork() + execve() in src/locking/lock_daemon.c which fails everytime.
I work around this by putting a mutex into secdriver so that only one thread can do lock(). The others have to wait until the thread unlocks() the path. This way we leave virtlockd with just one thread. In my testing everything works like charm. Michal

On Fri, Aug 24, 2018 at 02:01:52PM +0200, Michal Privoznik wrote:
On 08/23/2018 06:14 PM, Michal Privoznik wrote:
On 08/23/2018 05:51 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 05:45:42PM +0200, Michal Privoznik wrote:
On 08/23/2018 05:01 PM, Daniel P. Berrangé wrote:
On Thu, Aug 23, 2018 at 04:54:01PM +0200, Michal Privoznik wrote:
On 08/20/2018 07:17 PM, Michal Prívozník wrote: > On 08/20/2018 05:16 PM, Daniel P. Berrangé wrote: >> On Tue, Aug 14, 2018 at 01:19:43PM +0200, Michal Privoznik wrote: >>> Fortunately, we have qemu wrappers so it's sufficient to put >>> lock/unlock call only there. >>> >>> Signed-off-by: Michal Privoznik <mprivozn@redhat.com> >>> --- >>> src/qemu/qemu_security.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 107 insertions(+) >>>
Oh right, yes, that kills the idea of using a WAIT flag for lockspaces, unless we want to introduce threads ingto virtlockd, but we can't do that because Linux has totally fubard locking across execve() :-(
Actually, it's problem with open() + close() not execve(). And no dumy implementation (as I'm suggesting below) will not help us with that.
No, I really do mean execve() here. The open() + close() issue is the awful standards compliant behaviour. The execve() issue with threads is a trainwreck of Linuxs own making: https://bugzilla.redhat.com/show_bug.cgi?id=1552621
But we need WAIT. I guess we do not want to do:
lockForMetadata(const char *path) { int retries;
while (retries) { rc = try_lock(path, wait=false);
if (!rc) break; if (errno = EAGAIN && retries--) {sleep(.1); continue;}
errorOut(); } }
However, if we make sure that virtlockd never forks() nor execs() we have nothing to fear about. Or am I missing something? And to be 100% sure we can always provide dummy impl for fork() + execve() in src/locking/lock_daemon.c which fails everytime.
I work around this by putting a mutex into secdriver so that only one thread can do lock(). The others have to wait until the thread unlocks() the path. This way we leave virtlockd with just one thread. In my testing everything works like charm.
That sounds reasonable, so we don't need the _WAIT behaviour in virtlockd itself, as everything will wait in the secdriver instead. At least for now, until we modularize the startup process with the shim. Guess that's just one more todo item to solve for the shim so not the end of the world. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 08/24/2018 02:53 PM, Daniel P. Berrangé wrote:
That sounds reasonable, so we don't need the _WAIT behaviour in virtlockd itself, as everything will wait in the secdriver instead. At least for now, until we modularize the startup process with the shim. Guess that's just one more todo item to solve for the shim so not the end of the world.
Hold on, we do need _WAIT so that we mutually exclude other virtlockd-s from other hosts fiddling with seclabels on a shared NFS. However, we will not deadlock on a single host, that's what I'm saying. Michal

On Fri, Aug 24, 2018 at 03:29:24PM +0200, Michal Privoznik wrote:
On 08/24/2018 02:53 PM, Daniel P. Berrangé wrote:
That sounds reasonable, so we don't need the _WAIT behaviour in virtlockd itself, as everything will wait in the secdriver instead. At least for now, until we modularize the startup process with the shim. Guess that's just one more todo item to solve for the shim so not the end of the world.
Hold on, we do need _WAIT so that we mutually exclude other virtlockd-s from other hosts fiddling with seclabels on a shared NFS. However, we will not deadlock on a single host, that's what I'm saying.
Right but later when multiple clients are permitted to connect to the same virtlockd, the API they will use has a designed in deadlock :-( Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Hi, This series was run against 'syntax-check' test by patchew.org, which failed, please find the details below: Type: series Message-id: cover.1534245398.git.mprivozn@redhat.com Subject: [libvirt] [PATCH v2 0/7] Introduce metadata locking === TEST SCRIPT BEGIN === #!/bin/bash # Testing script will be invoked under the git checkout with # HEAD pointing to a commit that has the patches applied on top of "base" # branch time bash -c './autogen.sh && make syntax-check' === TEST SCRIPT END === Updating bcb55ab053bc79561b55d0394490f4b64e0f2d01 >From https://github.com/patchew-project/libvirt ff7bd6a245..b0451117b3 master -> master * [new tag] patchew/20180814114440.20803-1-abologna@redhat.com -> patchew/20180814114440.20803-1-abologna@redhat.com Switched to a new branch 'test' de813e44ab qemu_security: Lock metadata while relabelling 718359e105 domain_lock: Implement metadata locking 77e0f31573 lock_driver_sanlock: Handle metadata flag gracefully a40a747b66 lockd_driver_lockd: Implement metadata flag 8759856c94 lock_driver.h: Introduce metadata flag 3314c0a08f virlockspace: Introduce VIR_LOCK_SPACE_ACQUIRE_WAIT 400bd7189d virlockspace: Allow caller to specify start and length offset in virLockSpaceAcquireResource === OUTPUT BEGIN === Updating submodules... Submodule 'gnulib' (https://git.savannah.gnu.org/git/gnulib.git/) registered for path '.gnulib' Submodule 'keycodemapdb' (https://gitlab.com/keycodemap/keycodemapdb.git) registered for path 'src/keycodemapdb' Cloning into '/var/tmp/patchew-tester-tmp-i2jn955t/src/.gnulib'... fatal: unable to access 'https://git.savannah.gnu.org/git/gnulib.git/': The requested URL returned error: 502 fatal: clone of 'https://git.savannah.gnu.org/git/gnulib.git/' into submodule path '/var/tmp/patchew-tester-tmp-i2jn955t/src/.gnulib' failed Failed to clone '.gnulib'. Retry scheduled Cloning into '/var/tmp/patchew-tester-tmp-i2jn955t/src/src/keycodemapdb'... Cloning into '/var/tmp/patchew-tester-tmp-i2jn955t/src/.gnulib'... fatal: unable to access 'https://git.savannah.gnu.org/git/gnulib.git/': The requested URL returned error: 502 fatal: clone of 'https://git.savannah.gnu.org/git/gnulib.git/' into submodule path '/var/tmp/patchew-tester-tmp-i2jn955t/src/.gnulib' failed Failed to clone '.gnulib' a second time, aborting error: Updating submodules failed real 0m6.736s user 0m0.751s sys 0m0.443s === OUTPUT END === Test command exited with code: 1 --- Email generated automatically by Patchew [http://patchew.org/]. Please send your feedback to patchew-devel@redhat.com
participants (5)
-
Daniel P. Berrangé
-
John Ferlan
-
Michal Privoznik
-
Michal Prívozník
-
no-reply@patchew.org