[libvirt] [PATCH v3 00/13] Implement alternative metadata locking

v3 of: https://www.redhat.com/archives/libvir-list/2018-October/msg00667.html diff to v2: - Introduced two new patches (1/13 and 2/13) so that even non-Linux platforms are covered - In 4/13 I switched from indefinite wait for lock to a lock with timeout (of 10 seconds). This is basically to prevent us stalling if some app misbehaves and holds the file locked for eternity. Michal Prívozník (13): virprocess: Introduce virProcessRunInFork virprocess: Make virProcessRunInMountNamespace use virProcessRunInFork security: Always spawn process for transactions security_manager: Rework metadata locking Revert "security_manager: Load lock plugin on init" Revert "qemu_conf: Introduce metadata_lock_manager" Revert "lock_manager: Allow disabling configFile for virLockManagerPluginNew" Revert "lock_driver: Introduce VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK" Revert "lock_driver: Introduce VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA" Revert "_virLockManagerLockDaemonPrivate: Move @hasRWDisks into dom union" Revert "lock_driver: Introduce new VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON" Revert "lock_driver_lockd: Introduce VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA flag" Revert "virlockspace: Allow caller to specify start and length offset in virLockSpaceAcquireResource" cfg.mk | 4 +- src/libvirt_private.syms | 1 + src/locking/lock_daemon_dispatch.c | 11 +- src/locking/lock_driver.h | 12 - src/locking/lock_driver_lockd.c | 421 ++++++++++------------------- src/locking/lock_driver_lockd.h | 1 - src/locking/lock_driver_sanlock.c | 44 +-- src/locking/lock_manager.c | 10 +- src/lxc/lxc_controller.c | 3 +- src/lxc/lxc_driver.c | 2 +- src/qemu/qemu_conf.c | 1 - src/qemu/qemu_conf.h | 1 - src/qemu/qemu_driver.c | 3 - src/security/security_dac.c | 12 +- src/security/security_manager.c | 250 +++++++++-------- src/security/security_manager.h | 19 +- src/security/security_selinux.c | 11 +- src/util/virlockspace.c | 15 +- src/util/virlockspace.h | 4 - src/util/virprocess.c | 69 ++++- src/util/virprocess.h | 16 ++ tests/seclabeltest.c | 2 +- tests/securityselinuxlabeltest.c | 2 +- tests/securityselinuxtest.c | 2 +- tests/testutilsqemu.c | 2 +- tests/virlockspacetest.c | 29 +- 26 files changed, 387 insertions(+), 560 deletions(-) -- 2.18.1

This new helper can be used to spawn a child process and run passed callback from it. This will come handy esp. if the callback is not thread safe. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 81 ++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 16 ++++++++ 3 files changed, 98 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 335210c31d..e2dc85310a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2625,6 +2625,7 @@ virProcessKill; virProcessKillPainfully; virProcessKillPainfullyDelay; virProcessNamespaceAvailable; +virProcessRunInFork; virProcessRunInMountNamespace; virProcessSchedPolicyTypeFromString; virProcessSchedPolicyTypeToString; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 3883c708fc..51b9ccb1bb 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1165,6 +1165,87 @@ virProcessRunInMountNamespace(pid_t pid, } +static int +virProcessForkHelper(int errfd, + pid_t pid, + virProcessForkCallback cb, + void *opaque) +{ + if (cb(pid, opaque) < 0) { + virErrorPtr err = virGetLastError(); + if (err) { + size_t len = strlen(err->message) + 1; + ignore_value(safewrite(errfd, err->message, len)); + } + + return -1; + } + + return 0; +} + + +/** + * virProcessRunInFork: + * @cb: callback to run + * @opaque: opaque data to @cb + * + * Do the fork and run @cb in the child. This can be used when + * @cb does something thread unsafe, for instance. However, due + * to possible signal propagation @cb should only call async + * signal safe functions. On return, the returned value is either + * -1 with error message reported if something went bad in the + * parent, if child has died from a signal or if the child + * returned EXIT_CANCELED. Otherwise the returned value is the + * exit status of the child. + */ +int +virProcessRunInFork(virProcessForkCallback cb, + void *opaque) +{ + int ret = -1; + pid_t child = -1; + pid_t parent = getpid(); + int errfd[2] = { -1, -1 }; + + if (pipe2(errfd, O_CLOEXEC) < 0) { + virReportSystemError(errno, "%s", + _("Cannot create pipe for child")); + return -1; + } + + if ((child = virFork()) < 0) + goto cleanup; + + if (child == 0) { + VIR_FORCE_CLOSE(errfd[0]); + ret = virProcessForkHelper(errfd[1], parent, cb, opaque); + VIR_FORCE_CLOSE(errfd[1]); + _exit(ret < 0 ? EXIT_CANCELED : ret); + } else { + int status; + VIR_AUTOFREE(char *) buf = NULL; + + VIR_FORCE_CLOSE(errfd[1]); + ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf)); + ret = virProcessWait(child, &status, false); + if (!ret) { + ret = status == EXIT_CANCELED ? -1 : status; + if (ret) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("child reported: %s"), + NULLSTR(buf)); + } + } + } + + cleanup: + VIR_FORCE_CLOSE(errfd[0]); + VIR_FORCE_CLOSE(errfd[1]); + return ret; +} + + #if defined(HAVE_SYS_MOUNT_H) && defined(HAVE_UNSHARE) int virProcessSetupPrivateMountNS(void) diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 5faa0892fe..1bae8c9212 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -93,6 +93,22 @@ int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque); +/** + * virProcessForkCallback: + * @pid: parent's pid + * @opaque: opaque data + * + * Callback to run in fork()-ed process. + * + * Returns: 0 on success, + * -1 on error (treated as EXIT_CANCELED) + */ +typedef int (*virProcessForkCallback)(pid_t pid, + void *opaque); + +int virProcessRunInFork(virProcessForkCallback cb, + void *opaque); + int virProcessSetupPrivateMountNS(void); int virProcessSetScheduler(pid_t pid, -- 2.18.1

On Wed, Oct 17, 2018 at 11:06:43AM +0200, Michal Privoznik wrote:
This new helper can be used to spawn a child process and run passed callback from it. This will come handy esp. if the callback is not thread safe.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 81 ++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 16 ++++++++ 3 files changed, 98 insertions(+)
+/** + * virProcessRunInFork: + * @cb: callback to run + * @opaque: opaque data to @cb + * + * Do the fork and run @cb in the child. This can be used when + * @cb does something thread unsafe, for instance. However, due + * to possible signal propagation @cb should only call async + * signal safe functions. On return, the returned value is either + * -1 with error message reported if something went bad in the + * parent, if child has died from a signal or if the child + * returned EXIT_CANCELED. Otherwise the returned value is the + * exit status of the child.
Instead of; However, due to possible signal propagation @cb should only call async signal safe functions. Use: All signals will be reset to have their platform default handlers and unmasked. @cb must only use async signal safe functions. In particular no mutexes should be used in @cb, unless steps were taken before forking to guarantee a predictable state. @cb must not exec any external binaries, instead virCommand/virExec should be used for that purpose.
+ */ +int +virProcessRunInFork(virProcessForkCallback cb, + void *opaque) +{ + int ret = -1; + pid_t child = -1; + pid_t parent = getpid(); + int errfd[2] = { -1, -1 }; + + if (pipe2(errfd, O_CLOEXEC) < 0) { + virReportSystemError(errno, "%s", + _("Cannot create pipe for child")); + return -1; + } + + if ((child = virFork()) < 0) + goto cleanup; + + if (child == 0) { + VIR_FORCE_CLOSE(errfd[0]); + ret = virProcessForkHelper(errfd[1], parent, cb, opaque); + VIR_FORCE_CLOSE(errfd[1]); + _exit(ret < 0 ? EXIT_CANCELED : ret); + } else { + int status; + VIR_AUTOFREE(char *) buf = NULL; + + VIR_FORCE_CLOSE(errfd[1]); + ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf)); + ret = virProcessWait(child, &status, false); + if (!ret) { + ret = status == EXIT_CANCELED ? -1 : status; + if (ret) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("child reported: %s"), + NULLSTR(buf));
It would be desirable to include the exit status value in the message
+ } + } + } + + cleanup: + VIR_FORCE_CLOSE(errfd[0]); + VIR_FORCE_CLOSE(errfd[1]); + return ret; +}
Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 10/17/18 5:06 AM, Michal Privoznik wrote:
This new helper can be used to spawn a child process and run passed callback from it. This will come handy esp. if the callback is not thread safe.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virprocess.c | 81 ++++++++++++++++++++++++++++++++++++++++ src/util/virprocess.h | 16 ++++++++ 3 files changed, 98 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 335210c31d..e2dc85310a 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2625,6 +2625,7 @@ virProcessKill; virProcessKillPainfully; virProcessKillPainfullyDelay; virProcessNamespaceAvailable; +virProcessRunInFork; virProcessRunInMountNamespace; virProcessSchedPolicyTypeFromString; virProcessSchedPolicyTypeToString; diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 3883c708fc..51b9ccb1bb 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1165,6 +1165,87 @@ virProcessRunInMountNamespace(pid_t pid, }
+static int +virProcessForkHelper(int errfd,
For consistency, virProcessRunInForkHelper
+ pid_t pid,
s/pid/parent (or ppid, IDC) just to be clear
+ virProcessForkCallback cb, + void *opaque) +{ + if (cb(pid, opaque) < 0) { + virErrorPtr err = virGetLastError(); + if (err) { + size_t len = strlen(err->message) + 1; + ignore_value(safewrite(errfd, err->message, len)); + } + + return -1; + } + + return 0; +} + + +/** + * virProcessRunInFork: + * @cb: callback to run + * @opaque: opaque data to @cb + * + * Do the fork and run @cb in the child. This can be used when + * @cb does something thread unsafe, for instance. However, due + * to possible signal propagation @cb should only call async + * signal safe functions. On return, the returned value is either + * -1 with error message reported if something went bad in the + * parent, if child has died from a signal or if the child + * returned EXIT_CANCELED. Otherwise the returned value is the + * exit status of the child. + */ +int +virProcessRunInFork(virProcessForkCallback cb, + void *opaque) +{ + int ret = -1; + pid_t child = -1; + pid_t parent = getpid(); + int errfd[2] = { -1, -1 }; + + if (pipe2(errfd, O_CLOEXEC) < 0) { + virReportSystemError(errno, "%s", + _("Cannot create pipe for child")); + return -1; + } + + if ((child = virFork()) < 0) + goto cleanup; + + if (child == 0) { + VIR_FORCE_CLOSE(errfd[0]); + ret = virProcessForkHelper(errfd[1], parent, cb, opaque); + VIR_FORCE_CLOSE(errfd[1]); + _exit(ret < 0 ? EXIT_CANCELED : ret); + } else { + int status; + VIR_AUTOFREE(char *) buf = NULL; + + VIR_FORCE_CLOSE(errfd[1]); + ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf)); + ret = virProcessWait(child, &status, false); + if (!ret) {
<sigh>
+ ret = status == EXIT_CANCELED ? -1 : status; + if (ret) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("child reported: %s"), + NULLSTR(buf)); + }
I agree w/ Daniel regarding @status reporting like virCommandWait, it looks ugly but gets all the information out at least.
+ } + } + + cleanup: + VIR_FORCE_CLOSE(errfd[0]); + VIR_FORCE_CLOSE(errfd[1]); + return ret; +} + + #if defined(HAVE_SYS_MOUNT_H) && defined(HAVE_UNSHARE) int virProcessSetupPrivateMountNS(void) diff --git a/src/util/virprocess.h b/src/util/virprocess.h index 5faa0892fe..1bae8c9212 100644 --- a/src/util/virprocess.h +++ b/src/util/virprocess.h @@ -93,6 +93,22 @@ int virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque);
+/** + * virProcessForkCallback: + * @pid: parent's pid
It's the "fork's" parent pid. Perhaps would be nice to have ppid or parent - just because pid is used so many times. It's not that important though.
+ * @opaque: opaque data + * + * Callback to run in fork()-ed process. + * + * Returns: 0 on success, + * -1 on error (treated as EXIT_CANCELED) + */ +typedef int (*virProcessForkCallback)(pid_t pid, + void *opaque); + +int virProcessRunInFork(virProcessForkCallback cb, + void *opaque); +
Since this is defined before virProcessRunInMountNamespace in source, probably could do the same in this file. It's a type a orderly thing. and yes, essentially what I was looking to see done from previous reviews since those paths won't necessarily be NS type runs... I'm OK with trusting that you can make the necessary comment adjustments from Daniel's review, but if you feel compelled to post a diff I won't complain either... Reviewed-by: John Ferlan <jferlan@redhat.com> John
int virProcessSetupPrivateMountNS(void);
int virProcessSetScheduler(pid_t pid,

Both virProcessRunInMountNamespace() and virProcessRunInFork() look very similar. De-duplicate the code and make virProcessRunInMountNamespace() call virProcessRunInFork(). Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virprocess.c | 62 +++++++++---------------------------------- 1 file changed, 12 insertions(+), 50 deletions(-) diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 51b9ccb1bb..3304879212 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1073,11 +1073,17 @@ int virProcessGetStartTime(pid_t pid, #endif -static int virProcessNamespaceHelper(int errfd, - pid_t pid, - virProcessNamespaceCallback cb, +typedef struct _virProcessNamespaceHelperData virProcessNamespaceHelperData; +struct _virProcessNamespaceHelperData { + pid_t pid; + virProcessNamespaceCallback cb; + void *opaque; +}; + +static int virProcessNamespaceHelper(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { + virProcessNamespaceHelperData *data = opaque; int fd = -1; int ret = -1; VIR_AUTOFREE(char *) path = NULL; @@ -1097,16 +1103,9 @@ static int virProcessNamespaceHelper(int errfd, goto cleanup; } - ret = cb(pid, opaque); + ret = data->cb(data->pid, data->opaque); cleanup: - if (ret < 0) { - virErrorPtr err = virGetLastError(); - if (err) { - size_t len = strlen(err->message) + 1; - ignore_value(safewrite(errfd, err->message, len)); - } - } VIR_FORCE_CLOSE(fd); return ret; } @@ -1122,46 +1121,9 @@ virProcessRunInMountNamespace(pid_t pid, virProcessNamespaceCallback cb, void *opaque) { - int ret = -1; - pid_t child = -1; - int errfd[2] = { -1, -1 }; + virProcessNamespaceHelperData data = {.pid = pid, .cb = cb, .opaque = opaque}; - if (pipe2(errfd, O_CLOEXEC) < 0) { - virReportSystemError(errno, "%s", - _("Cannot create pipe for child")); - return -1; - } - - if ((child = virFork()) < 0) - goto cleanup; - - if (child == 0) { - VIR_FORCE_CLOSE(errfd[0]); - ret = virProcessNamespaceHelper(errfd[1], pid, - cb, opaque); - VIR_FORCE_CLOSE(errfd[1]); - _exit(ret < 0 ? EXIT_CANCELED : ret); - } else { - int status; - VIR_AUTOFREE(char *) buf = NULL; - - VIR_FORCE_CLOSE(errfd[1]); - ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf)); - ret = virProcessWait(child, &status, false); - if (!ret) { - ret = status == EXIT_CANCELED ? -1 : status; - if (ret) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("child reported: %s"), - NULLSTR(buf)); - } - } - } - - cleanup: - VIR_FORCE_CLOSE(errfd[0]); - VIR_FORCE_CLOSE(errfd[1]); - return ret; + return virProcessRunInFork(virProcessNamespaceHelper, &data); } -- 2.18.1

On 10/17/18 5:06 AM, Michal Privoznik wrote:
Both virProcessRunInMountNamespace() and virProcessRunInFork() look very similar. De-duplicate the code and make virProcessRunInMountNamespace() call virProcessRunInFork().> Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virprocess.c | 62 +++++++++---------------------------------- 1 file changed, 12 insertions(+), 50 deletions(-)
diff --git a/src/util/virprocess.c b/src/util/virprocess.c index 51b9ccb1bb..3304879212 100644 --- a/src/util/virprocess.c +++ b/src/util/virprocess.c @@ -1073,11 +1073,17 @@ int virProcessGetStartTime(pid_t pid, #endif
-static int virProcessNamespaceHelper(int errfd, - pid_t pid, - virProcessNamespaceCallback cb, +typedef struct _virProcessNamespaceHelperData virProcessNamespaceHelperData; +struct _virProcessNamespaceHelperData { + pid_t pid; + virProcessNamespaceCallback cb; + void *opaque; +}; + +static int virProcessNamespaceHelper(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { + virProcessNamespaceHelperData *data = opaque; int fd = -1; int ret = -1; VIR_AUTOFREE(char *) path = NULL;
Right after this there's a : if (virAsprintf(&path, "/proc/%lld/ns/mnt", (long long) pid) < 0) goto cleanup; The last arg should be data->pid I believe... Beyond that - nothing jumps out at me... Reviewed-by: John Ferlan <jferlan@redhat.com> John

In the next commit the virSecurityManagerMetadataLock() is going to be turned thread unsafe. Therefore, we have to spawn a separate process for it. Always. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index da4a6c72fe..580d800bb1 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -563,7 +563,7 @@ virSecurityDACTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, } if ((pid == -1 && - virSecurityDACTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecurityDACTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecurityDACTransactionRun, diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 467d1e6bfe..0e24b9b3ca 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1104,7 +1104,7 @@ virSecuritySELinuxTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, } if ((pid == -1 && - virSecuritySELinuxTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecuritySELinuxTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecuritySELinuxTransactionRun, -- 2.18.1

On 10/17/18 5:06 AM, Michal Privoznik wrote:
In the next commit the virSecurityManagerMetadataLock() is going to be turned thread unsafe. Therefore, we have to spawn a separate process for it. Always.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
Based slightly upon the KVM Forum presentation detailing some performance issues related to the usage of virFork for virQEMUCapsIsValid and calls to virFileAccessibleAs: https://www.redhat.com/archives/libvir-list/2018-October/msg01333.html Given that this patch would thus virFork "always", what kind of impact could that have? Have you been able to do any performance profiling? What would cause a single round of SELinux & DAC settings? I know in an earlier patch I wasn't including performance of virFork because I believed that the only time it would be used would be for metadata locking when (re)labeling would be batched and for that case the "one off" virFork would seem reasonable. Since it is possible to turn off the NS code and thus go through the direct call, I think there's "room for it" here too for those singular cases we could use "pid == -1" to indicate direct calls without virFork and "pid == 0" to batch together calls using virProcessRunInFork. That way when you *know* you are batching together more than singular changes, then the caller can choose to run in a fork knowing the possible performance penalty, but with the benefits. For those that are batched we're stuck, but my memory on all the metadata locking paths is fuzzy already. What's here does work, but after that KVM Forum preso I guess we definitely need to pay attention to areas that can be perf bottlenecks for paths that may be really common. Having a way to disable or avoid virFork is something we may just need to have. I'm willing to be convinced otherwise - I'm just "wary" now.
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index da4a6c72fe..580d800bb1 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c
The function description would need an update way to describe this @pid usage which differs from the current description.
@@ -563,7 +563,7 @@ virSecurityDACTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecurityDACTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecurityDACTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecurityDACTransactionRun,
I think I've previously disclosed my dislike of the format, why not: if (pid > 0) { rc = virProcessRunInMountNamespace(pid, ...); } else { if (pid == -1) rc = virSecurityDACTransactionRun(-pid, list); else rc = virProcessRunInFork(virSecurityDACTransactionRun, list)); } if (rc < 0) goto cleanup; to me that's a lot cleaner and doesn't involve trying to look at multiple if statements with ||'s.
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 467d1e6bfe..0e24b9b3ca 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1104,7 +1104,7 @@ virSecuritySELinuxTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecuritySELinuxTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecuritySELinuxTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecuritySELinuxTransactionRun,
ditto. John

On 11/08/2018 11:45 PM, John Ferlan wrote:
On 10/17/18 5:06 AM, Michal Privoznik wrote:
In the next commit the virSecurityManagerMetadataLock() is going to be turned thread unsafe. Therefore, we have to spawn a separate process for it. Always.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
Based slightly upon the KVM Forum presentation detailing some performance issues related to the usage of virFork for virQEMUCapsIsValid and calls to virFileAccessibleAs:
https://www.redhat.com/archives/libvir-list/2018-October/msg01333.html
Given that this patch would thus virFork "always", what kind of impact could that have? Have you been able to do any performance profiling? What would cause a single round of SELinux & DAC settings?
This is what I was discussing with Daniel. Although I can't recall where. Anyway, fork() is expensive sure, but my argumentation in previous versions was that we are doing it already anyway. Namespaces are enabled by default and unless users turned them off this adds no new fork(). Only if namespaces are turned off then there is new fork(). At any rate, this is one fork per one secdriver call. It's not one fork per path (which would be horrible).
I know in an earlier patch I wasn't including performance of virFork because I believed that the only time it would be used would be for metadata locking when (re)labeling would be batched and for that case the "one off" virFork would seem reasonable.
This is still true. There is no extra fork() if you have namespaces enabled. Unfortunately, POSIX file locking is fubared and doing fork is the least painful option.
Since it is possible to turn off the NS code and thus go through the direct call, I think there's "room for it" here too for those singular cases we could use "pid == -1" to indicate direct calls without virFork and "pid == 0" to batch together calls using virProcessRunInFork.
That way when you *know* you are batching together more than singular changes, then the caller can choose to run in a fork knowing the possible performance penalty, but with the benefits. For those that are batched we're stuck, but my memory on all the metadata locking paths is fuzzy already.
What's here does work, but after that KVM Forum preso I guess we definitely need to pay attention to areas that can be perf bottlenecks for paths that may be really common.
Having a way to disable or avoid virFork is something we may just need to have. I'm willing to be convinced otherwise - I'm just "wary" now.
The metadata locking needs to be there so that the setting seclabels is atomic. I mean, so far chown() and setfcon() are atomic. However, there will be some xattr related calls around those. Therefore to make the whole critical section atomic there has to be a lock: lock(file); xattr(file); chown(file); xattr(file); unlock(file); The xattr() calls set/recall the original owner of the file. I can make this configurable, but if there is no locking the only thing libvirt can do is chown(), not the xattr() because that wouldn't be atomic. (I'm saying only chown(), but it is the same story for setfcon().) Therefore, if users disable metadata locking the original owner of the file can't be preserved. On the other hand, we can enable it by default and have an opt out for cases where it doesn't work - just like we have for namespaces. And I believe people did disable them in their early days (even though I don't understand why, they were bugfree :-P)
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index da4a6c72fe..580d800bb1 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c
The function description would need an update way to describe this @pid usage which differs from the current description.
@@ -563,7 +563,7 @@ virSecurityDACTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecurityDACTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecurityDACTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecurityDACTransactionRun,
I think I've previously disclosed my dislike of the format, why not:
if (pid > 0) { rc = virProcessRunInMountNamespace(pid, ...); } else { if (pid == -1) rc = virSecurityDACTransactionRun(-pid, list); else rc = virProcessRunInFork(virSecurityDACTransactionRun, list)); } if (rc < 0) goto cleanup;
to me that's a lot cleaner and doesn't involve trying to look at multiple if statements with ||'s.
Okay, I'll change this.
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 467d1e6bfe..0e24b9b3ca 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1104,7 +1104,7 @@ virSecuritySELinuxTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecuritySELinuxTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecuritySELinuxTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecuritySELinuxTransactionRun,
ditto.
John
Michal

On 11/9/18 7:42 AM, Michal Privoznik wrote:
On 11/08/2018 11:45 PM, John Ferlan wrote:
On 10/17/18 5:06 AM, Michal Privoznik wrote:
In the next commit the virSecurityManagerMetadataLock() is going to be turned thread unsafe. Therefore, we have to spawn a separate process for it. Always.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
Based slightly upon the KVM Forum presentation detailing some performance issues related to the usage of virFork for virQEMUCapsIsValid and calls to virFileAccessibleAs:
https://www.redhat.com/archives/libvir-list/2018-October/msg01333.html
Given that this patch would thus virFork "always", what kind of impact could that have? Have you been able to do any performance profiling? What would cause a single round of SELinux & DAC settings?
This is what I was discussing with Daniel. Although I can't recall where. Anyway, fork() is expensive sure, but my argumentation in previous versions was that we are doing it already anyway. Namespaces are enabled by default and unless users turned them off this adds no new fork(). Only if namespaces are turned off then there is new fork(). At any rate, this is one fork per one secdriver call. It's not one fork per path (which would be horrible).
I too recall seeing the convo w/ Daniel, but couldn't find it in the recent patches... I wonder if it was on internal IRC. I did do a small amount of profiling before I asked, but my config doesn't have some of the qemu_command qemuSecuritySet* calls for sockets and tapfd's that are outside the batched qemuSecuritySetAllLabel. As an aside, qemuSecurityDomainSetPathLabel strays from the normal qemuSecurity{Set|Restore}* nomenclature (/me being grumpy about searches for call patterns and would gladly R-by a qemuSecuritySetDomain* patch as well as one that describes the functionality including the well there's no need to run the Restore because these are domain transient things that get removed anyway ;-)). Anyway, currently it's just a "handful" of calls, but some in areas that we seem to get flack on for. If we can get ahead of that flack because we know what we're about to do w/r/t virFork that'd be good.
I know in an earlier patch I wasn't including performance of virFork because I believed that the only time it would be used would be for metadata locking when (re)labeling would be batched and for that case the "one off" virFork would seem reasonable.
This is still true. There is no extra fork() if you have namespaces enabled. Unfortunately, POSIX file locking is fubared and doing fork is the least painful option.
Since it is possible to turn off the NS code and thus go through the direct call, I think there's "room for it" here too for those singular cases we could use "pid == -1" to indicate direct calls without virFork and "pid == 0" to batch together calls using virProcessRunInFork.
That way when you *know* you are batching together more than singular changes, then the caller can choose to run in a fork knowing the possible performance penalty, but with the benefits. For those that are batched we're stuck, but my memory on all the metadata locking paths is fuzzy already.
What's here does work, but after that KVM Forum preso I guess we definitely need to pay attention to areas that can be perf bottlenecks for paths that may be really common.
Having a way to disable or avoid virFork is something we may just need to have. I'm willing to be convinced otherwise - I'm just "wary" now.
The metadata locking needs to be there so that the setting seclabels is atomic. I mean, so far chown() and setfcon() are atomic. However, there will be some xattr related calls around those. Therefore to make the whole critical section atomic there has to be a lock:
lock(file); xattr(file); chown(file); xattr(file); unlock(file);
The xattr() calls set/recall the original owner of the file. I can make this configurable, but if there is no locking the only thing libvirt can do is chown(), not the xattr() because that wouldn't be atomic. (I'm saying only chown(), but it is the same story for setfcon().) Therefore, if users disable metadata locking the original owner of the file can't be preserved. On the other hand, we can enable it by default and have an opt out for cases where it doesn't work - just like we have for namespaces. And I believe people did disable them in their early days (even though I don't understand why, they were bugfree :-P)
I understand (generically) why we need the lock. I'm OK with it being enabled by default. That's not the question/ask. Building in a way to allow disabling usage of virFork and/or metadata lock knowing the "penalty" or downside to doing so goes beyond bug free or performance, it's just that "choice" we allow someone to make. You know there are those out there that will bemoan "choosing" this is as the default. If they want to disable in order to gain whatever at the cost of something else, then so be it. In some ways it's a CYA exercise. John
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index da4a6c72fe..580d800bb1 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c
The function description would need an update way to describe this @pid usage which differs from the current description.
@@ -563,7 +563,7 @@ virSecurityDACTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecurityDACTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecurityDACTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecurityDACTransactionRun,
I think I've previously disclosed my dislike of the format, why not:
if (pid > 0) { rc = virProcessRunInMountNamespace(pid, ...); } else { if (pid == -1) rc = virSecurityDACTransactionRun(-pid, list); else rc = virProcessRunInFork(virSecurityDACTransactionRun, list)); } if (rc < 0) goto cleanup;
to me that's a lot cleaner and doesn't involve trying to look at multiple if statements with ||'s.
Okay, I'll change this.
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 467d1e6bfe..0e24b9b3ca 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1104,7 +1104,7 @@ virSecuritySELinuxTransactionCommit(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, }
if ((pid == -1 && - virSecuritySELinuxTransactionRun(pid, list) < 0) || + virProcessRunInFork(virSecuritySELinuxTransactionRun, list) < 0) || (pid != -1 && virProcessRunInMountNamespace(pid, virSecuritySELinuxTransactionRun,
ditto.
John
Michal

On 11/09/2018 03:02 PM, John Ferlan wrote:
On 11/9/18 7:42 AM, Michal Privoznik wrote:
On 11/08/2018 11:45 PM, John Ferlan wrote:
On 10/17/18 5:06 AM, Michal Privoznik wrote:
In the next commit the virSecurityManagerMetadataLock() is going to be turned thread unsafe. Therefore, we have to spawn a separate process for it. Always.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 2 +- src/security/security_selinux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
Based slightly upon the KVM Forum presentation detailing some performance issues related to the usage of virFork for virQEMUCapsIsValid and calls to virFileAccessibleAs:
https://www.redhat.com/archives/libvir-list/2018-October/msg01333.html
Given that this patch would thus virFork "always", what kind of impact could that have? Have you been able to do any performance profiling? What would cause a single round of SELinux & DAC settings?
This is what I was discussing with Daniel. Although I can't recall where. Anyway, fork() is expensive sure, but my argumentation in previous versions was that we are doing it already anyway. Namespaces are enabled by default and unless users turned them off this adds no new fork(). Only if namespaces are turned off then there is new fork(). At any rate, this is one fork per one secdriver call. It's not one fork per path (which would be horrible).
I too recall seeing the convo w/ Daniel, but couldn't find it in the recent patches... I wonder if it was on internal IRC.
I did do a small amount of profiling before I asked, but my config doesn't have some of the qemu_command qemuSecuritySet* calls for sockets and tapfd's that are outside the batched qemuSecuritySetAllLabel. As an aside, qemuSecurityDomainSetPathLabel strays from the normal qemuSecurity{Set|Restore}* nomenclature (/me being grumpy about searches for call patterns and would gladly R-by a qemuSecuritySetDomain* patch as well as one that describes the functionality including the well there's no need to run the Restore because these are domain transient things that get removed anyway ;-)).
Anyway, currently it's just a "handful" of calls, but some in areas that we seem to get flack on for. If we can get ahead of that flack because we know what we're about to do w/r/t virFork that'd be good.
I know in an earlier patch I wasn't including performance of virFork because I believed that the only time it would be used would be for metadata locking when (re)labeling would be batched and for that case the "one off" virFork would seem reasonable.
This is still true. There is no extra fork() if you have namespaces enabled. Unfortunately, POSIX file locking is fubared and doing fork is the least painful option.
Since it is possible to turn off the NS code and thus go through the direct call, I think there's "room for it" here too for those singular cases we could use "pid == -1" to indicate direct calls without virFork and "pid == 0" to batch together calls using virProcessRunInFork.
That way when you *know* you are batching together more than singular changes, then the caller can choose to run in a fork knowing the possible performance penalty, but with the benefits. For those that are batched we're stuck, but my memory on all the metadata locking paths is fuzzy already.
What's here does work, but after that KVM Forum preso I guess we definitely need to pay attention to areas that can be perf bottlenecks for paths that may be really common.
Having a way to disable or avoid virFork is something we may just need to have. I'm willing to be convinced otherwise - I'm just "wary" now.
The metadata locking needs to be there so that the setting seclabels is atomic. I mean, so far chown() and setfcon() are atomic. However, there will be some xattr related calls around those. Therefore to make the whole critical section atomic there has to be a lock:
lock(file); xattr(file); chown(file); xattr(file); unlock(file);
The xattr() calls set/recall the original owner of the file. I can make this configurable, but if there is no locking the only thing libvirt can do is chown(), not the xattr() because that wouldn't be atomic. (I'm saying only chown(), but it is the same story for setfcon().) Therefore, if users disable metadata locking the original owner of the file can't be preserved. On the other hand, we can enable it by default and have an opt out for cases where it doesn't work - just like we have for namespaces. And I believe people did disable them in their early days (even though I don't understand why, they were bugfree :-P)
I understand (generically) why we need the lock. I'm OK with it being enabled by default. That's not the question/ask. Building in a way to allow disabling usage of virFork and/or metadata lock knowing the "penalty" or downside to doing so goes beyond bug free or performance, it's just that "choice" we allow someone to make. You know there are those out there that will bemoan "choosing" this is as the default. If they want to disable in order to gain whatever at the cost of something else, then so be it. In some ways it's a CYA exercise.
Just an idea that I got, what if there won't be any config knob but this would use namespaces? I mean, if namespaces are on then metadata locking is happening and if they are off then no metadata locking is happening. Since namespaces do mean extra fork(), doing things this way there won't be any extra fork() if namespaces are off. Michal

[...]
I understand (generically) why we need the lock. I'm OK with it being enabled by default. That's not the question/ask. Building in a way to allow disabling usage of virFork and/or metadata lock knowing the "penalty" or downside to doing so goes beyond bug free or performance, it's just that "choice" we allow someone to make. You know there are those out there that will bemoan "choosing" this is as the default. If they want to disable in order to gain whatever at the cost of something else, then so be it. In some ways it's a CYA exercise.
Just an idea that I got, what if there won't be any config knob but this would use namespaces? I mean, if namespaces are on then metadata locking is happening and if they are off then no metadata locking is happening.
Since namespaces do mean extra fork(), doing things this way there won't be any extra fork() if namespaces are off.
I'd prefer to not make metadata locking (files) rely on namespaces (devices). I get the relationship though. John

On Tue, Nov 13, 2018 at 03:52:17PM -0500, John Ferlan wrote:
[...]
I understand (generically) why we need the lock. I'm OK with it being enabled by default. That's not the question/ask. Building in a way to allow disabling usage of virFork and/or metadata lock knowing the "penalty" or downside to doing so goes beyond bug free or performance, it's just that "choice" we allow someone to make. You know there are those out there that will bemoan "choosing" this is as the default. If they want to disable in order to gain whatever at the cost of something else, then so be it. In some ways it's a CYA exercise.
Just an idea that I got, what if there won't be any config knob but this would use namespaces? I mean, if namespaces are on then metadata locking is happening and if they are off then no metadata locking is happening.
Since namespaces do mean extra fork(), doing things this way there won't be any extra fork() if namespaces are off.
I'd prefer to not make metadata locking (files) rely on namespaces (devices). I get the relationship though.
In particular the needs for metadata locking applies to all platforms that libvirt QEMU driver runs on (*BSD, OSX), but namespaces only work on Linux. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 10 +- src/security/security_manager.c | 225 +++++++++++++++++--------------- src/security/security_manager.h | 17 ++- src/security/security_selinux.c | 9 +- 4 files changed, 139 insertions(+), 122 deletions(-) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 580d800bb1..ad2561a241 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecurityDACChownListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; const char **paths = NULL; size_t npaths = 0; size_t i; @@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path; - if (!p || - virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); } - if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup; for (i = 0; i < list->nItems; i++) { @@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, break; } - if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state); if (rv < 0) goto cleanup; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index c6c80e6165..b675f8ab46 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -21,6 +21,10 @@ */ #include <config.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + #include "security_driver.h" #include "security_stack.h" #include "security_dac.h" @@ -30,14 +34,11 @@ #include "virlog.h" #include "locking/lock_manager.h" #include "virfile.h" -#include "virtime.h" #define VIR_FROM_THIS VIR_FROM_SECURITY VIR_LOG_INIT("security.security_manager"); -virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER; - struct _virSecurityManager { virObjectLockable parent; @@ -47,10 +48,6 @@ struct _virSecurityManager { void *privateData; virLockManagerPluginPtr lockPlugin; - /* This is a FD that represents a connection to virtlockd so - * that connection is kept open in between MetdataLock() and - * MetadataUnlock() calls. */ - int clientfd; }; static virClassPtr virSecurityManagerClass; @@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj) mgr->drv->close(mgr); virObjectUnref(mgr->lockPlugin); - VIR_FORCE_CLOSE(mgr->clientfd); VIR_FREE(mgr->privateData); } @@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, mgr->flags = flags; mgr->virtDriver = virtDriver; VIR_STEAL_PTR(mgr->privateData, privateData); - mgr->clientfd = -1; if (drv->open(mgr) < 0) goto error; @@ -1276,129 +1271,153 @@ virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, } -static virLockManagerPtr -virSecurityManagerNewLockManager(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +struct _virSecurityManagerMetadataLockState { + size_t nfds; + int *fds; +}; + + +static int +cmpstringp(const void *p1, const void *p2) { - virLockManagerPtr lock; - virLockManagerParam params[] = { - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, - .key = "uuid", - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, - .key = "name", - .value = { .cstr = "libvirtd-sec" }, - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, - .key = "pid", - .value = { .iv = getpid() }, - }, - }; - const unsigned int flags = 0; - size_t i; + const char *s1 = *(char * const *) p1; + const char *s2 = *(char * const *) p2; - if (virGetHostUUID(params[0].value.uuid) < 0) - return NULL; + if (!s1 && !s2) + return 0; - if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin), - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON, - ARRAY_CARDINALITY(params), - params, - flags))) - return NULL; + if (!s1 || !s2) + return s2 ? -1 : 1; - for (i = 0; i < npaths; i++) { - if (virLockManagerAddResource(lock, - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA, - paths[i], 0, NULL, 0) < 0) - goto error; - } - - return lock; - error: - virLockManagerFree(lock); - return NULL; + /* from man 3 qsort */ + return strcmp(s1, s2); } +#define METADATA_OFFSET 1 +#define METADATA_LEN 1 -/* How many seconds should we try to acquire the lock before - * giving up. */ -#define LOCK_ACQUIRE_TIMEOUT 60 - -int -virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, +/** + * virSecurityManagerMetadataLock: + * @mgr: security manager object + * @paths: paths to lock + * @npaths: number of items in @paths array + * + * Lock passed @paths for metadata change. The returned state + * should be passed to virSecurityManagerMetadataUnlock. + * + * NOTE: this function is not thread safe (because of usage of + * POSIX locks). + * + * Returns: state on success, + * NULL on failure. + */ +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + const char **paths, size_t npaths) { - virLockManagerPtr lock; - virTimeBackOffVar timebackoff; - int fd = -1; - int rv = -1; - int ret = -1; + size_t i = 0; + size_t nfds = 0; + int *fds = NULL; + virSecurityManagerMetadataLockStatePtr ret = NULL; - virMutexLock(&lockManagerMutex); + if (VIR_ALLOC_N(fds, npaths) < 0) + return NULL; - if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Sort paths to lock in order to avoid deadlocks. */ + qsort(paths, npaths, sizeof(*paths), cmpstringp); - if (virTimeBackOffStart(&timebackoff, 1, LOCK_ACQUIRE_TIMEOUT * 1000) < 0) - goto cleanup; - while (virTimeBackOffWait(&timebackoff)) { - rv = virLockManagerAcquire(lock, NULL, - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, - VIR_DOMAIN_LOCK_FAILURE_DEFAULT, &fd); + for (i = 0; i < npaths; i++) { + const char *p = paths[i]; + struct stat sb; + int retries = 10 * 1000; + int fd; + + if (!p || stat(p, &sb) < 0) + continue; + + if (S_ISDIR(sb.st_mode)) { + /* Directories can't be locked */ + continue; + } + + if ((fd = open(p, O_RDWR)) < 0) { + if (S_ISSOCK(sb.st_mode)) { + /* Sockets can be opened only if there exists the + * other side that listens. */ + continue; + } + + virReportSystemError(errno, + _("unable to open %s"), + p); + goto cleanup; + } + + do { + if (virFileLock(fd, false, + METADATA_OFFSET, METADATA_LEN, false) < 0) { + if (retries && (errno == EACCES || errno == EAGAIN)) { + /* File is locked. Try again. */ + retries--; + usleep(1000); + continue; + } else { + virReportSystemError(errno, + _("unable to lock %s for metadata change"), + p); + VIR_FORCE_CLOSE(fd); + goto cleanup; + } + } - if (rv >= 0) break; + } while (1); - if (virGetLastErrorCode() == VIR_ERR_RESOURCE_BUSY) - continue; - - goto cleanup; + VIR_APPEND_ELEMENT_COPY_INPLACE(fds, nfds, fd); } - if (rv < 0) + if (VIR_ALLOC(ret) < 0) goto cleanup; - mgr->clientfd = fd; - fd = -1; + VIR_STEAL_PTR(ret->fds, fds); + ret->nfds = nfds; + nfds = 0; - ret = 0; cleanup: - virLockManagerFree(lock); - VIR_FORCE_CLOSE(fd); - if (ret < 0) - virMutexUnlock(&lockManagerMutex); + for (i = nfds; i > 0; i--) + VIR_FORCE_CLOSE(fds[i - 1]); + VIR_FREE(fds); return ret; } -int -virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +void +virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virSecurityManagerMetadataLockStatePtr *state) { - virLockManagerPtr lock; - int fd; - int ret = -1; + size_t i; - /* lockManagerMutex acquired from previous - * virSecurityManagerMetadataLock() call. */ + if (!state) + return; - fd = mgr->clientfd; - mgr->clientfd = -1; + for (i = 0; i < (*state)->nfds; i++) { + char ebuf[1024]; + int fd = (*state)->fds[i]; - if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Technically, unlock is not needed because it will + * happen on VIR_CLOSE() anyway. But let's play it nice. */ + if (virFileUnlock(fd, METADATA_OFFSET, METADATA_LEN) < 0) { + VIR_WARN("Unable to unlock fd %d: %s", + fd, virStrerror(errno, ebuf, sizeof(ebuf))); + } - if (virLockManagerRelease(lock, NULL, 0) < 0) - goto cleanup; + if (VIR_CLOSE(fd) < 0) { + VIR_WARN("Unable to close fd %d: %s", + fd, virStrerror(errno, ebuf, sizeof(ebuf))); + } + } - ret = 0; - cleanup: - virLockManagerFree(lock); - VIR_FORCE_CLOSE(fd); - virMutexUnlock(&lockManagerMutex); - return ret; + VIR_FREE((*state)->fds); + VIR_FREE(*state); } diff --git a/src/security/security_manager.h b/src/security/security_manager.h index 10ebe5cc29..fe8a1b4a24 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -199,11 +199,16 @@ int virSecurityManagerSetTPMLabels(virSecurityManagerPtr mgr, int virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, virDomainDefPtr vm); -int virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths); -int virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths); +typedef struct _virSecurityManagerMetadataLockState virSecurityManagerMetadataLockState; +typedef virSecurityManagerMetadataLockState *virSecurityManagerMetadataLockStatePtr; + +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, + const char **paths, + size_t npaths); + +void +virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, + virSecurityManagerMetadataLockStatePtr *state); #endif /* VIR_SECURITY_MANAGER_H__ */ diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 0e24b9b3ca..17e24c6ac3 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -214,6 +214,7 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecuritySELinuxContextListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; bool privileged = virSecurityManagerGetPrivileged(list->manager); const char **paths = NULL; size_t npaths = 0; @@ -227,13 +228,10 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path; - if (virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); } - if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup; rv = 0; @@ -250,8 +248,7 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, } } - if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state); if (rv < 0) goto cleanup; -- 2.18.1

On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Note safety of POSIX locks is not merely about other threads. If *any* code we invoke from security_dac or security_selinux were to open() & close() the file we're labelling we'll loose the locks. This is a little concerning since we call out to a few functions in src/util that might not be aware they should *never* open)( and close() the path they're given. It looks like we lucky at the moment, but this has future gun -> foot -> ouch potential. Likewise we also call out to libselinux APIs. I've looked at the code for the APIs we call and again, right now, we look to be safe, but there's some risk there. I'm not sure whether this is big enough risk to invalidate this forking approach or not though. This kind of problem was the reason why virtlockd was created to hold locks in a completely separate process from the code with the critical section. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Note safety of POSIX locks is not merely about other threads.
If *any* code we invoke from security_dac or security_selinux were to open() & close() the file we're labelling we'll loose the locks.
This is a little concerning since we call out to a few functions in src/util that might not be aware they should *never* open)( and close() the path they're given. It looks like we lucky at the moment, but this has future gun -> foot -> ouch potential.
Likewise we also call out to libselinux APIs. I've looked at the code for the APIs we call and again, right now, we look to be safe, but there's some risk there.
I'm not sure whether this is big enough risk to invalidate this forking approach or not though. This kind of problem was the reason why virtlockd was created to hold locks in a completely separate process from the code with the critical section.
But unless we check for hard links virtlockd will suffer from the POSIX locks too. For instance, assume that B is a hardlink to A. Then, if a process opens A, locks it an the other opens B and closes it the lock is released. Even though A and B look distinct. The problem with virlockd is that our current virlockspace impl is not aware of different offsets. We can try to make it so, but that would complicate the code a lot. Just imagine that there's a domain already running and it holds disk content lock over file C. If an user wants to start a domain (which too has disk C), libvirtd will try to lock C again (even though on different offset) but it will fail to do so because C is already locked. Another problem with virtlockd that I faced was that with current implementation the connection to virlockd is opened in metadataLock(), where the connection FD is dup()-ed to prevent connection closing. The connection is then closed in metadataUnlock() where the dup()-ed FD is closed. This works pretty much good, except for fork(). If a fork() occurs between metadataLock() and metadataUnlock() the duplicated FD is leaked into the child and we can't be certain when will the child close it. The worst case scenario is that the child will close the FD when we are holding some resources, which results in virtlockd killing libvirtd (= registered owner of the locks). Alternatively, we can switch to OFD locks and have the feature work only on Linux. If anybody from BSD users will want the feature they will have to push their kernel developers to implement some sensible file locking. Also, one of the questions is what we want metadata locking to be. So far I am aiming on having exclusive access to the file that we are chown()-ing (or setfcon()-ing) so that I can put a code around it that manipulates XATTR where the original owner would be stored. This way the metadata lock is held only for a fraction of a second. I guess we don't want the metadata lock be held through whole run of a domain, i.e. obtained at domain start and released at domain shutdown. I don't see much benefit in that. Michal

On Wed, Oct 17, 2018 at 12:52:50PM +0200, Michal Privoznik wrote:
On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Note safety of POSIX locks is not merely about other threads.
If *any* code we invoke from security_dac or security_selinux were to open() & close() the file we're labelling we'll loose the locks.
This is a little concerning since we call out to a few functions in src/util that might not be aware they should *never* open)( and close() the path they're given. It looks like we lucky at the moment, but this has future gun -> foot -> ouch potential.
Likewise we also call out to libselinux APIs. I've looked at the code for the APIs we call and again, right now, we look to be safe, but there's some risk there.
I'm not sure whether this is big enough risk to invalidate this forking approach or not though. This kind of problem was the reason why virtlockd was created to hold locks in a completely separate process from the code with the critical section.
But unless we check for hard links virtlockd will suffer from the POSIX locks too. For instance, assume that B is a hardlink to A. Then, if a process opens A, locks it an the other opens B and closes it the lock is released. Even though A and B look distinct.
Yes, that is correct - there's an implicit assumption that all VMs are using the same path if there's multiple hard links. Foruntately this scenario is fairly rare - more common is symlinks which can be canonicalized.
The problem with virlockd is that our current virlockspace impl is not aware of different offsets. We can try to make it so, but that would complicate the code a lot. Just imagine that there's a domain already running and it holds disk content lock over file C. If an user wants to start a domain (which too has disk C), libvirtd will try to lock C again (even though on different offset) but it will fail to do so because C is already locked.
Yep, we would need intelligence around offsets to avoid opening it twice for each offset.
Another problem with virtlockd that I faced was that with current implementation the connection to virlockd is opened in metadataLock(), where the connection FD is dup()-ed to prevent connection closing. The connection is then closed in metadataUnlock() where the dup()-ed FD is closed. This works pretty much good, except for fork(). If a fork() occurs between metadataLock() and metadataUnlock() the duplicated FD is leaked into the child and we can't be certain when will the child close it. The worst case scenario is that the child will close the FD when we are holding some resources, which results in virtlockd killing libvirtd (= registered owner of the locks).
I don't think that's the case actually. If you have an FD that is a UNIX/TCP socket that gets dup(), the remote end of the socket won't see HUP until all duplicates of the socket are closed. IOW, both the parent & child process would need to close their respective dups.
Alternatively, we can switch to OFD locks and have the feature work only on Linux. If anybody from BSD users will want the feature they will have to push their kernel developers to implement some sensible file locking.
Also, one of the questions is what we want metadata locking to be. So far I am aiming on having exclusive access to the file that we are chown()-ing (or setfcon()-ing) so that I can put a code around it that manipulates XATTR where the original owner would be stored. This way the metadata lock is held only for a fraction of a second. I guess we don't want the metadata lock be held through whole run of a domain, i.e. obtained at domain start and released at domain shutdown. I don't see much benefit in that.
Yes, we must only lock it for the short time that we're reading or writing the metadata. We must not hold the long long term, because other threads or processes need access to the metadata while the VM is running, for example to deal with validating shared disk labels. On further reflection, I think we'll be safe enough with what you're proposing. For it to go wrong, we would have to have a bug in the code which accidentally open+close the socket, and be on an old kernel that lacks OFD locks, and two processes must be in the critical section at the same time in that short < 1 second window. Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|

On 10/17/2018 01:46 PM, Daniel P. Berrangé wrote:
On Wed, Oct 17, 2018 at 12:52:50PM +0200, Michal Privoznik wrote:
On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Note safety of POSIX locks is not merely about other threads.
If *any* code we invoke from security_dac or security_selinux were to open() & close() the file we're labelling we'll loose the locks.
This is a little concerning since we call out to a few functions in src/util that might not be aware they should *never* open)( and close() the path they're given. It looks like we lucky at the moment, but this has future gun -> foot -> ouch potential.
Likewise we also call out to libselinux APIs. I've looked at the code for the APIs we call and again, right now, we look to be safe, but there's some risk there.
I'm not sure whether this is big enough risk to invalidate this forking approach or not though. This kind of problem was the reason why virtlockd was created to hold locks in a completely separate process from the code with the critical section.
But unless we check for hard links virtlockd will suffer from the POSIX locks too. For instance, assume that B is a hardlink to A. Then, if a process opens A, locks it an the other opens B and closes it the lock is released. Even though A and B look distinct.
Yes, that is correct - there's an implicit assumption that all VMs are using the same path if there's multiple hard links. Foruntately this scenario is fairly rare - more common is symlinks which can be canonicalized.
The problem with virlockd is that our current virlockspace impl is not aware of different offsets. We can try to make it so, but that would complicate the code a lot. Just imagine that there's a domain already running and it holds disk content lock over file C. If an user wants to start a domain (which too has disk C), libvirtd will try to lock C again (even though on different offset) but it will fail to do so because C is already locked.
Yep, we would need intelligence around offsets to avoid opening it twice for each offset.
Another problem with virtlockd that I faced was that with current implementation the connection to virlockd is opened in metadataLock(), where the connection FD is dup()-ed to prevent connection closing. The connection is then closed in metadataUnlock() where the dup()-ed FD is closed. This works pretty much good, except for fork(). If a fork() occurs between metadataLock() and metadataUnlock() the duplicated FD is leaked into the child and we can't be certain when will the child close it. The worst case scenario is that the child will close the FD when we are holding some resources, which results in virtlockd killing libvirtd (= registered owner of the locks).
I don't think that's the case actually. If you have an FD that is a UNIX/TCP socket that gets dup(), the remote end of the socket won't see HUP until all duplicates of the socket are closed. IOW, both the parent & child process would need to close their respective dups.
It is the case. I've seen it out in the wild (when starting several domains at once): https://www.redhat.com/archives/libvir-list/2018-September/msg01138.html
Alternatively, we can switch to OFD locks and have the feature work only on Linux. If anybody from BSD users will want the feature they will have to push their kernel developers to implement some sensible file locking.
Also, one of the questions is what we want metadata locking to be. So far I am aiming on having exclusive access to the file that we are chown()-ing (or setfcon()-ing) so that I can put a code around it that manipulates XATTR where the original owner would be stored. This way the metadata lock is held only for a fraction of a second. I guess we don't want the metadata lock be held through whole run of a domain, i.e. obtained at domain start and released at domain shutdown. I don't see much benefit in that.
Yes, we must only lock it for the short time that we're reading or writing the metadata. We must not hold the long long term, because other threads or processes need access to the metadata while the VM is running, for example to deal with validating shared disk labels.
On further reflection, I think we'll be safe enough with what you're proposing. For it to go wrong, we would have to have a bug in the code which accidentally open+close the socket, and be on an old kernel that lacks OFD locks, and two processes must be in the critical section at the same time in that short < 1 second window.
Cool. Thanks. Let me just say that I find it disturbing that POSIX doesn't specify file locks that are usable in a multithreaded app. Sure, we can have some abstraction layer over lockf() + open() + close() but it can hardly be something usable. Or something that every app should do. Michal

On 10/17/18 5:06 AM, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 10 +- src/security/security_manager.c | 225 +++++++++++++++++--------------- src/security/security_manager.h | 17 ++- src/security/security_selinux.c | 9 +- 4 files changed, 139 insertions(+), 122 deletions(-)
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 580d800bb1..ad2561a241 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecurityDACChownListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; const char **paths = NULL; size_t npaths = 0; size_t i; @@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path;
- if (!p || - virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); }
- if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup;
for (i = 0; i < list->nItems; i++) { @@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, break; }
- if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state);
if (rv < 0) goto cleanup; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index c6c80e6165..b675f8ab46 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -21,6 +21,10 @@ */ #include <config.h>
+#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + #include "security_driver.h" #include "security_stack.h" #include "security_dac.h" @@ -30,14 +34,11 @@ #include "virlog.h" #include "locking/lock_manager.h" #include "virfile.h" -#include "virtime.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
VIR_LOG_INIT("security.security_manager");
-virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER; - struct _virSecurityManager { virObjectLockable parent;
@@ -47,10 +48,6 @@ struct _virSecurityManager { void *privateData;
virLockManagerPluginPtr lockPlugin; - /* This is a FD that represents a connection to virtlockd so - * that connection is kept open in between MetdataLock() and - * MetadataUnlock() calls. */ - int clientfd; };
static virClassPtr virSecurityManagerClass; @@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj) mgr->drv->close(mgr);
virObjectUnref(mgr->lockPlugin); - VIR_FORCE_CLOSE(mgr->clientfd);
VIR_FREE(mgr->privateData); } @@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, mgr->flags = flags; mgr->virtDriver = virtDriver; VIR_STEAL_PTR(mgr->privateData, privateData); - mgr->clientfd = -1;
if (drv->open(mgr) < 0) goto error; @@ -1276,129 +1271,153 @@ virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, }
-static virLockManagerPtr -virSecurityManagerNewLockManager(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +struct _virSecurityManagerMetadataLockState {
LockPrivate? When I first saw State I had a different thought. I like this better than the previous model... Keeping track of fds is like other models used. How do these locks handle restarts? Are the locks free'd if the daemon dies?
+ size_t nfds; + int *fds; +}; + + +static int +cmpstringp(const void *p1, const void *p2) { - virLockManagerPtr lock; - virLockManagerParam params[] = { - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, - .key = "uuid", - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, - .key = "name", - .value = { .cstr = "libvirtd-sec" }, - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, - .key = "pid", - .value = { .iv = getpid() }, - }, - }; - const unsigned int flags = 0; - size_t i; + const char *s1 = *(char * const *) p1; + const char *s2 = *(char * const *) p2;
- if (virGetHostUUID(params[0].value.uuid) < 0) - return NULL; + if (!s1 && !s2) + return 0;
- if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin), - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON, - ARRAY_CARDINALITY(params), - params, - flags))) - return NULL; + if (!s1 || !s2) + return s2 ? -1 : 1;
- for (i = 0; i < npaths; i++) { - if (virLockManagerAddResource(lock, - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA, - paths[i], 0, NULL, 0) < 0) - goto error; - } - - return lock; - error: - virLockManagerFree(lock); - return NULL; + /* from man 3 qsort */ + return strcmp(s1, s2); }
+#define METADATA_OFFSET 1 +#define METADATA_LEN 1
-/* How many seconds should we try to acquire the lock before - * giving up. */ -#define LOCK_ACQUIRE_TIMEOUT 60 - -int -virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, +/** + * virSecurityManagerMetadataLock: + * @mgr: security manager object + * @paths: paths to lock + * @npaths: number of items in @paths array + * + * Lock passed @paths for metadata change. The returned state + * should be passed to virSecurityManagerMetadataUnlock. + * + * NOTE: this function is not thread safe (because of usage of + * POSIX locks). + * + * Returns: state on success, + * NULL on failure. + */ +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + const char **paths, size_t npaths) { - virLockManagerPtr lock; - virTimeBackOffVar timebackoff; - int fd = -1; - int rv = -1; - int ret = -1; + size_t i = 0; + size_t nfds = 0; + int *fds = NULL; + virSecurityManagerMetadataLockStatePtr ret = NULL;
- virMutexLock(&lockManagerMutex); + if (VIR_ALLOC_N(fds, npaths) < 0) + return NULL;
- if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Sort paths to lock in order to avoid deadlocks. */ + qsort(paths, npaths, sizeof(*paths), cmpstringp);
Shouldn't this be the job of the caller to know or set order to avoid deadlocks? IOW: Why is it important to sort here? It's not clear how/why it avoids deadlocks.
- if (virTimeBackOffStart(&timebackoff, 1, LOCK_ACQUIRE_TIMEOUT * 1000) < 0) - goto cleanup; - while (virTimeBackOffWait(&timebackoff)) { - rv = virLockManagerAcquire(lock, NULL, - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, - VIR_DOMAIN_LOCK_FAILURE_DEFAULT, &fd); + for (i = 0; i < npaths; i++) { + const char *p = paths[i]; + struct stat sb; + int retries = 10 * 1000; + int fd; + + if (!p || stat(p, &sb) < 0) + continue; + + if (S_ISDIR(sb.st_mode)) { + /* Directories can't be locked */ + continue; + } + + if ((fd = open(p, O_RDWR)) < 0) {
Is RDWR required for locking?
+ if (S_ISSOCK(sb.st_mode)) { + /* Sockets can be opened only if there exists the + * other side that listens. */ + continue; + } + + virReportSystemError(errno, + _("unable to open %s"), + p); + goto cleanup; + } + + do { + if (virFileLock(fd, false, + METADATA_OFFSET, METADATA_LEN, false) < 0) {
If lockd dies somewhere in the middle of this "transaction" - all those fd's close and unlock, right? Mr doom and gloom thinking what could go wrong.
+ if (retries && (errno == EACCES || errno == EAGAIN)) { + /* File is locked. Try again. */ + retries--; + usleep(1000); + continue; + } else { + virReportSystemError(errno, + _("unable to lock %s for metadata change"), + p); + VIR_FORCE_CLOSE(fd); + goto cleanup; + } + }
I dunno, I liked the virTimeBackOffStart better... This 1 second wait just seems like it could be a perf. bottleneck someday. I've looked at the code for a while and probably just need more thinking time on it. John (curious about more words from Daniel on this).
- if (rv >= 0) break; + } while (1);
- if (virGetLastErrorCode() == VIR_ERR_RESOURCE_BUSY) - continue; - - goto cleanup; + VIR_APPEND_ELEMENT_COPY_INPLACE(fds, nfds, fd); }
- if (rv < 0) + if (VIR_ALLOC(ret) < 0) goto cleanup;
- mgr->clientfd = fd; - fd = -1; + VIR_STEAL_PTR(ret->fds, fds); + ret->nfds = nfds; + nfds = 0;
- ret = 0; cleanup: - virLockManagerFree(lock); - VIR_FORCE_CLOSE(fd); - if (ret < 0) - virMutexUnlock(&lockManagerMutex); + for (i = nfds; i > 0; i--) + VIR_FORCE_CLOSE(fds[i - 1]); + VIR_FREE(fds); return ret; }
-int -virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +void +virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virSecurityManagerMetadataLockStatePtr *state) { - virLockManagerPtr lock; - int fd; - int ret = -1; + size_t i;
- /* lockManagerMutex acquired from previous - * virSecurityManagerMetadataLock() call. */ + if (!state) + return;
- fd = mgr->clientfd; - mgr->clientfd = -1; + for (i = 0; i < (*state)->nfds; i++) { + char ebuf[1024]; + int fd = (*state)->fds[i];
- if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Technically, unlock is not needed because it will + * happen on VIR_CLOSE() anyway. But let's play it nice. */ + if (virFileUnlock(fd, METADATA_OFFSET, METADATA_LEN) < 0) { + VIR_WARN("Unable to unlock fd %d: %s", + fd, virStrerror(errno, ebuf, sizeof(ebuf))); + }
- if (virLockManagerRelease(lock, NULL, 0) < 0) - goto cleanup; + if (VIR_CLOSE(fd) < 0) { + VIR_WARN("Unable to close fd %d: %s", + fd, virStrerror(errno, ebuf, sizeof(ebuf))); + } + }
- ret = 0; - cleanup: - virLockManagerFree(lock); - VIR_FORCE_CLOSE(fd); - virMutexUnlock(&lockManagerMutex); - return ret; + VIR_FREE((*state)->fds); + VIR_FREE(*state); } diff --git a/src/security/security_manager.h b/src/security/security_manager.h index 10ebe5cc29..fe8a1b4a24 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -199,11 +199,16 @@ int virSecurityManagerSetTPMLabels(virSecurityManagerPtr mgr, int virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, virDomainDefPtr vm);
-int virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths); -int virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths); +typedef struct _virSecurityManagerMetadataLockState virSecurityManagerMetadataLockState; +typedef virSecurityManagerMetadataLockState *virSecurityManagerMetadataLockStatePtr; + +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, + const char **paths, + size_t npaths); + +void +virSecurityManagerMetadataUnlock(virSecurityManagerPtr mgr, + virSecurityManagerMetadataLockStatePtr *state);
#endif /* VIR_SECURITY_MANAGER_H__ */ diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 0e24b9b3ca..17e24c6ac3 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -214,6 +214,7 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecuritySELinuxContextListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; bool privileged = virSecurityManagerGetPrivileged(list->manager); const char **paths = NULL; size_t npaths = 0; @@ -227,13 +228,10 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path;
- if (virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); }
- if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup;
rv = 0; @@ -250,8 +248,7 @@ virSecuritySELinuxTransactionRun(pid_t pid ATTRIBUTE_UNUSED, } }
- if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state);
if (rv < 0) goto cleanup;

On 11/09/2018 03:59 AM, John Ferlan wrote:
On 10/17/18 5:06 AM, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 10 +- src/security/security_manager.c | 225 +++++++++++++++++--------------- src/security/security_manager.h | 17 ++- src/security/security_selinux.c | 9 +- 4 files changed, 139 insertions(+), 122 deletions(-)
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 580d800bb1..ad2561a241 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecurityDACChownListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; const char **paths = NULL; size_t npaths = 0; size_t i; @@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path;
- if (!p || - virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); }
- if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup;
for (i = 0; i < list->nItems; i++) { @@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, break; }
- if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state);
if (rv < 0) goto cleanup; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index c6c80e6165..b675f8ab46 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -21,6 +21,10 @@ */ #include <config.h>
+#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + #include "security_driver.h" #include "security_stack.h" #include "security_dac.h" @@ -30,14 +34,11 @@ #include "virlog.h" #include "locking/lock_manager.h" #include "virfile.h" -#include "virtime.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
VIR_LOG_INIT("security.security_manager");
-virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER; - struct _virSecurityManager { virObjectLockable parent;
@@ -47,10 +48,6 @@ struct _virSecurityManager { void *privateData;
virLockManagerPluginPtr lockPlugin; - /* This is a FD that represents a connection to virtlockd so - * that connection is kept open in between MetdataLock() and - * MetadataUnlock() calls. */ - int clientfd; };
static virClassPtr virSecurityManagerClass; @@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj) mgr->drv->close(mgr);
virObjectUnref(mgr->lockPlugin); - VIR_FORCE_CLOSE(mgr->clientfd);
VIR_FREE(mgr->privateData); } @@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, mgr->flags = flags; mgr->virtDriver = virtDriver; VIR_STEAL_PTR(mgr->privateData, privateData); - mgr->clientfd = -1;
if (drv->open(mgr) < 0) goto error; @@ -1276,129 +1271,153 @@ virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, }
-static virLockManagerPtr -virSecurityManagerNewLockManager(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +struct _virSecurityManagerMetadataLockState {
LockPrivate?
When I first saw State I had a different thought.
I like this better than the previous model... Keeping track of fds is like other models used. How do these locks handle restarts? Are the locks free'd if the daemon dies?
Yes they are. Since this runs in a child (also the child body is very short) then when the parent (libvirtd) is killed the children are killed too. The POSIX locks I'm using here are tied to [PID,inode] pair. Therefore, if PID dies the lock is released.
+ size_t nfds; + int *fds; +}; + + +static int +cmpstringp(const void *p1, const void *p2) { - virLockManagerPtr lock; - virLockManagerParam params[] = { - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, - .key = "uuid", - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, - .key = "name", - .value = { .cstr = "libvirtd-sec" }, - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, - .key = "pid", - .value = { .iv = getpid() }, - }, - }; - const unsigned int flags = 0; - size_t i; + const char *s1 = *(char * const *) p1; + const char *s2 = *(char * const *) p2;
- if (virGetHostUUID(params[0].value.uuid) < 0) - return NULL; + if (!s1 && !s2) + return 0;
- if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin), - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON, - ARRAY_CARDINALITY(params), - params, - flags))) - return NULL; + if (!s1 || !s2) + return s2 ? -1 : 1;
- for (i = 0; i < npaths; i++) { - if (virLockManagerAddResource(lock, - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA, - paths[i], 0, NULL, 0) < 0) - goto error; - } - - return lock; - error: - virLockManagerFree(lock); - return NULL; + /* from man 3 qsort */ + return strcmp(s1, s2); }
+#define METADATA_OFFSET 1 +#define METADATA_LEN 1
-/* How many seconds should we try to acquire the lock before - * giving up. */ -#define LOCK_ACQUIRE_TIMEOUT 60 - -int -virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, +/** + * virSecurityManagerMetadataLock: + * @mgr: security manager object + * @paths: paths to lock + * @npaths: number of items in @paths array + * + * Lock passed @paths for metadata change. The returned state + * should be passed to virSecurityManagerMetadataUnlock. + * + * NOTE: this function is not thread safe (because of usage of + * POSIX locks). + * + * Returns: state on success, + * NULL on failure. + */ +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + const char **paths, size_t npaths) { - virLockManagerPtr lock; - virTimeBackOffVar timebackoff; - int fd = -1; - int rv = -1; - int ret = -1; + size_t i = 0; + size_t nfds = 0; + int *fds = NULL; + virSecurityManagerMetadataLockStatePtr ret = NULL;
- virMutexLock(&lockManagerMutex); + if (VIR_ALLOC_N(fds, npaths) < 0) + return NULL;
- if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Sort paths to lock in order to avoid deadlocks. */ + qsort(paths, npaths, sizeof(*paths), cmpstringp);
Shouldn't this be the job of the caller to know or set order to avoid deadlocks? IOW: Why is it important to sort here? It's not clear how/why it avoids deadlocks.
Imagine you have two domains which are starting up. One has disks A,B and the other has B,A. Two processes are forked to start relabeling over the disks. The first child locks A, the second locks B. Then they want to lock the second disk, but this won't succeed. However, if the paths are sorted firstly then there won't be any deadlock: both children will lock A first and B second (regardless of disk position in domain XML).
- if (virTimeBackOffStart(&timebackoff, 1, LOCK_ACQUIRE_TIMEOUT * 1000) < 0) - goto cleanup; - while (virTimeBackOffWait(&timebackoff)) { - rv = virLockManagerAcquire(lock, NULL, - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, - VIR_DOMAIN_LOCK_FAILURE_DEFAULT, &fd); + for (i = 0; i < npaths; i++) { + const char *p = paths[i]; + struct stat sb; + int retries = 10 * 1000; + int fd; + + if (!p || stat(p, &sb) < 0) + continue; + + if (S_ISDIR(sb.st_mode)) { + /* Directories can't be locked */ + continue; + } + + if ((fd = open(p, O_RDWR)) < 0) {
Is RDWR required for locking?
Good question, I'll investigate. It shouldn't be necessary.
+ if (S_ISSOCK(sb.st_mode)) { + /* Sockets can be opened only if there exists the + * other side that listens. */ + continue; + } + + virReportSystemError(errno, + _("unable to open %s"), + p); + goto cleanup; + } + + do { + if (virFileLock(fd, false, + METADATA_OFFSET, METADATA_LEN, false) < 0) {
If lockd dies somewhere in the middle of this "transaction" - all those fd's close and unlock, right? Mr doom and gloom thinking what could go wrong.
Yes. closing a file if it is locked results in it being unlocked. Even if it is a different FD. For instance: fd = open(path); virFileLock(fd, ...); close(fd); /* releases the lock */ but also: fd1 = open(path); fd2 = open(path); virFileLock(fd1, ...); close(fd2); /* also releases the lock */ This is what makes POSIX locks impossible to use from a multithreaded app (imagine those two open()s happening from different threads). This is the whole reason we need to undergo fork() torture.
+ if (retries && (errno == EACCES || errno == EAGAIN)) { + /* File is locked. Try again. */ + retries--; + usleep(1000); + continue; + } else { + virReportSystemError(errno, + _("unable to lock %s for metadata change"), + p); + VIR_FORCE_CLOSE(fd); + goto cleanup; + } + }
I dunno, I liked the virTimeBackOffStart better... This 1 second wait just seems like it could be a perf. bottleneck someday. I've looked at the code for a while and probably just need more thinking time on it.
Okay, I'll leave it in. Michal

On 11/9/18 7:42 AM, Michal Privoznik wrote:
On 11/09/2018 03:59 AM, John Ferlan wrote:
On 10/17/18 5:06 AM, Michal Privoznik wrote:
Trying to use virlockd to lock metadata turns out to be too big gun. Since we will always spawn a separate process for relabeling we are safe to use thread unsafe POSIX locks and take out virtlockd completely out of the picture.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 10 +- src/security/security_manager.c | 225 +++++++++++++++++--------------- src/security/security_manager.h | 17 ++- src/security/security_selinux.c | 9 +- 4 files changed, 139 insertions(+), 122 deletions(-)
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 580d800bb1..ad2561a241 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, void *opaque) { virSecurityDACChownListPtr list = opaque; + virSecurityManagerMetadataLockStatePtr state; const char **paths = NULL; size_t npaths = 0; size_t i; @@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, for (i = 0; i < list->nItems; i++) { const char *p = list->items[i]->path;
- if (!p || - virFileIsDir(p)) - continue; - VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p); }
- if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0) + if (!(state = virSecurityManagerMetadataLock(list->manager, paths, npaths))) goto cleanup;
for (i = 0; i < list->nItems; i++) { @@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED, break; }
- if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0) - goto cleanup; + virSecurityManagerMetadataUnlock(list->manager, &state);
if (rv < 0) goto cleanup; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index c6c80e6165..b675f8ab46 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -21,6 +21,10 @@ */ #include <config.h>
+#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + #include "security_driver.h" #include "security_stack.h" #include "security_dac.h" @@ -30,14 +34,11 @@ #include "virlog.h" #include "locking/lock_manager.h" #include "virfile.h" -#include "virtime.h"
#define VIR_FROM_THIS VIR_FROM_SECURITY
VIR_LOG_INIT("security.security_manager");
-virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER; - struct _virSecurityManager { virObjectLockable parent;
@@ -47,10 +48,6 @@ struct _virSecurityManager { void *privateData;
virLockManagerPluginPtr lockPlugin; - /* This is a FD that represents a connection to virtlockd so - * that connection is kept open in between MetdataLock() and - * MetadataUnlock() calls. */ - int clientfd; };
static virClassPtr virSecurityManagerClass; @@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj) mgr->drv->close(mgr);
virObjectUnref(mgr->lockPlugin); - VIR_FORCE_CLOSE(mgr->clientfd);
VIR_FREE(mgr->privateData); } @@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, mgr->flags = flags; mgr->virtDriver = virtDriver; VIR_STEAL_PTR(mgr->privateData, privateData); - mgr->clientfd = -1;
if (drv->open(mgr) < 0) goto error; @@ -1276,129 +1271,153 @@ virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr, }
-static virLockManagerPtr -virSecurityManagerNewLockManager(virSecurityManagerPtr mgr, - const char * const *paths, - size_t npaths) +struct _virSecurityManagerMetadataLockState {
LockPrivate?
When I first saw State I had a different thought.
I like this better than the previous model... Keeping track of fds is like other models used. How do these locks handle restarts? Are the locks free'd if the daemon dies?
Yes they are. Since this runs in a child (also the child body is very short) then when the parent (libvirtd) is killed the children are killed too. The POSIX locks I'm using here are tied to [PID,inode] pair. Therefore, if PID dies the lock is released.
Great thanks... The whole separation between libvirtd and virtlockd with having the previous incarnation floating around in the back of my head makes it "difficult" to concentrate on the current scheme. At first I started down the path of - oh wait, isn't this being stored in virtlockd and we're having libvirtd involved. That'd be a mess to manage.
+ size_t nfds; + int *fds; +}; + + +static int +cmpstringp(const void *p1, const void *p2) { - virLockManagerPtr lock; - virLockManagerParam params[] = { - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID, - .key = "uuid", - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING, - .key = "name", - .value = { .cstr = "libvirtd-sec" }, - }, - { .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT, - .key = "pid", - .value = { .iv = getpid() }, - }, - }; - const unsigned int flags = 0; - size_t i; + const char *s1 = *(char * const *) p1; + const char *s2 = *(char * const *) p2;
- if (virGetHostUUID(params[0].value.uuid) < 0) - return NULL; + if (!s1 && !s2) + return 0;
- if (!(lock = virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin), - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON, - ARRAY_CARDINALITY(params), - params, - flags))) - return NULL; + if (!s1 || !s2) + return s2 ? -1 : 1;
- for (i = 0; i < npaths; i++) { - if (virLockManagerAddResource(lock, - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA, - paths[i], 0, NULL, 0) < 0) - goto error; - } - - return lock; - error: - virLockManagerFree(lock); - return NULL; + /* from man 3 qsort */ + return strcmp(s1, s2); }
+#define METADATA_OFFSET 1 +#define METADATA_LEN 1
-/* How many seconds should we try to acquire the lock before - * giving up. */ -#define LOCK_ACQUIRE_TIMEOUT 60 - -int -virSecurityManagerMetadataLock(virSecurityManagerPtr mgr, - const char * const *paths, +/** + * virSecurityManagerMetadataLock: + * @mgr: security manager object + * @paths: paths to lock + * @npaths: number of items in @paths array + * + * Lock passed @paths for metadata change. The returned state + * should be passed to virSecurityManagerMetadataUnlock. + * + * NOTE: this function is not thread safe (because of usage of + * POSIX locks). + * + * Returns: state on success, + * NULL on failure. + */ +virSecurityManagerMetadataLockStatePtr +virSecurityManagerMetadataLock(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + const char **paths, size_t npaths) { - virLockManagerPtr lock; - virTimeBackOffVar timebackoff; - int fd = -1; - int rv = -1; - int ret = -1; + size_t i = 0; + size_t nfds = 0; + int *fds = NULL; + virSecurityManagerMetadataLockStatePtr ret = NULL;
- virMutexLock(&lockManagerMutex); + if (VIR_ALLOC_N(fds, npaths) < 0) + return NULL;
- if (!(lock = virSecurityManagerNewLockManager(mgr, paths, npaths))) - goto cleanup; + /* Sort paths to lock in order to avoid deadlocks. */ + qsort(paths, npaths, sizeof(*paths), cmpstringp);
Shouldn't this be the job of the caller to know or set order to avoid deadlocks? IOW: Why is it important to sort here? It's not clear how/why it avoids deadlocks.
Imagine you have two domains which are starting up. One has disks A,B and the other has B,A. Two processes are forked to start relabeling over the disks. The first child locks A, the second locks B. Then they want to lock the second disk, but this won't succeed. However, if the paths are sorted firstly then there won't be any deadlock: both children will lock A first and B second (regardless of disk position in domain XML).
So, then I assume the disks are shared because they've been allowed in two domains that are allowed to be running at the same time. If they're shared and domainA has/uses a different label than domainB, then who "wins" that war in the long run? Seems to me shared disks are "dangerous" in this labeling game (and that's beyond the locking game). Should a mechanism be created to disallow multiple threads from running the SetAllLabel "simultaneously" to avoid/solve the problem? Since it's really the only one with the ordering problem... Seems we each have our doom and gloom scenarios in mind ;-) Thanks for the details - John
- if (virTimeBackOffStart(&timebackoff, 1, LOCK_ACQUIRE_TIMEOUT * 1000) < 0) - goto cleanup; - while (virTimeBackOffWait(&timebackoff)) { - rv = virLockManagerAcquire(lock, NULL, - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, - VIR_DOMAIN_LOCK_FAILURE_DEFAULT, &fd); + for (i = 0; i < npaths; i++) { + const char *p = paths[i]; + struct stat sb; + int retries = 10 * 1000; + int fd; + + if (!p || stat(p, &sb) < 0) + continue; + + if (S_ISDIR(sb.st_mode)) { + /* Directories can't be locked */ + continue; + } + + if ((fd = open(p, O_RDWR)) < 0) {
Is RDWR required for locking?
Good question, I'll investigate. It shouldn't be necessary.
+ if (S_ISSOCK(sb.st_mode)) { + /* Sockets can be opened only if there exists the + * other side that listens. */ + continue; + } + + virReportSystemError(errno, + _("unable to open %s"), + p); + goto cleanup; + } + + do { + if (virFileLock(fd, false, + METADATA_OFFSET, METADATA_LEN, false) < 0) {
If lockd dies somewhere in the middle of this "transaction" - all those fd's close and unlock, right? Mr doom and gloom thinking what could go wrong.
Yes. closing a file if it is locked results in it being unlocked. Even if it is a different FD. For instance:
fd = open(path); virFileLock(fd, ...); close(fd); /* releases the lock */
but also:
fd1 = open(path); fd2 = open(path); virFileLock(fd1, ...); close(fd2); /* also releases the lock */
This is what makes POSIX locks impossible to use from a multithreaded app (imagine those two open()s happening from different threads). This is the whole reason we need to undergo fork() torture.
+ if (retries && (errno == EACCES || errno == EAGAIN)) { + /* File is locked. Try again. */ + retries--; + usleep(1000); + continue; + } else { + virReportSystemError(errno, + _("unable to lock %s for metadata change"), + p); + VIR_FORCE_CLOSE(fd); + goto cleanup; + } + }
I dunno, I liked the virTimeBackOffStart better... This 1 second wait just seems like it could be a perf. bottleneck someday. I've looked at the code for a while and probably just need more thinking time on it.
Okay, I'll leave it in.
Michal

On 11/09/2018 03:47 PM, John Ferlan wrote:
So, then I assume the disks are shared because they've been allowed in two domains that are allowed to be running at the same time. If they're shared and domainA has/uses a different label than domainB, then who "wins" that war in the long run? Seems to me shared disks are "dangerous" in this labeling game (and that's beyond the locking game).
The fact that a disk changes DAC label does not necessarily mean that qemu is losing access. Firstly, the it might be just UID that changes on the file and GID staying the same. Secondly, there might be an ACL entry that lets qemu in no matter what. I agree that the disk should be marked as shareable and readonly, but it's not for this code to decide. Actually, it is not for libvirt to tell at all. Parsing UIDs is complex and there's pretty good implementation in kernel so might as well rely on that instead of duplicating the code into libvirt. IOW, if users misconfigure disks to their domains and get EPERM or cut off access to an already running domain, it's their problem and we shouldn't mitigate it. On the other hand, we shouldn't just deadlock.
Should a mechanism be created to disallow multiple threads from running the SetAllLabel "simultaneously" to avoid/solve the problem? Since it's really the only one with the ordering problem...
This would hurt performance IMO. If there are two domains being started at once and they don't share any common path then there is no reason for relabel operation to run serialized.
Seems we each have our doom and gloom scenarios in mind ;-)
Thanks for the details -
John
Michal

This reverts commit 3e26b476b5f322353bf0dcd8e3f037ca672b8c62. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- cfg.mk | 4 +--- src/lxc/lxc_controller.c | 3 +-- src/lxc/lxc_driver.c | 2 +- src/qemu/qemu_driver.c | 3 --- src/security/security_manager.c | 25 +------------------------ src/security/security_manager.h | 2 -- tests/seclabeltest.c | 2 +- tests/securityselinuxlabeltest.c | 2 +- tests/securityselinuxtest.c | 2 +- tests/testutilsqemu.c | 2 +- 10 files changed, 8 insertions(+), 39 deletions(-) diff --git a/cfg.mk b/cfg.mk index 4790d0b7e7..eddd110ed6 100644 --- a/cfg.mk +++ b/cfg.mk @@ -787,10 +787,8 @@ sc_prohibit_cross_inclusion: case $$dir in \ util/) safe="util";; \ access/ | conf/) safe="($$dir|conf|util)";; \ - cpu/| network/| node_device/| rpc/| storage/) \ + cpu/| network/| node_device/| rpc/| security/| storage/) \ safe="($$dir|util|conf|storage)";; \ - security/) \ - safe="($$dir|util|conf|storage|locking)";; \ xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen|cpu)";; \ *) safe="($$dir|$(mid_dirs)|util)";; \ esac; \ diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 62dfd09473..e853d02d65 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -2624,8 +2624,7 @@ int main(int argc, char *argv[]) ctrl->handshakeFd = handshakeFd; if (!(ctrl->securityManager = virSecurityManagerNew(securityDriver, - LXC_DRIVER_NAME, - NULL, 0))) + LXC_DRIVER_NAME, 0))) goto cleanup; if (ctrl->def->seclabels) { diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index f732305649..990871d9b3 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -1531,7 +1531,7 @@ lxcSecurityInit(virLXCDriverConfigPtr cfg) flags |= VIR_SECURITY_MANAGER_REQUIRE_CONFINED; virSecurityManagerPtr mgr = virSecurityManagerNew(cfg->securityDriverName, - LXC_DRIVER_NAME, NULL, flags); + LXC_DRIVER_NAME, flags); if (!mgr) goto error; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index a52e2495d5..3a1185f947 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -350,7 +350,6 @@ qemuSecurityInit(virQEMUDriverPtr driver) while (names && *names) { if (!(mgr = qemuSecurityNew(*names, QEMU_DRIVER_NAME, - cfg->metadataLockManagerName, flags))) goto error; if (!stack) { @@ -366,7 +365,6 @@ qemuSecurityInit(virQEMUDriverPtr driver) } else { if (!(mgr = qemuSecurityNew(NULL, QEMU_DRIVER_NAME, - cfg->metadataLockManagerName, flags))) goto error; if (!(stack = qemuSecurityNewStack(mgr))) @@ -383,7 +381,6 @@ qemuSecurityInit(virQEMUDriverPtr driver) cfg->user, cfg->group, flags, - cfg->metadataLockManagerName, qemuSecurityChownCallback))) goto error; if (!stack) { diff --git a/src/security/security_manager.c b/src/security/security_manager.c index b675f8ab46..47be47df48 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -32,7 +32,6 @@ #include "viralloc.h" #include "virobject.h" #include "virlog.h" -#include "locking/lock_manager.h" #include "virfile.h" #define VIR_FROM_THIS VIR_FROM_SECURITY @@ -46,8 +45,6 @@ struct _virSecurityManager { unsigned int flags; const char *virtDriver; void *privateData; - - virLockManagerPluginPtr lockPlugin; }; static virClassPtr virSecurityManagerClass; @@ -58,12 +55,8 @@ void virSecurityManagerDispose(void *obj) { virSecurityManagerPtr mgr = obj; - if (mgr->drv && - mgr->drv->close) + if (mgr->drv->close) mgr->drv->close(mgr); - - virObjectUnref(mgr->lockPlugin); - VIR_FREE(mgr->privateData); } @@ -83,7 +76,6 @@ VIR_ONCE_GLOBAL_INIT(virSecurityManager); static virSecurityManagerPtr virSecurityManagerNewDriver(virSecurityDriverPtr drv, const char *virtDriver, - const char *lockManagerPluginName, unsigned int flags) { virSecurityManagerPtr mgr = NULL; @@ -103,14 +95,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv, if (!(mgr = virObjectLockableNew(virSecurityManagerClass))) goto error; - if (!lockManagerPluginName) - lockManagerPluginName = "nop"; - - if (!(mgr->lockPlugin = virLockManagerPluginNew(lockManagerPluginName, - NULL, NULL, 0))) { - goto error; - } - mgr->drv = drv; mgr->flags = flags; mgr->virtDriver = virtDriver; @@ -133,7 +117,6 @@ virSecurityManagerNewStack(virSecurityManagerPtr primary) virSecurityManagerPtr mgr = virSecurityManagerNewDriver(&virSecurityDriverStack, virSecurityManagerGetDriver(primary), - NULL, primary->flags); if (!mgr) @@ -142,8 +125,6 @@ virSecurityManagerNewStack(virSecurityManagerPtr primary) if (virSecurityStackAddNested(mgr, primary) < 0) goto error; - mgr->lockPlugin = virObjectRef(mgr->lockPlugin); - return mgr; error: virObjectUnref(mgr); @@ -166,7 +147,6 @@ virSecurityManagerNewDAC(const char *virtDriver, uid_t user, gid_t group, unsigned int flags, - const char *lockManagerPluginName, virSecurityManagerDACChownCallback chownCallback) { virSecurityManagerPtr mgr; @@ -177,7 +157,6 @@ virSecurityManagerNewDAC(const char *virtDriver, mgr = virSecurityManagerNewDriver(&virSecurityDriverDAC, virtDriver, - lockManagerPluginName, flags & VIR_SECURITY_MANAGER_NEW_MASK); if (!mgr) @@ -199,7 +178,6 @@ virSecurityManagerNewDAC(const char *virtDriver, virSecurityManagerPtr virSecurityManagerNew(const char *name, const char *virtDriver, - const char *lockManagerPluginName, unsigned int flags) { virSecurityDriverPtr drv = virSecurityDriverLookup(name, virtDriver); @@ -228,7 +206,6 @@ virSecurityManagerNew(const char *name, return virSecurityManagerNewDriver(drv, virtDriver, - lockManagerPluginName, flags); } diff --git a/src/security/security_manager.h b/src/security/security_manager.h index fe8a1b4a24..c4abc34b17 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -45,7 +45,6 @@ typedef enum { virSecurityManagerPtr virSecurityManagerNew(const char *name, const char *virtDriver, - const char *lockManagerPluginName, unsigned int flags); virSecurityManagerPtr virSecurityManagerNewStack(virSecurityManagerPtr primary); @@ -71,7 +70,6 @@ virSecurityManagerPtr virSecurityManagerNewDAC(const char *virtDriver, uid_t user, gid_t group, unsigned int flags, - const char *lockManagerPluginName, virSecurityManagerDACChownCallback chownCallback); int virSecurityManagerPreFork(virSecurityManagerPtr mgr); diff --git a/tests/seclabeltest.c b/tests/seclabeltest.c index 7cddf96e82..a0296c787e 100644 --- a/tests/seclabeltest.c +++ b/tests/seclabeltest.c @@ -14,7 +14,7 @@ mymain(void) if (virThreadInitialize() < 0) return EXIT_FAILURE; - mgr = virSecurityManagerNew(NULL, "QEMU", NULL, VIR_SECURITY_MANAGER_DEFAULT_CONFINED); + mgr = virSecurityManagerNew(NULL, "QEMU", VIR_SECURITY_MANAGER_DEFAULT_CONFINED); if (mgr == NULL) { fprintf(stderr, "Failed to start security driver"); return EXIT_FAILURE; diff --git a/tests/securityselinuxlabeltest.c b/tests/securityselinuxlabeltest.c index aa9fae7d32..39f4eb7b6a 100644 --- a/tests/securityselinuxlabeltest.c +++ b/tests/securityselinuxlabeltest.c @@ -346,7 +346,7 @@ mymain(void) if (!rc) return EXIT_AM_SKIP; - if (!(mgr = virSecurityManagerNew("selinux", "QEMU", NULL, + if (!(mgr = virSecurityManagerNew("selinux", "QEMU", VIR_SECURITY_MANAGER_DEFAULT_CONFINED | VIR_SECURITY_MANAGER_PRIVILEGED))) { VIR_TEST_VERBOSE("Unable to initialize security driver: %s\n", diff --git a/tests/securityselinuxtest.c b/tests/securityselinuxtest.c index f1ea51b1ac..a2864cf57c 100644 --- a/tests/securityselinuxtest.c +++ b/tests/securityselinuxtest.c @@ -272,7 +272,7 @@ mymain(void) int ret = 0; virSecurityManagerPtr mgr; - if (!(mgr = virSecurityManagerNew("selinux", "QEMU", NULL, + if (!(mgr = virSecurityManagerNew("selinux", "QEMU", VIR_SECURITY_MANAGER_DEFAULT_CONFINED | VIR_SECURITY_MANAGER_PRIVILEGED))) { fprintf(stderr, "Unable to initialize security driver: %s\n", diff --git a/tests/testutilsqemu.c b/tests/testutilsqemu.c index 332885eb77..0d3e9fc7e6 100644 --- a/tests/testutilsqemu.c +++ b/tests/testutilsqemu.c @@ -716,7 +716,7 @@ int qemuTestDriverInit(virQEMUDriver *driver) if (qemuTestCapsCacheInsert(driver->qemuCapsCache, NULL) < 0) goto error; - if (!(mgr = virSecurityManagerNew("none", "qemu", NULL, + if (!(mgr = virSecurityManagerNew("none", "qemu", VIR_SECURITY_MANAGER_PRIVILEGED))) goto error; if (!(driver->securityManager = virSecurityManagerNewStack(mgr))) -- 2.18.1

This reverts commit 8b8aefb3d6ae2139ea3d4ef6d7dd2c06f57f6075. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_conf.c | 1 - src/qemu/qemu_conf.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index 17b7e11e02..32da9a7351 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -426,7 +426,6 @@ static void virQEMUDriverConfigDispose(void *obj) virStringListFree(cfg->securityDriverNames); VIR_FREE(cfg->lockManagerName); - VIR_FREE(cfg->metadataLockManagerName); virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares); diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index f876f9117c..49e5c50f22 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -186,7 +186,6 @@ struct _virQEMUDriverConfig { bool autoStartBypassCache; char *lockManagerName; - char *metadataLockManagerName; int keepAliveInterval; unsigned int keepAliveCount; -- 2.18.1

This reverts commit 35b5b244da825fb41e35e4dc62e740d716214ec9. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 4 ---- src/locking/lock_driver_lockd.c | 4 +--- src/locking/lock_driver_sanlock.c | 4 +--- src/locking/lock_manager.c | 10 +++------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index ae30abda7d..7c8f744be3 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -124,7 +124,6 @@ struct _virLockManagerParam { /** * virLockDriverInit: * @version: the libvirt requested plugin ABI version - * @configFile: path to config file * @flags: the libvirt requested plugin optional extras * * Allow the plugin to validate the libvirt requested @@ -132,9 +131,6 @@ struct _virLockManagerParam { * to block its use in versions of libvirtd which are * too old to support key features. * - * The @configFile variable points to config file that the driver - * should load. If NULL, no config file should be loaded. - * * NB: A plugin may be loaded multiple times, for different * libvirt drivers (eg QEMU, LXC, UML) * diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 0c672b05b1..85cdcf97be 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -371,10 +371,8 @@ static int virLockManagerLockDaemonInit(unsigned int version, driver->requireLeaseForDisks = true; driver->autoDiskLease = true; - if (configFile && - virLockManagerLockDaemonLoadConfig(configFile) < 0) { + if (virLockManagerLockDaemonLoadConfig(configFile) < 0) goto error; - } if (driver->autoDiskLease) { if (driver->fileLockSpaceDir && diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 3ad0dc9bed..b10d8197c5 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -446,10 +446,8 @@ static int virLockManagerSanlockInit(unsigned int version, goto error; } - if (configFile && - virLockManagerSanlockLoadConfig(driver, configFile) < 0) { + if (virLockManagerSanlockLoadConfig(driver, configFile) < 0) goto error; - } if (driver->autoDiskLease && !driver->hostID) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c index 9067f5a01a..d421b6acfc 100644 --- a/src/locking/lock_manager.c +++ b/src/locking/lock_manager.c @@ -104,8 +104,6 @@ static void virLockManagerLogParams(size_t nparams, /** * virLockManagerPluginNew: * @name: the name of the plugin - * @driverName: the hypervisor driver that loads the plugin - * @configDir: path to dir where config files are stored * @flag: optional plugin flags * * Attempt to load the plugin $(libdir)/libvirt/lock-driver/@name.so @@ -131,13 +129,11 @@ virLockManagerPluginPtr virLockManagerPluginNew(const char *name, char *configFile = NULL; VIR_DEBUG("name=%s driverName=%s configDir=%s flags=0x%x", - name, NULLSTR(driverName), NULLSTR(configDir), flags); + name, driverName, configDir, flags); - if (driverName && configDir && - virAsprintf(&configFile, "%s/%s-%s.conf", - configDir, driverName, name) < 0) { + if (virAsprintf(&configFile, "%s/%s-%s.conf", + configDir, driverName, name) < 0) return NULL; - } if (STREQ(name, "nop")) { driver = &virLockDriverNop; -- 2.18.1

This reverts commit 385eb8399bdb1610447c2857abfe99cee4a9fb9e. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 4 -- src/locking/lock_driver_lockd.c | 82 ++++++++++----------------------- 2 files changed, 24 insertions(+), 62 deletions(-) diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 7c8f744be3..9be0abcfba 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -67,10 +67,6 @@ typedef enum { VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0), /* Prevent further lock/unlock calls from this process */ VIR_LOCK_MANAGER_ACQUIRE_RESTRICT = (1 << 1), - /* Used when acquiring more resources in which one of them - * can't be acquired, perform a rollback and release all - * resources acquired so far. */ - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK = (1 << 2), } virLockManagerAcquireFlags; typedef enum { diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 85cdcf97be..d6551e125c 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -735,34 +735,6 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, } -static int virLockManagerLockDaemonReleaseImpl(virNetClientPtr client, - virNetClientProgramPtr program, - int counter, - virLockManagerLockDaemonResourcePtr res) -{ - virLockSpaceProtocolReleaseResourceArgs args; - - memset(&args, 0, sizeof(args)); - - args.path = res->lockspace; - args.name = res->name; - args.flags = res->flags; - - args.flags &= - ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA); - - return virNetClientProgramCall(program, - client, - counter, - VIR_LOCK_SPACE_PROTOCOL_PROC_RELEASE_RESOURCE, - 0, NULL, NULL, NULL, - (xdrproc_t)xdr_virLockSpaceProtocolReleaseResourceArgs, &args, - (xdrproc_t)xdr_void, NULL); -} - - static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, const char *state ATTRIBUTE_UNUSED, unsigned int flags, @@ -773,13 +745,10 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, virNetClientProgramPtr program = NULL; int counter = 0; int rv = -1; - ssize_t i; - ssize_t lastGood = -1; virLockManagerLockDaemonPrivatePtr priv = lock->privateData; virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY | - VIR_LOCK_MANAGER_ACQUIRE_RESTRICT | - VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, -1); + VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1); if (priv->type == VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN && priv->nresources == 0 && @@ -798,6 +767,7 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, goto cleanup; if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) { + size_t i; for (i = 0; i < priv->nresources; i++) { virLockSpaceProtocolAcquireResourceArgs args; @@ -815,7 +785,6 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, (xdrproc_t)xdr_virLockSpaceProtocolAcquireResourceArgs, &args, (xdrproc_t)xdr_void, NULL) < 0) goto cleanup; - lastGood = i; } } @@ -826,28 +795,8 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, rv = 0; cleanup: - if (rv < 0) { - int saved_errno = errno; - virErrorPtr origerr; - - virErrorPreserveLast(&origerr); - if (fd) - VIR_FORCE_CLOSE(*fd); - - if (flags & VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK) { - for (i = lastGood; i >= 0; i--) { - virLockManagerLockDaemonResourcePtr res = &priv->resources[i]; - - if (virLockManagerLockDaemonReleaseImpl(client, program, - counter++, res) < 0) - VIR_WARN("Unable to release resource lockspace=%s name=%s", - res->lockspace, res->name); - } - } - - virErrorRestore(&origerr); - errno = saved_errno; - } + if (rv != 0 && fd) + VIR_FORCE_CLOSE(*fd); virNetClientClose(client); virObjectUnref(client); virObjectUnref(program); @@ -875,10 +824,27 @@ static int virLockManagerLockDaemonRelease(virLockManagerPtr lock, goto cleanup; for (i = 0; i < priv->nresources; i++) { - virLockManagerLockDaemonResourcePtr res = &priv->resources[i]; + virLockSpaceProtocolReleaseResourceArgs args; - if (virLockManagerLockDaemonReleaseImpl(client, program, - counter++, res) < 0) + memset(&args, 0, sizeof(args)); + + if (priv->resources[i].lockspace) + args.path = priv->resources[i].lockspace; + args.name = priv->resources[i].name; + args.flags = priv->resources[i].flags; + + args.flags &= + ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA); + + if (virNetClientProgramCall(program, + client, + counter++, + VIR_LOCK_SPACE_PROTOCOL_PROC_RELEASE_RESOURCE, + 0, NULL, NULL, NULL, + (xdrproc_t)xdr_virLockSpaceProtocolReleaseResourceArgs, &args, + (xdrproc_t)xdr_void, NULL) < 0) goto cleanup; } -- 2.18.1

This reverts commit 997283b54b0e1f599aed3085ceba027eb8110acb. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 -- src/locking/lock_driver_lockd.c | 47 +++++++++---------------------- src/locking/lock_driver_sanlock.c | 3 +- 3 files changed, 14 insertions(+), 38 deletions(-) diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index 9be0abcfba..a9d2041c30 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -51,8 +51,6 @@ typedef enum { VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0, /* A lease against an arbitrary resource */ VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1, - /* The resource to be locked is a metadata */ - VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA = 2, } virLockManagerResourceType; typedef enum { diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index d6551e125c..268676c407 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -563,7 +563,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, virLockManagerLockDaemonPrivatePtr priv = lock->privateData; char *newName = NULL; char *newLockspace = NULL; - int newFlags = 0; + bool autoCreate = false; int ret = -1; virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY | @@ -575,7 +575,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, switch (priv->type) { case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: - switch ((virLockManagerResourceType) type) { + switch (type) { case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: if (params || nparams) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -602,7 +602,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, VIR_DEBUG("Got an LVM UUID %s for %s", newName, name); if (VIR_STRDUP(newLockspace, driver->lvmLockSpaceDir) < 0) goto cleanup; - newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE; + autoCreate = true; break; } virResetLastError(); @@ -619,7 +619,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, VIR_DEBUG("Got an SCSI ID %s for %s", newName, name); if (VIR_STRDUP(newLockspace, driver->scsiLockSpaceDir) < 0) goto cleanup; - newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE; + autoCreate = true; break; } virResetLastError(); @@ -631,7 +631,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, goto cleanup; if (virCryptoHashString(VIR_CRYPTO_HASH_SHA256, name, &newName) < 0) goto cleanup; - newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE; + autoCreate = true; VIR_DEBUG("Using indirect lease %s for %s", newName, name); } else { if (VIR_STRDUP(newLockspace, "") < 0) @@ -676,8 +676,6 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, goto cleanup; } break; - - case VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA: default: virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown lock manager object type %d for domain lock object"), @@ -687,29 +685,6 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, break; case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: - switch ((virLockManagerResourceType) type) { - case VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA: - if (params || nparams) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Unexpected parameters for metadata resource")); - goto cleanup; - } - if (VIR_STRDUP(newLockspace, "") < 0 || - VIR_STRDUP(newName, name) < 0) - goto cleanup; - newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA; - break; - - case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: - case VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE: - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unknown lock manager object type %d for daemon lock object"), - type); - goto cleanup; - } - break; - default: virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown lock manager object type %d"), @@ -717,15 +692,19 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, goto cleanup; } - if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) - newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED; - if (VIR_EXPAND_N(priv->resources, priv->nresources, 1) < 0) goto cleanup; VIR_STEAL_PTR(priv->resources[priv->nresources-1].lockspace, newLockspace); VIR_STEAL_PTR(priv->resources[priv->nresources-1].name, newName); - priv->resources[priv->nresources-1].flags = newFlags; + + if (flags & VIR_LOCK_MANAGER_RESOURCE_SHARED) + priv->resources[priv->nresources-1].flags |= + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED; + + if (autoCreate) + priv->resources[priv->nresources-1].flags |= + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE; ret = 0; cleanup: diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index b10d8197c5..86efc83b5a 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -811,7 +811,7 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0; - switch ((virLockManagerResourceType) type) { + switch (type) { case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: if (driver->autoDiskLease) { if (virLockManagerSanlockAddDisk(driver, lock, name, nparams, params, @@ -835,7 +835,6 @@ static int virLockManagerSanlockAddResource(virLockManagerPtr lock, return -1; break; - case VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA: default: virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown lock manager object type %d for domain lock object"), -- 2.18.1

This reverts commit aaf34cb9013d6d746f4edf9807408cb9dfbcf01d. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver_lockd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 268676c407..22a5a97913 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -63,8 +63,6 @@ struct _virLockManagerLockDaemonPrivate { char *name; int id; pid_t pid; - - bool hasRWDisks; } dom; struct { @@ -76,6 +74,8 @@ struct _virLockManagerLockDaemonPrivate { size_t nresources; virLockManagerLockDaemonResourcePtr resources; + + bool hasRWDisks; }; @@ -585,7 +585,7 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, if (!driver->autoDiskLease) { if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | VIR_LOCK_MANAGER_RESOURCE_READONLY))) - priv->t.dom.hasRWDisks = true; + priv->hasRWDisks = true; return 0; } @@ -731,7 +731,7 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, if (priv->type == VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN && priv->nresources == 0 && - priv->t.dom.hasRWDisks && + priv->hasRWDisks && driver->requireLeaseForDisks) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Read/write, exclusive access, disks were present, but no leases specified")); -- 2.18.1

This reverts commit 22baf6e08c65d9174b24f66370724ce961ce9576. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_driver.h | 2 - src/locking/lock_driver_lockd.c | 297 +++++++++++------------------- src/locking/lock_driver_sanlock.c | 37 ++-- 3 files changed, 116 insertions(+), 220 deletions(-) diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h index a9d2041c30..8b7cccc521 100644 --- a/src/locking/lock_driver.h +++ b/src/locking/lock_driver.h @@ -42,8 +42,6 @@ typedef enum { typedef enum { /* The managed object is a virtual guest domain */ VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0, - /* The managed object is a daemon (e.g. libvirtd) */ - VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON = 1, } virLockManagerObjectType; typedef enum { diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index 22a5a97913..ca825e6026 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -56,21 +56,10 @@ struct _virLockManagerLockDaemonResource { }; struct _virLockManagerLockDaemonPrivate { - virLockManagerObjectType type; - union { - struct { - unsigned char uuid[VIR_UUID_BUFLEN]; - char *name; - int id; - pid_t pid; - } dom; - - struct { - unsigned char uuid[VIR_UUID_BUFLEN]; - char *name; - pid_t pid; - } daemon; - } t; + unsigned char uuid[VIR_UUID_BUFLEN]; + char *name; + int id; + pid_t pid; size_t nresources; virLockManagerLockDaemonResourcePtr resources; @@ -167,30 +156,10 @@ virLockManagerLockDaemonConnectionRegister(virLockManagerPtr lock, memset(&args, 0, sizeof(args)); args.flags = 0; - - switch (priv->type) { - case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: - memcpy(args.owner.uuid, priv->t.dom.uuid, VIR_UUID_BUFLEN); - args.owner.name = priv->t.dom.name; - args.owner.id = priv->t.dom.id; - args.owner.pid = priv->t.dom.pid; - break; - - case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: - memcpy(args.owner.uuid, priv->t.daemon.uuid, VIR_UUID_BUFLEN); - args.owner.name = priv->t.daemon.name; - args.owner.pid = priv->t.daemon.pid; - /* This one should not be needed. However, virtlockd - * checks for ID because not every domain has a PID. */ - args.owner.id = priv->t.daemon.pid; - break; - - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unknown lock manager object type %d"), - priv->type); - return -1; - } + memcpy(args.owner.uuid, priv->uuid, VIR_UUID_BUFLEN); + args.owner.name = priv->name; + args.owner.id = priv->id; + args.owner.pid = priv->pid; if (virNetClientProgramCall(program, client, @@ -422,18 +391,7 @@ virLockManagerLockDaemonPrivateFree(virLockManagerLockDaemonPrivatePtr priv) } VIR_FREE(priv->resources); - switch (priv->type) { - case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: - VIR_FREE(priv->t.dom.name); - break; - - case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: - VIR_FREE(priv->t.daemon.name); - break; - - default: - break; - } + VIR_FREE(priv->name); VIR_FREE(priv); } @@ -462,82 +420,46 @@ static int virLockManagerLockDaemonNew(virLockManagerPtr lock, if (VIR_ALLOC(priv) < 0) return -1; - priv->type = type; - - switch ((virLockManagerObjectType) type) { + switch (type) { case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: for (i = 0; i < nparams; i++) { if (STREQ(params[i].key, "uuid")) { - memcpy(priv->t.dom.uuid, params[i].value.uuid, VIR_UUID_BUFLEN); + memcpy(priv->uuid, params[i].value.uuid, VIR_UUID_BUFLEN); } else if (STREQ(params[i].key, "name")) { - if (VIR_STRDUP(priv->t.dom.name, params[i].value.str) < 0) + if (VIR_STRDUP(priv->name, params[i].value.str) < 0) goto cleanup; } else if (STREQ(params[i].key, "id")) { - priv->t.dom.id = params[i].value.iv; + priv->id = params[i].value.iv; } else if (STREQ(params[i].key, "pid")) { - priv->t.dom.pid = params[i].value.iv; + priv->pid = params[i].value.iv; } else if (STREQ(params[i].key, "uri")) { /* ignored */ } else { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unexpected parameter %s for domain object"), + _("Unexpected parameter %s for object"), params[i].key); goto cleanup; } } - if (priv->t.dom.id == 0) { + if (priv->id == 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing ID parameter for domain object")); goto cleanup; } - if (priv->t.dom.pid == 0) + if (priv->pid == 0) VIR_DEBUG("Missing PID parameter for domain object"); - if (!priv->t.dom.name) { + if (!priv->name) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing name parameter for domain object")); goto cleanup; } - if (!virUUIDIsValid(priv->t.dom.uuid)) { + if (!virUUIDIsValid(priv->uuid)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing UUID parameter for domain object")); goto cleanup; } break; - case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: - for (i = 0; i < nparams; i++) { - if (STREQ(params[i].key, "uuid")) { - memcpy(priv->t.daemon.uuid, params[i].value.uuid, VIR_UUID_BUFLEN); - } else if (STREQ(params[i].key, "name")) { - if (VIR_STRDUP(priv->t.daemon.name, params[i].value.str) < 0) - goto cleanup; - } else if (STREQ(params[i].key, "pid")) { - priv->t.daemon.pid = params[i].value.iv; - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unexpected parameter %s for daemon object"), - params[i].key); - goto cleanup; - } - } - - if (!virUUIDIsValid(priv->t.daemon.uuid)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing UUID parameter for daemon object")); - goto cleanup; - } - if (!priv->t.daemon.name) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing name parameter for daemon object")); - goto cleanup; - } - if (priv->t.daemon.pid == 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing PID parameter for daemon object")); - goto cleanup; - } - break; - default: virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown lock manager object type %d"), @@ -572,119 +494,107 @@ static int virLockManagerLockDaemonAddResource(virLockManagerPtr lock, if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY) return 0; - switch (priv->type) { - case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: + switch (type) { + case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: + if (params || nparams) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unexpected parameters for disk resource")); + goto cleanup; + } + if (!driver->autoDiskLease) { + if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | + VIR_LOCK_MANAGER_RESOURCE_READONLY))) + priv->hasRWDisks = true; + return 0; + } - switch (type) { - case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK: - if (params || nparams) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Unexpected parameters for disk resource")); + /* XXX we should somehow pass in TYPE=BLOCK info + * from the domain_lock code, instead of assuming /dev + */ + if (STRPREFIX(name, "/dev") && + driver->lvmLockSpaceDir) { + VIR_DEBUG("Trying to find an LVM UUID for %s", name); + if (virStorageFileGetLVMKey(name, &newName) < 0) goto cleanup; - } - if (!driver->autoDiskLease) { - if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED | - VIR_LOCK_MANAGER_RESOURCE_READONLY))) - priv->hasRWDisks = true; - return 0; - } - /* XXX we should somehow pass in TYPE=BLOCK info - * from the domain_lock code, instead of assuming /dev - */ - if (STRPREFIX(name, "/dev") && - driver->lvmLockSpaceDir) { - VIR_DEBUG("Trying to find an LVM UUID for %s", name); - if (virStorageFileGetLVMKey(name, &newName) < 0) + if (newName) { + VIR_DEBUG("Got an LVM UUID %s for %s", newName, name); + if (VIR_STRDUP(newLockspace, driver->lvmLockSpaceDir) < 0) goto cleanup; - - if (newName) { - VIR_DEBUG("Got an LVM UUID %s for %s", newName, name); - if (VIR_STRDUP(newLockspace, driver->lvmLockSpaceDir) < 0) - goto cleanup; - autoCreate = true; - break; - } - virResetLastError(); - /* Fallback to generic non-block code */ + autoCreate = true; + break; } + virResetLastError(); + /* Fallback to generic non-block code */ + } - if (STRPREFIX(name, "/dev") && - driver->scsiLockSpaceDir) { - VIR_DEBUG("Trying to find an SCSI ID for %s", name); - if (virStorageFileGetSCSIKey(name, &newName) < 0) - goto cleanup; - - if (newName) { - VIR_DEBUG("Got an SCSI ID %s for %s", newName, name); - if (VIR_STRDUP(newLockspace, driver->scsiLockSpaceDir) < 0) - goto cleanup; - autoCreate = true; - break; - } - virResetLastError(); - /* Fallback to generic non-block code */ - } + if (STRPREFIX(name, "/dev") && + driver->scsiLockSpaceDir) { + VIR_DEBUG("Trying to find an SCSI ID for %s", name); + if (virStorageFileGetSCSIKey(name, &newName) < 0) + goto cleanup; - if (driver->fileLockSpaceDir) { - if (VIR_STRDUP(newLockspace, driver->fileLockSpaceDir) < 0) - goto cleanup; - if (virCryptoHashString(VIR_CRYPTO_HASH_SHA256, name, &newName) < 0) + if (newName) { + VIR_DEBUG("Got an SCSI ID %s for %s", newName, name); + if (VIR_STRDUP(newLockspace, driver->scsiLockSpaceDir) < 0) goto cleanup; autoCreate = true; - VIR_DEBUG("Using indirect lease %s for %s", newName, name); - } else { - if (VIR_STRDUP(newLockspace, "") < 0) - goto cleanup; - if (VIR_STRDUP(newName, name) < 0) - goto cleanup; - VIR_DEBUG("Using direct lease for %s", name); + break; } + virResetLastError(); + /* Fallback to generic non-block code */ + } - break; - case VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE: { - size_t i; - char *path = NULL; - char *lockspace = NULL; - for (i = 0; i < nparams; i++) { - if (STREQ(params[i].key, "offset")) { - if (params[i].value.ul != 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Offset must be zero for this lock manager")); - goto cleanup; - } - } else if (STREQ(params[i].key, "lockspace")) { - lockspace = params[i].value.str; - } else if (STREQ(params[i].key, "path")) { - path = params[i].value.str; - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unexpected parameter %s for lease resource"), - params[i].key); - goto cleanup; - } - } - if (!path || !lockspace) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Missing path or lockspace for lease resource")); + if (driver->fileLockSpaceDir) { + if (VIR_STRDUP(newLockspace, driver->fileLockSpaceDir) < 0) goto cleanup; - } - if (virAsprintf(&newLockspace, "%s/%s", - path, lockspace) < 0) + if (virCryptoHashString(VIR_CRYPTO_HASH_SHA256, name, &newName) < 0) + goto cleanup; + autoCreate = true; + VIR_DEBUG("Using indirect lease %s for %s", newName, name); + } else { + if (VIR_STRDUP(newLockspace, "") < 0) goto cleanup; if (VIR_STRDUP(newName, name) < 0) goto cleanup; - - } break; - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unknown lock manager object type %d for domain lock object"), - type); - goto cleanup; + VIR_DEBUG("Using direct lease for %s", name); } + break; + case VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE: { + size_t i; + char *path = NULL; + char *lockspace = NULL; + for (i = 0; i < nparams; i++) { + if (STREQ(params[i].key, "offset")) { + if (params[i].value.ul != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Offset must be zero for this lock manager")); + goto cleanup; + } + } else if (STREQ(params[i].key, "lockspace")) { + lockspace = params[i].value.str; + } else if (STREQ(params[i].key, "path")) { + path = params[i].value.str; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unexpected parameter %s for lease resource"), + params[i].key); + goto cleanup; + } + } + if (!path || !lockspace) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Missing path or lockspace for lease resource")); + goto cleanup; + } + if (virAsprintf(&newLockspace, "%s/%s", + path, lockspace) < 0) + goto cleanup; + if (VIR_STRDUP(newName, name) < 0) + goto cleanup; - case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: + } break; default: virReportError(VIR_ERR_INTERNAL_ERROR, _("Unknown lock manager object type %d"), @@ -729,8 +639,7 @@ static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock, virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY | VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1); - if (priv->type == VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN && - priv->nresources == 0 && + if (priv->nresources == 0 && priv->hasRWDisks && driver->requireLeaseForDisks) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c index 86efc83b5a..ff0c9be8f7 100644 --- a/src/locking/lock_driver_sanlock.c +++ b/src/locking/lock_driver_sanlock.c @@ -509,32 +509,21 @@ static int virLockManagerSanlockNew(virLockManagerPtr lock, priv->flags = flags; - switch ((virLockManagerObjectType) type) { - case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN: - for (i = 0; i < nparams; i++) { - param = ¶ms[i]; + for (i = 0; i < nparams; i++) { + param = ¶ms[i]; - if (STREQ(param->key, "uuid")) { - memcpy(priv->vm_uuid, param->value.uuid, 16); - } else if (STREQ(param->key, "name")) { - if (VIR_STRDUP(priv->vm_name, param->value.str) < 0) - goto error; - } else if (STREQ(param->key, "pid")) { - priv->vm_pid = param->value.iv; - } else if (STREQ(param->key, "id")) { - priv->vm_id = param->value.ui; - } else if (STREQ(param->key, "uri")) { - priv->vm_uri = param->value.cstr; - } + if (STREQ(param->key, "uuid")) { + memcpy(priv->vm_uuid, param->value.uuid, 16); + } else if (STREQ(param->key, "name")) { + if (VIR_STRDUP(priv->vm_name, param->value.str) < 0) + goto error; + } else if (STREQ(param->key, "pid")) { + priv->vm_pid = param->value.iv; + } else if (STREQ(param->key, "id")) { + priv->vm_id = param->value.ui; + } else if (STREQ(param->key, "uri")) { + priv->vm_uri = param->value.cstr; } - break; - - case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON: - default: - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Unknown lock manager object type %d"), - type); - goto error; } /* Sanlock needs process registration, but the only way how to probe -- 2.18.1

This reverts commit 21c34b86be5233634eb38f77be64e2263bfc4e48. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 10 ++-------- src/locking/lock_driver_lockd.c | 3 +-- src/locking/lock_driver_lockd.h | 1 - 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index a683ad3d6b..10248ec0b5 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -37,9 +37,6 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch"); #include "lock_daemon_dispatch_stubs.h" -#define DEFAULT_OFFSET 0 -#define METADATA_OFFSET 1 - static int virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNUSED, virNetServerClientPtr client, @@ -53,14 +50,13 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; - off_t start = DEFAULT_OFFSET; + off_t start = 0; off_t len = 1; virMutexLock(&priv->lock); virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA, cleanup); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE, cleanup); if (priv->restricted) { virReportError(VIR_ERR_OPERATION_DENIED, "%s", @@ -86,8 +82,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED; if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE) newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE; - if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA) - start = METADATA_OFFSET; if (virLockSpaceAcquireResource(lockspace, args->name, diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c index ca825e6026..16fce551c3 100644 --- a/src/locking/lock_driver_lockd.c +++ b/src/locking/lock_driver_lockd.c @@ -723,8 +723,7 @@ static int virLockManagerLockDaemonRelease(virLockManagerPtr lock, args.flags &= ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE | - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA); + VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE); if (virNetClientProgramCall(program, client, diff --git a/src/locking/lock_driver_lockd.h b/src/locking/lock_driver_lockd.h index bebd804365..6931fe7425 100644 --- a/src/locking/lock_driver_lockd.h +++ b/src/locking/lock_driver_lockd.h @@ -25,7 +25,6 @@ enum virLockSpaceProtocolAcquireResourceFlags { VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = (1 << 0), VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = (1 << 1), - VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA = (1 << 2), }; #endif /* __VIR_LOCK_DRIVER_LOCKD_H__ */ -- 2.18.1

This reverts commit afd5a27575e8b6a494d2728552fe0e89c71e32b4. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/locking/lock_daemon_dispatch.c | 3 --- src/util/virlockspace.c | 15 +++++---------- src/util/virlockspace.h | 4 ---- tests/virlockspacetest.c | 29 +++++------------------------ 4 files changed, 10 insertions(+), 41 deletions(-) diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 10248ec0b5..1b479db55d 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -50,8 +50,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU virNetServerClientGetPrivateData(client); virLockSpacePtr lockspace; unsigned int newFlags; - off_t start = 0; - off_t len = 1; virMutexLock(&priv->lock); @@ -86,7 +84,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server ATTRIBUTE_UNU if (virLockSpaceAcquireResource(lockspace, args->name, priv->ownerPid, - start, len, newFlags) < 0) goto cleanup; diff --git a/src/util/virlockspace.c b/src/util/virlockspace.c index 79fa48d365..0736b4b85b 100644 --- a/src/util/virlockspace.c +++ b/src/util/virlockspace.c @@ -115,10 +115,8 @@ static void virLockSpaceResourceFree(virLockSpaceResourcePtr res) static virLockSpaceResourcePtr virLockSpaceResourceNew(virLockSpacePtr lockspace, const char *resname, - pid_t owner, - off_t start, - off_t len, - unsigned int flags) + unsigned int flags, + pid_t owner) { virLockSpaceResourcePtr res; bool shared = !!(flags & VIR_LOCK_SPACE_ACQUIRE_SHARED); @@ -159,7 +157,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, start, len, false) < 0) { + if (virFileLock(res->fd, shared, 0, 1, false) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -206,7 +204,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace, goto error; } - if (virFileLock(res->fd, shared, start, len, false) < 0) { + if (virFileLock(res->fd, shared, 0, 1, false) < 0) { if (errno == EACCES || errno == EAGAIN) { virReportError(VIR_ERR_RESOURCE_BUSY, _("Lockspace resource '%s' is locked"), @@ -614,8 +612,6 @@ int virLockSpaceDeleteResource(virLockSpacePtr lockspace, int virLockSpaceAcquireResource(virLockSpacePtr lockspace, const char *resname, pid_t owner, - off_t start, - off_t len, unsigned int flags) { int ret = -1; @@ -645,8 +641,7 @@ int virLockSpaceAcquireResource(virLockSpacePtr lockspace, goto cleanup; } - if (!(res = virLockSpaceResourceNew(lockspace, resname, - owner, start, len, flags))) + if (!(res = virLockSpaceResourceNew(lockspace, resname, flags, owner))) goto cleanup; if (virHashAddEntry(lockspace->resources, resname, res) < 0) { diff --git a/src/util/virlockspace.h b/src/util/virlockspace.h index 24f2c89be6..041cf20396 100644 --- a/src/util/virlockspace.h +++ b/src/util/virlockspace.h @@ -22,8 +22,6 @@ #ifndef __VIR_LOCK_SPACE_H__ # define __VIR_LOCK_SPACE_H__ -# include <sys/types.h> - # include "internal.h" # include "virjson.h" @@ -52,8 +50,6 @@ typedef enum { int virLockSpaceAcquireResource(virLockSpacePtr lockspace, const char *resname, pid_t owner, - off_t start, - off_t len, unsigned int flags); int virLockSpaceReleaseResource(virLockSpacePtr lockspace, diff --git a/tests/virlockspacetest.c b/tests/virlockspacetest.c index 3c621e7eb0..93353be285 100644 --- a/tests/virlockspacetest.c +++ b/tests/virlockspacetest.c @@ -98,8 +98,6 @@ static int testLockSpaceResourceLockExcl(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; - const off_t start = 0; - const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -112,13 +110,13 @@ static int testLockSpaceResourceLockExcl(const void *args ATTRIBUTE_UNUSED) if (virLockSpaceCreateResource(lockspace, "foo") < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), start, len, 0) < 0) + if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) < 0) goto cleanup; if (!virFileExists(LOCKSPACE_DIR "/foo")) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), start, len, 0) == 0) + if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) == 0) goto cleanup; if (virLockSpaceDeleteResource(lockspace, "foo") == 0) @@ -146,8 +144,6 @@ static int testLockSpaceResourceLockExclAuto(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; - const off_t start = 0; - const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -161,7 +157,6 @@ static int testLockSpaceResourceLockExclAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -187,8 +182,6 @@ static int testLockSpaceResourceLockShr(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; - const off_t start = 0; - const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -202,16 +195,13 @@ static int testLockSpaceResourceLockShr(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED) < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, "foo", - geteuid(), start, len, 0) == 0) + if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), 0) == 0) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED) < 0) goto cleanup; @@ -246,8 +236,6 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; - const off_t start = 0; - const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -261,7 +249,6 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED | VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -270,7 +257,6 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) == 0) goto cleanup; @@ -278,7 +264,6 @@ static int testLockSpaceResourceLockShrAuto(const void *args ATTRIBUTE_UNUSED) goto cleanup; if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), - start, len, VIR_LOCK_SPACE_ACQUIRE_SHARED | VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE) < 0) goto cleanup; @@ -311,8 +296,6 @@ static int testLockSpaceResourceLockPath(const void *args ATTRIBUTE_UNUSED) { virLockSpacePtr lockspace; int ret = -1; - const off_t start = 0; - const off_t len = 1; rmdir(LOCKSPACE_DIR); @@ -325,15 +308,13 @@ static int testLockSpaceResourceLockPath(const void *args ATTRIBUTE_UNUSED) if (virLockSpaceCreateResource(lockspace, LOCKSPACE_DIR "/foo") < 0) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", - geteuid(), start, len, 0) < 0) + if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", geteuid(), 0) < 0) goto cleanup; if (!virFileExists(LOCKSPACE_DIR "/foo")) goto cleanup; - if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", - geteuid(), start, len, 0) == 0) + if (virLockSpaceAcquireResource(lockspace, LOCKSPACE_DIR "/foo", geteuid(), 0) == 0) goto cleanup; if (virLockSpaceDeleteResource(lockspace, LOCKSPACE_DIR "/foo") == 0) -- 2.18.1
participants (4)
-
Daniel P. Berrangé
-
John Ferlan
-
Michal Privoznik
-
Michal Prívozník