It may take some time for sanlock to add a lockspace. If the user
restarts the libvirtd service in the meantime, the fresh daemon can fail
to add the same lockspace with EINPROGRESS. Hence, we should retry a few
times before reporting an error. This issue can be easily reproduced:
for i in {1..1000} ; do echo $i; service libvirtd restart; sleep 2; done
20
Stopping libvirtd daemon: [FAILED]
Starting libvirtd daemon: [ OK ]
21
Stopping libvirtd daemon: [ OK ]
Starting libvirtd daemon: [ OK ]
22
Stopping libvirtd daemon: [ OK ]
Starting libvirtd daemon: [ OK ]
error : virLockManagerSanlockSetupLockspace:334 : Unable to add
lockspace /var/lib/libvirt/sanlock/__LIBVIRT__DISKS__: Operation now in
progress
---
src/locking/lock_driver_sanlock.c | 16 +++++++++++++++-
1 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/src/locking/lock_driver_sanlock.c b/src/locking/lock_driver_sanlock.c
index d24f3d6..6d02ac6 100644
--- a/src/locking/lock_driver_sanlock.c
+++ b/src/locking/lock_driver_sanlock.c
@@ -184,6 +184,11 @@ static int virLockManagerSanlockLoadConfig(const char *configFile)
return 0;
}
+/* How many milliseconds to sleep before retrying to add a lockspace */
+#define LOCKSPACE_SLEEP 100
+/* How many times to retry adding a lockspace */
+#define LOCKSPACE_RETRIES 10
+
static int virLockManagerSanlockSetupLockspace(void)
{
int fd = -1;
@@ -192,6 +197,7 @@ static int virLockManagerSanlockSetupLockspace(void)
struct sanlk_lockspace ls;
char *path = NULL;
char *dir = NULL;
+ int retries = LOCKSPACE_RETRIES;
if (virAsprintf(&path, "%s/%s",
driver->autoDiskLeasePath,
@@ -320,9 +326,17 @@ static int virLockManagerSanlockSetupLockspace(void)
ls.host_id = driver->hostID;
/* Stage 2: Try to register the lockspace with the daemon.
* If the lockspace is already registered, we should get EEXIST back
- * in which case we can just carry on with life
+ * in which case we can just carry on with life, or EINPROGRESS if
+ * previous libvirtd instance started the work but didn't finish.
+ * Unfortunately, sanlock lacks an API to determine state of lockspace,
+ * so we have to do this blindly.
*/
+retry:
if ((rv = sanlock_add_lockspace(&ls, 0)) < 0) {
+ if (retries-- && -rv == EINPROGRESS) {
+ usleep(LOCKSPACE_SLEEP * 1000);
+ goto retry;
+ }
if (-rv != EEXIST) {
if (rv <= -200)
virReportError(VIR_ERR_INTERNAL_ERROR,
--
1.7.8.6