Libvirt cleans up pools that error out by deactivating them. When using
LVM this is not very useful: we would rather have our own tooling handle
any real failures, and too many situations where the error is meaningless
and unimportant cause Libvirt to continuously disable our pools, causing
lots of grief. This patch is what we use to add a config option that
disables this behavior.
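For example, with this patch applied we keep our LVM-only hosts from
having their pools deactivated by setting the new option in
/etc/libvirt/libvirtd.conf:

    # Option added by this patch: 1 keeps the current behavior of
    # disabling a pool whose refresh fails, 0 only reports the error
    # and leaves the pool active.
    cleanup_pools = 0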
One example of an issue we encountered that requires this patch is a
race condition between Libvirt and LVM. A pool refresh will hang while
LVM is performing an action, and Libvirt continues once LVM finishes.
When a pool is refreshed while an LV is being removed, the refresh hangs
as expected and continues once the removal completes. The refresh then
tries to refresh that specific LV, fails because it can no longer be
found, and the entire pool is disabled.
Below are two commands that, when run together, reliably recreate the
issue:

while :; do lvcreate /dev/vgpool --name TEST -L 100G && lvremove -f /dev/vgpool/TEST; done
while :; do virsh pool-refresh vgpool; done
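When the race hits, the pool is left inactive and has to be brought back
by hand, e.g.:

    virsh pool-list --all    # vgpool now shows as inactive
    virsh pool-start vgpool  # reactivate it until the next failed refresh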
I am aware this is not the most elegant solution and I am open to
suggestions for resolving the underlying issue. However, we never want
our pools to be disabled because of an error, and I am sure there are
others whose use case is similar.
---
src/remote/libvirtd.conf.in | 7 +++++++
src/storage/storage_driver.c | 38 ++++++++++++++++++++++++++++++++----
2 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/src/remote/libvirtd.conf.in b/src/remote/libvirtd.conf.in
index 34741183cc..5600c26eca 100644
--- a/src/remote/libvirtd.conf.in
+++ b/src/remote/libvirtd.conf.in
@@ -506,3 +506,10 @@
# potential infinite waits blocking libvirt.
#
#ovs_timeout = 5
+
+###################################################################
+# This decides whether to disable pools that error in some way, such as during a refresh.
+# This can negatively affect LVM pools. 1 = disable the pools, 0 = don't disable the pools.
+# Do not set this to 0 unless you are only using LVM pools.
+#
+cleanup_pools = 1
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index 6bbf52f729..93ebcd662c 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -55,6 +55,7 @@
VIR_LOG_INIT("storage.storage_driver");
static virStorageDriverStatePtr driver;
+static int cleanup_pools = -1;
static int storageStateCleanup(void);
@@ -74,6 +75,20 @@ static void storageDriverUnlock(void)
virMutexUnlock(&driver->lock);
}
+static int cleanupPools(void)
+{
+    if (cleanup_pools == -1) {
+        g_autoptr(virConf) libvirtConf = NULL;
+        virConfLoadConfig(&libvirtConf, "libvirtd.conf");
+
+        if (!virConfGetValueInt(libvirtConf, "cleanup_pools", &cleanup_pools)
+            || (cleanup_pools != 0 && cleanup_pools != 1))
+            cleanup_pools = 1;
+    }
+
+    return cleanup_pools;
+}
+
static void
storagePoolRefreshFailCleanup(virStorageBackendPtr backend,
@@ -81,14 +96,26 @@ storagePoolRefreshFailCleanup(virStorageBackendPtr backend,
const char *stateFile)
{
virErrorPtr orig_err;
-
virErrorPreserveLast(&orig_err);
virStoragePoolObjClearVols(obj);
if (stateFile)
unlink(stateFile);
-    if (backend->stopPool)
-        backend->stopPool(obj);
+
+    if (!cleanupPools()) {
+        virStoragePoolDefPtr def = virStoragePoolObjGetDef(obj);
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Failed to refresh storage pool '%s'. Would have disabled this pool but cleanup_pools = 0: %s"),
+                       def->name, virGetLastErrorMessage());
+    } else {
+        virStoragePoolDefPtr def = virStoragePoolObjGetDef(obj);
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Failed to refresh storage pool '%s'. Disabled this pool because cleanup_pools = 1: %s"),
+                       def->name, virGetLastErrorMessage());
+        if (backend->stopPool) {
+            backend->stopPool(obj);
+        }
+    }
virErrorRestore(&orig_err);
}
@@ -101,7 +128,10 @@ storagePoolRefreshImpl(virStorageBackendPtr backend,
virStoragePoolObjClearVols(obj);
if (backend->refreshPool(obj) < 0) {
storagePoolRefreshFailCleanup(backend, obj, stateFile);
- return -1;
+        if (!cleanupPools())
+            return 0;
+        else
+            return -1;
}
return 0;
--
2.24.1