When creating multiple VMs in parallel, concurrent volume creation holds an async job on the pool. A pool refresh during this window fails immediately with "pool has asynchronous jobs running", which causes cascading failures in parallel provisioning workflows. The refresh operation genuinely cannot run while a volume build is in progress (it clears all volume metadata via virStoragePoolObjClearVols), but the failure is premature since the async job will finish shortly. Add a condition variable to virStoragePoolObj that allows storagePoolRefresh() to wait up to 30 seconds for async jobs to drain. The volume build thread broadcasts the condition when it decrements asyncjobs to zero. After waking, the refresh function re-validates preconditions (pool still active, not starting) since the pool lock was released during the wait. Only storagePoolRefresh() gets the wait mechanism. The other three operations (destroy, undefine, delete) keep the immediate error because waiting to destroy or delete a pool during volume creation is not a sensible user workflow. Resolves: https://issues.redhat.com/browse/RHEL-150758 Signed-off-by: Lucas Amaral <lucaaamaral@gmail.com> --- Build-tested on CentOS Stream 9 (297 OK, 0 failures). Runtime-validated using LD_PRELOAD to inject a 15-second delay into fallocate64() for pool directory files (the storage backend calls fallocate() directly in storage_util.c:331, which with LFS resolves to fallocate64; on local filesystems it completes in microseconds, making the race window too narrow to hit naturally). Reproduction steps (privileged CentOS Stream 9 container, virtstoraged): 1. Compile LD_PRELOAD shim that sleeps 15s in fallocate64() for files under /var/lib/libvirt/images/ larger than 1MB 2. Start virtstoraged with LD_PRELOAD 3. Create and start a dir-type pool 4. Start vol-create-as in background (triggers ~15s async job) 5. After 3s, call pool-refresh Before (unpatched): $ virsh vol-create-as default testvol 100M & $ sleep 3 && virsh pool-refresh default error: Failed to refresh pool default error: internal error: pool 'default' has asynchronous jobs running. After (both patches applied): $ virsh vol-create-as default testvol 100M & $ sleep 3 && virsh pool-refresh default Vol testvol created Pool default refreshed (waited ~12s for async job) pool-destroy correctly rejects immediately (no wait): $ virsh vol-create-as default testvol 100M & $ sleep 3 && virsh pool-destroy default error: Failed to destroy pool default error: Requested operation is not valid: pool 'default' has asynchronous jobs running. Vol testvol created src/conf/virstorageobj.c | 39 ++++++++++++++++++++++++++++++++++++ src/conf/virstorageobj.h | 3 +++ src/libvirt_private.syms | 1 + src/storage/storage_driver.c | 19 ++++++++++++++---- 4 files changed, 58 insertions(+), 4 deletions(-) diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c index ac79ff4c26..2f8f1339ad 100644 --- a/src/conf/virstorageobj.c +++ b/src/conf/virstorageobj.c @@ -31,6 +31,7 @@ #include "virlog.h" #include "virscsihost.h" #include "virstring.h" +#include "virtime.h" #include "virvhba.h" #define VIR_FROM_THIS VIR_FROM_STORAGE @@ -86,6 +87,7 @@ struct _virStoragePoolObj { bool starting; bool autostart; unsigned int asyncjobs; + virCond asyncjobsCond; virStoragePoolDef *def; virStoragePoolDef *newDef; @@ -196,6 +198,11 @@ virStoragePoolObjNew(void) if (!(obj = virObjectLockableNew(virStoragePoolObjClass))) return NULL; + if (virCondInit(&obj->asyncjobsCond) < 0) { + virObjectUnref(obj); + return NULL; + } + if (!(obj->volumes = virStorageVolObjListNew())) { virObjectUnref(obj); return NULL; @@ -338,6 +345,36 @@ void virStoragePoolObjDecrAsyncjobs(virStoragePoolObj *obj) { obj->asyncjobs--; + if (obj->asyncjobs == 0) + virCondBroadcast(&obj->asyncjobsCond); +} + + +int +virStoragePoolObjWaitAsyncjobs(virStoragePoolObj *obj) +{ + unsigned long long deadline; + + if (virTimeMillisNow(&deadline) < 0) + return -1; + deadline += 30 * 1000; + + while (obj->asyncjobs > 0) { + if (virCondWaitUntil(&obj->asyncjobsCond, + &obj->parent.lock, deadline) < 0) { + if (errno == ETIMEDOUT) { + virReportError(VIR_ERR_OPERATION_TIMEOUT, + _("timed out waiting for async jobs on pool '%1$s'"), + obj->def->name); + } else { + virReportSystemError(errno, "%s", + _("failed to wait for pool async jobs")); + } + return -1; + } + } + + return 0; } @@ -354,6 +391,8 @@ virStoragePoolObjDispose(void *opaque) g_free(obj->configFile); g_free(obj->autostartLink); + + virCondDestroy(&obj->asyncjobsCond); } diff --git a/src/conf/virstorageobj.h b/src/conf/virstorageobj.h index e1eabfdb3a..a478d2f33e 100644 --- a/src/conf/virstorageobj.h +++ b/src/conf/virstorageobj.h @@ -110,6 +110,9 @@ virStoragePoolObjIncrAsyncjobs(virStoragePoolObj *obj); void virStoragePoolObjDecrAsyncjobs(virStoragePoolObj *obj); +int +virStoragePoolObjWaitAsyncjobs(virStoragePoolObj *obj); + int virStoragePoolObjLoadAllConfigs(virStoragePoolObjList *pools, const char *configDir, diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d8ae4f46cd..950ba51384 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1547,6 +1547,7 @@ virStoragePoolObjSetDef; virStoragePoolObjSetStarting; virStoragePoolObjVolumeGetNames; virStoragePoolObjVolumeListExport; +virStoragePoolObjWaitAsyncjobs; # cpu/cpu.h diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c index 8f5c921157..f7b5c8bfbd 100644 --- a/src/storage/storage_driver.c +++ b/src/storage/storage_driver.c @@ -1231,10 +1231,21 @@ storagePoolRefresh(virStoragePoolPtr pool, } if (virStoragePoolObjGetAsyncjobs(obj) > 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("pool '%1$s' has asynchronous jobs running."), - def->name); - goto cleanup; + if (virStoragePoolObjWaitAsyncjobs(obj) < 0) + goto cleanup; + + if (!virStoragePoolObjIsActive(obj)) { + virReportError(VIR_ERR_OPERATION_INVALID, + _("storage pool '%1$s' is not active"), def->name); + goto cleanup; + } + + if (virStoragePoolObjIsStarting(obj)) { + virReportError(VIR_ERR_OPERATION_INVALID, + _("storage pool '%1$s' is starting up"), + def->name); + goto cleanup; + } } stateFile = virFileBuildPath(driver->stateDir, def->name, ".xml"); -- 2.52.0