On 12/29/2015 08:35 AM, Wido den Hollander wrote:
It could happen that rbd_list() returns X names, but that while
refreshing the pool one of those RBD images is removed from Ceph
through a different route then libvirt.
We do not need to error out in such case, we can simply ignore the
volume and continue.
error : volStorageBackendRBDRefreshVolInfo:289 :
failed to open the RBD image 'vol-998': No such file or directory
It could also be that one or more Placement Groups (PGs) inside Ceph
are inactive due to a system failure.
If that happens it could be that some RBD images can not be refreshed
and a timeout will be raised by librados.
error : volStorageBackendRBDRefreshVolInfo:289 :
failed to open the RBD image 'vol-893': Connection timed out
Ignore the error and continue to refresh the rest of the pool's
contents.
Signed-off-by: Wido den Hollander <wido(a)widodh.nl>
---
src/storage/storage_backend_rbd.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
Seems reasonable - one nit... This should be two patches..
diff --git a/src/storage/storage_backend_rbd.c
b/src/storage/storage_backend_rbd.c
index 80684eb..888e3be 100644
--- a/src/storage/storage_backend_rbd.c
+++ b/src/storage/storage_backend_rbd.c
@@ -279,18 +279,17 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr
vol,
virStoragePoolObjPtr pool,
virStorageBackendRBDStatePtr ptr)
{
- int ret = -1;
- int r = 0;
- rbd_image_t image;
+ int r = -1;
+ rbd_image_t image = NULL;
+ rbd_image_info_t info;
r = rbd_open(ptr->ioctx, vol->name, &image, NULL);
if (r < 0) {
virReportSystemError(-r, _("failed to open the RBD image
'%s'"),
vol->name);
- return ret;
+ goto cleanup;
}
- rbd_image_info_t info;
r = rbd_stat(image, &info, sizeof(info));
if (r < 0) {
virReportSystemError(-r, _("failed to stat the RBD image
'%s'"),
@@ -308,6 +307,8 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr
vol,
vol->type = VIR_STORAGE_VOL_NETWORK;
vol->target.format = VIR_STORAGE_FILE_RAW;
+ r = -1;
+
VIR_FREE(vol->target.path);
if (virAsprintf(&vol->target.path, "%s/%s",
pool->def->source.name,
@@ -320,11 +321,13 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr
vol,
vol->name) == -1)
goto cleanup;
- ret = 0;
+ r = 0;
cleanup:
- rbd_close(image);
- return ret;
+ if (image)
+ rbd_close(image);
Ahhh a bug fix - should be a separate patch...
+
+ return r;
Changes in this function should have been their own patch since they fix
a bug. Also, keep "ret = -1;' at the top and then use ret as the return
rather than adjusting 'r'.
John
}
static int virStorageBackendRBDRefreshPool(virConnectPtr conn,
@@ -399,7 +402,23 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn,
name += strlen(name) + 1;
- if (volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0) {
+ r = volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr);
+
+ /* It could be that a volume has been deleted through a different route
+ * then libvirt and that will cause a -ENOENT to be returned.
+ *
+ * Another possibility is that there is something wrong with the placement
+ * group (PG) that RBD image's header is in and that causes -ETIMEDOUT
+ * to be returned.
+ *
+ * Do not error out and simply ignore the volume
+ */
+ if (r == -ENOENT || r == -ETIMEDOUT) {
+ virStorageVolDefFree(vol);
+ continue;
+ }
+
+ if (r < 0) {
virStorageVolDefFree(vol);
goto cleanup;
}