[libvirt] [PATCH 0/3] Various RBD storage pool changes

This series of patches changes a couple of things in the RBD storage pool driver. The first change is that it includes the return status of librados and librbd in log and debug messages making it easier for users to track down the real cause of the problem. The second patch simplifies some code. The third patch leverages the new timeout options from librados. Should the backing Ceph cluster not respond for any reason librados will timeout. This prevents libvirt from handing on a rados_* or rbd_* call and locking up libvirt. If the librados version on the system does not support these timeout options nothing happens. libvirt compiles and runs just fine, but the timeout simply won't work. Wido den Hollander (3): rbd: Include return statusses from librados/librbd in logging rbd: Simplify opening RADOS IoCTX rbd: Set timeout options for librados src/storage/storage_backend_rbd.c | 137 ++++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 57 deletions(-) -- 1.7.9.5

With this information it's easier for the user to debug what is going wrong. --- src/storage/storage_backend_rbd.c | 119 +++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index 1b35cc4..bd21873 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -51,6 +51,7 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool) { int ret = -1; + int r = 0; unsigned char *secret_value = NULL; size_t secret_value_size; char *rados_key = NULL; @@ -65,10 +66,10 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, if (pool->def->source.auth.cephx.username != NULL) { VIR_DEBUG("Using cephx authorization"); - if (rados_create(&ptr->cluster, - pool->def->source.auth.cephx.username) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to initialize RADOS")); + r = rados_create(&ptr->cluster, pool->def->source.auth.cephx.username); + if (r < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to initialize RADOS: %d"), r); goto cleanup; } @@ -198,10 +199,11 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, } ptr->starttime = time(0); - if (rados_connect(ptr->cluster) < 0) { + r = rados_connect(ptr->cluster); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to connect to the RADOS monitor on: %s"), - mon_buff); + _("failed to connect to the RADOS monitor on: %s: %d"), + mon_buff, r); goto cleanup; } @@ -248,18 +250,22 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, virStorageBackendRBDStatePtr ptr) { int ret = -1; + int r = 0; rbd_image_t image; - if (rbd_open(ptr->ioctx, vol->name, &image, NULL) < 0) { + + r = rbd_open(ptr->ioctx, vol->name, &image, NULL); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to open the RBD image '%s'"), - vol->name); + _("failed to open the RBD image '%s': %d"), + vol->name, r); return ret; } rbd_image_info_t info; - if (rbd_stat(image, &info, sizeof(info)) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to stat the RBD image")); + r = rbd_stat(image, &info, sizeof(info)); + if (r < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to stat the RBD image: %d"), r); goto cleanup; } @@ -297,6 +303,7 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, size_t max_size = 1024; int ret = -1; int len = -1; + int r = 0; char *name, *names = NULL; virStorageBackendRBDState ptr; ptr.cluster = NULL; @@ -306,26 +313,28 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { + r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); goto cleanup; } struct rados_cluster_stat_t clusterstat; - if (rados_cluster_stat(ptr.cluster, &clusterstat) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to stat the RADOS cluster")); + r = rados_cluster_stat(ptr.cluster, &clusterstat); + if (r < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to stat the RADOS cluster: %d"), r); goto cleanup; } struct rados_pool_stat_t poolstat; - if (rados_ioctx_pool_stat(ptr.ioctx, &poolstat) < 0) { + r = rados_ioctx_pool_stat(ptr.ioctx, &poolstat); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to stat the RADOS pool '%s'"), - pool->def->source.name); + _("failed to stat the RADOS pool '%s': %d"), + pool->def->source.name, r); goto cleanup; } @@ -398,6 +407,7 @@ static int virStorageBackendRBDDeleteVol(virConnectPtr conn, unsigned int flags) { int ret = -1; + int r = 0; virStorageBackendRBDState ptr; ptr.cluster = NULL; ptr.ioctx = NULL; @@ -412,19 +422,20 @@ static int virStorageBackendRBDDeleteVol(virConnectPtr conn, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { + r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); goto cleanup; } - if (rbd_remove(ptr.ioctx, vol->name) < 0) { + r = rbd_remove(ptr.ioctx, vol->name); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to remove volume '%s/%s'"), + _("failed to remove volume '%s/%s': %d"), pool->def->source.name, - vol->name); + vol->name, r); goto cleanup; } @@ -488,6 +499,7 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, ptr.cluster = NULL; ptr.ioctx = NULL; int ret = -1; + int r = 0; VIR_DEBUG("Creating RBD image %s/%s with size %llu", pool->def->source.name, @@ -498,11 +510,11 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) goto cleanup; - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { + r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); goto cleanup; } @@ -512,11 +524,12 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, goto cleanup; } - if (virStorageBackendRBDCreateImage(ptr.ioctx, vol->name, vol->capacity) < 0) { + r = virStorageBackendRBDCreateImage(ptr.ioctx, vol->name, vol->capacity); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create volume '%s/%s'"), + _("failed to create volume '%s/%s': %d"), pool->def->source.name, - vol->name); + vol->name, r); goto cleanup; } @@ -538,16 +551,17 @@ static int virStorageBackendRBDRefreshVol(virConnectPtr conn, ptr.cluster = NULL; ptr.ioctx = NULL; int ret = -1; + int r = 0; if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) { goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { + r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); goto cleanup; } @@ -573,6 +587,7 @@ static int virStorageBackendRBDResizeVol(virConnectPtr conn ATTRIBUTE_UNUSED, ptr.ioctx = NULL; rbd_image_t image = NULL; int ret = -1; + int r = 0; virCheckFlags(0, -1); @@ -580,25 +595,27 @@ static int virStorageBackendRBDResizeVol(virConnectPtr conn ATTRIBUTE_UNUSED, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { + r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); goto cleanup; } - if (rbd_open(ptr.ioctx, vol->name, &image, NULL) < 0) { + r = rbd_open(ptr.ioctx, vol->name, &image, NULL); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to open the RBD image '%s'"), - vol->name); + _("failed to open the RBD image '%s': %d"), + vol->name, r); goto cleanup; } - if (rbd_resize(image, capacity) < 0) { + r = rbd_resize(image, capacity); + if (r < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to resize the RBD image '%s'"), - vol->name); + _("failed to resize the RBD image '%s': %d"), + vol->name, r); goto cleanup; } -- 1.7.9.5

Reduces code and brings logging back to one function. --- src/storage/storage_backend_rbd.c | 43 ++++++++++++++----------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index bd21873..8d1e320 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -221,6 +221,17 @@ cleanup: return ret; } +static int virStorageBackendRBDOpenIoCTX(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool) +{ + int r = rados_ioctx_create(ptr->cluster, pool->def->source.name, &ptr->ioctx); + if (r < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r); + } + return r; +} + static int virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr) { int ret = 0; @@ -313,11 +324,7 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, goto cleanup; } - r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); - if (r < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), - pool->def->source.name, r); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } @@ -422,11 +429,7 @@ static int virStorageBackendRBDDeleteVol(virConnectPtr conn, goto cleanup; } - r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); - if (r < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), - pool->def->source.name, r); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } @@ -510,13 +513,8 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) goto cleanup; - r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); - if (r < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), - pool->def->source.name, r); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) goto cleanup; - } if (vol->target.encryption != NULL) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -551,17 +549,12 @@ static int virStorageBackendRBDRefreshVol(virConnectPtr conn, ptr.cluster = NULL; ptr.ioctx = NULL; int ret = -1; - int r = 0; if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) { goto cleanup; } - r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); - if (r < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), - pool->def->source.name, r); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } @@ -595,11 +588,7 @@ static int virStorageBackendRBDResizeVol(virConnectPtr conn ATTRIBUTE_UNUSED, goto cleanup; } - r = rados_ioctx_create(ptr.cluster, pool->def->source.name, &ptr.ioctx); - if (r < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), - pool->def->source.name, r); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } -- 1.7.9.5

On 02/12/2014 03:11 PM, Wido den Hollander wrote:
Reduces code and brings logging back to one function. --- src/storage/storage_backend_rbd.c | 43 ++++++++++++++----------------------- 1 file changed, 16 insertions(+), 27 deletions(-)
diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index bd21873..8d1e320 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -221,6 +221,17 @@ cleanup: return ret; }
+static int virStorageBackendRBDOpenIoCTX(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool) +{ + int r = rados_ioctx_create(ptr->cluster, pool->def->source.name, &ptr->ioctx); + if (r < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to create the RBD IoCTX. Does the pool '%s' exist?: %d"), + pool->def->source.name, r);
Looking at the documentation [1], librados returns -errno. It would be more user-friendly to report it as a string instead of a number. I'd suggest using: virReportSystemError(-r, _("failed to create the RBD IoCTX. " "Does the pool '%s' exist?'), pool->def->source.name); Also, putting this patch first would reduce the number of error messages that need to be changed. Jan [1] http://ceph.com/docs/master/rados/api/librados/

The first patch improves the logging for the user. Currently the return codes from librados are not written to any logfile, which might leave a user clueless to why the storage pool is not working. The second patch sets three timeout options in librados. These are useful for when the Ceph cluster is down. Librados will then not block for ever, but return a -ETIMEDOUT so libvirt does not block for ever. Wido den Hollander (2): rbd: Include return statusses from librados/librbd in logging rbd: Set timeout options for librados src/storage/storage_backend_rbd.c | 144 ++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 66 deletions(-) -- 1.7.9.5

With this information it's easier for the user to debug what is going wrong. --- src/storage/storage_backend_rbd.c | 127 ++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 66 deletions(-) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index 1b35cc4..abb110e 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -51,6 +51,7 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool) { int ret = -1; + int r = 0; unsigned char *secret_value = NULL; size_t secret_value_size; char *rados_key = NULL; @@ -65,10 +66,9 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, if (pool->def->source.auth.cephx.username != NULL) { VIR_DEBUG("Using cephx authorization"); - if (rados_create(&ptr->cluster, - pool->def->source.auth.cephx.username) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to initialize RADOS")); + r = rados_create(&ptr->cluster, pool->def->source.auth.cephx.username); + if (r < 0) { + virReportSystemError(-r, "%s", _("failed to initialize RADOS")); goto cleanup; } @@ -198,10 +198,10 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, } ptr->starttime = time(0); - if (rados_connect(ptr->cluster) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to connect to the RADOS monitor on: %s"), - mon_buff); + r = rados_connect(ptr->cluster); + if (r < 0) { + virReportSystemError(-r, _("failed to connect to the RADOS monitor on: %s"), + mon_buff); goto cleanup; } @@ -219,6 +219,16 @@ cleanup: return ret; } +static int virStorageBackendRBDOpenIoCTX(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool) +{ + int r = rados_ioctx_create(ptr->cluster, pool->def->source.name, &ptr->ioctx); + if (r < 0) { + virReportSystemError(-r, _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), + pool->def->source.name); + } + return r; +} + static int virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr) { int ret = 0; @@ -248,18 +258,21 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, virStorageBackendRBDStatePtr ptr) { int ret = -1; + int r = 0; rbd_image_t image; - if (rbd_open(ptr->ioctx, vol->name, &image, NULL) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to open the RBD image '%s'"), - vol->name); + + r = rbd_open(ptr->ioctx, vol->name, &image, NULL); + if (r < 0) { + virReportSystemError(-r, _("failed to open the RBD image '%s'"), + vol->name); return ret; } rbd_image_info_t info; - if (rbd_stat(image, &info, sizeof(info)) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to stat the RBD image")); + r = rbd_stat(image, &info, sizeof(info)); + if (r < 0) { + virReportSystemError(-r, _("failed to stat the RBD image '%s'"), + vol->name); goto cleanup; } @@ -297,6 +310,7 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, size_t max_size = 1024; int ret = -1; int len = -1; + int r = 0; char *name, *names = NULL; virStorageBackendRBDState ptr; ptr.cluster = NULL; @@ -306,26 +320,22 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } struct rados_cluster_stat_t clusterstat; - if (rados_cluster_stat(ptr.cluster, &clusterstat) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("failed to stat the RADOS cluster")); + r = rados_cluster_stat(ptr.cluster, &clusterstat); + if (r < 0) { + virReportSystemError(-r, "%s", _("failed to stat the RADOS cluster")); goto cleanup; } struct rados_pool_stat_t poolstat; - if (rados_ioctx_pool_stat(ptr.ioctx, &poolstat) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to stat the RADOS pool '%s'"), - pool->def->source.name); + r = rados_ioctx_pool_stat(ptr.ioctx, &poolstat); + if (r < 0) { + virReportSystemError(-r, _("failed to stat the RADOS pool '%s'"), + pool->def->source.name); goto cleanup; } @@ -398,6 +408,7 @@ static int virStorageBackendRBDDeleteVol(virConnectPtr conn, unsigned int flags) { int ret = -1; + int r = 0; virStorageBackendRBDState ptr; ptr.cluster = NULL; ptr.ioctx = NULL; @@ -412,19 +423,14 @@ static int virStorageBackendRBDDeleteVol(virConnectPtr conn, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } - if (rbd_remove(ptr.ioctx, vol->name) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to remove volume '%s/%s'"), - pool->def->source.name, - vol->name); + r = rbd_remove(ptr.ioctx, vol->name); + if (r < 0) { + virReportSystemError(-r, _("failed to remove volume '%s/%s'"), + pool->def->source.name, vol->name); goto cleanup; } @@ -488,6 +494,7 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, ptr.cluster = NULL; ptr.ioctx = NULL; int ret = -1; + int r = 0; VIR_DEBUG("Creating RBD image %s/%s with size %llu", pool->def->source.name, @@ -498,13 +505,8 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) goto cleanup; - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) goto cleanup; - } if (vol->target.encryption != NULL) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -512,11 +514,11 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, goto cleanup; } - if (virStorageBackendRBDCreateImage(ptr.ioctx, vol->name, vol->capacity) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create volume '%s/%s'"), - pool->def->source.name, - vol->name); + r = virStorageBackendRBDCreateImage(ptr.ioctx, vol->name, vol->capacity); + if (r < 0) { + virReportSystemError(-r, _("failed to create volume '%s/%s'"), + pool->def->source.name, + vol->name); goto cleanup; } @@ -543,11 +545,7 @@ static int virStorageBackendRBDRefreshVol(virConnectPtr conn, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } @@ -573,6 +571,7 @@ static int virStorageBackendRBDResizeVol(virConnectPtr conn ATTRIBUTE_UNUSED, ptr.ioctx = NULL; rbd_image_t image = NULL; int ret = -1; + int r = 0; virCheckFlags(0, -1); @@ -580,25 +579,21 @@ static int virStorageBackendRBDResizeVol(virConnectPtr conn ATTRIBUTE_UNUSED, goto cleanup; } - if (rados_ioctx_create(ptr.cluster, - pool->def->source.name, &ptr.ioctx) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to create the RBD IoCTX. Does the pool '%s' exist?"), - pool->def->source.name); + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) { goto cleanup; } - if (rbd_open(ptr.ioctx, vol->name, &image, NULL) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to open the RBD image '%s'"), - vol->name); + r = rbd_open(ptr.ioctx, vol->name, &image, NULL); + if (r < 0) { + virReportSystemError(-r, _("failed to open the RBD image '%s'"), + vol->name); goto cleanup; } - if (rbd_resize(image, capacity) < 0) { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to resize the RBD image '%s'"), - vol->name); + r = rbd_resize(image, capacity); + if (r < 0) { + virReportSystemError(-r, _("failed to resize the RBD image '%s'"), + vol->name); goto cleanup; } -- 1.7.9.5

These timeout values make librados/librbd return -ETIMEDOUT when a operation is blocking due to a failing/unreachable Ceph cluster. By having the operations time out libvirt will not block. --- src/storage/storage_backend_rbd.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index abb110e..22ed7de 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -60,6 +60,9 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, char secretUuid[VIR_UUID_STRING_BUFLEN]; size_t i; char *mon_buff = NULL; + const char *client_mount_timeout = "30"; + const char *mon_op_timeout = "30"; + const char *osd_op_timeout = "30"; VIR_DEBUG("Found Cephx username: %s", pool->def->source.auth.cephx.username); @@ -197,6 +200,20 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, goto cleanup; } + /* + * Set timeout options for librados. + * In case the Ceph cluster is down libvirt won't block forever. + * Operations in librados will return -ETIMEDOUT when the timeout is reached. + */ + VIR_DEBUG("Setting RADOS option client_mount_timeout to %s", client_mount_timeout); + rados_conf_set(ptr->cluster, "client_mount_timeout", client_mount_timeout); + + VIR_DEBUG("Setting RADOS option rados_mon_op_timeout to %s", mon_op_timeout); + rados_conf_set(ptr->cluster, "rados_mon_op_timeout", mon_op_timeout); + + VIR_DEBUG("Setting RADOS option rados_osd_op_timeout to %s", osd_op_timeout); + rados_conf_set(ptr->cluster, "rados_osd_op_timeout", osd_op_timeout); + ptr->starttime = time(0); r = rados_connect(ptr->cluster); if (r < 0) { -- 1.7.9.5

On 02/25/2014 10:50 AM, Wido den Hollander wrote:
The first patch improves the logging for the user. Currently the return codes from librados are not written to any logfile, which might leave a user clueless to why the storage pool is not working.
The second patch sets three timeout options in librados. These are useful for when the Ceph cluster is down. Librados will then not block for ever, but return a -ETIMEDOUT so libvirt does not block for ever.
Wido den Hollander (2): rbd: Include return statusses from librados/librbd in logging
s/statusses/statuses/
rbd: Set timeout options for librados
src/storage/storage_backend_rbd.c | 144 ++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 66 deletions(-)
ACK and pushed. Jan

These timeout values make librados/librbd return -ETIMEDOUT when a operation is blocking due to a failing/unreachable Ceph cluster. By having the operations time out libvirt will not block. --- src/storage/storage_backend_rbd.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index 8d1e320..2c97bf4 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -60,6 +60,9 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, char secretUuid[VIR_UUID_STRING_BUFLEN]; size_t i; char *mon_buff = NULL; + const char *client_mount_timeout = "30"; + const char *mon_op_timeout = "30"; + const char *osd_op_timeout = "30"; VIR_DEBUG("Found Cephx username: %s", pool->def->source.auth.cephx.username); @@ -198,6 +201,20 @@ static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr, goto cleanup; } + /* + * Set timeout options for librados. + * In case the Ceph cluster is down libvirt won't block forever. + * Operations in librados will return -ETIMEDOUT when the timeout is reached. + */ + VIR_DEBUG("Setting RADOS option client_mount_timeout to %s", client_mount_timeout); + rados_conf_set(ptr->cluster, "client_mount_timeout", client_mount_timeout); + + VIR_DEBUG("Setting RADOS option rados_mon_op_timeout to %s", mon_op_timeout); + rados_conf_set(ptr->cluster, "rados_mon_op_timeout", mon_op_timeout); + + VIR_DEBUG("Setting RADOS option rados_osd_op_timeout to %s", osd_op_timeout); + rados_conf_set(ptr->cluster, "rados_osd_op_timeout", osd_op_timeout); + ptr->starttime = time(0); r = rados_connect(ptr->cluster); if (r < 0) { -- 1.7.9.5
participants (2)
-
Ján Tomko
-
Wido den Hollander