On Fri, Mar 30, 2012 at 11:04:43AM +0200, Wido den Hollander wrote:
This patch adds support for a new storage backend with RBD support.
RBD is the RADOS Block Device and is part of the Ceph distributed storage system.
It comes in two flavours: Qemu-RBD and Kernel RBD, this storage backend only supports
Qemu-RBD, thus limiting the use of this storage driver to Qemu only.
To function this backend relies on librbd and librados being present on the local
system.
The backend also supports Cephx authentication for safe authentication with the Ceph
cluster.
For storing credentials it uses the build-in secret mechanism of libvirt.
Signed-off-by: Wido den Hollander <wido(a)widodh.nl>
---
configure.ac | 20 ++
include/libvirt/libvirt.h.in | 1 +
src/Makefile.am | 9 +
src/conf/storage_conf.c | 197 ++++++++++---
src/conf/storage_conf.h | 16 +
src/storage/storage_backend.c | 6 +
src/storage/storage_backend_rbd.c | 465 ++++++++++++++++++++++++++++++
src/storage/storage_backend_rbd.h | 30 ++
tests/storagepoolxml2xmlin/pool-rbd.xml | 11 +
tests/storagepoolxml2xmlout/pool-rbd.xml | 15 +
tools/virsh.c | 7 +
11 files changed, 734 insertions(+), 43 deletions(-)
create mode 100644 src/storage/storage_backend_rbd.c
create mode 100644 src/storage/storage_backend_rbd.h
create mode 100644 tests/storagepoolxml2xmlin/pool-rbd.xml
create mode 100644 tests/storagepoolxml2xmlout/pool-rbd.xml
diff --git a/configure.ac b/configure.ac
index 32cc8d0..b693e5b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1743,6 +1743,8 @@ AC_ARG_WITH([storage-mpath],
AC_HELP_STRING([--with-storage-mpath], [with mpath backend for the storage driver
@<:@default=check@:>@]),[],[with_storage_mpath=check])
AC_ARG_WITH([storage-disk],
AC_HELP_STRING([--with-storage-disk], [with GPartd Disk backend for the storage driver
@<:@default=check@:>@]),[],[with_storage_disk=check])
+AC_ARG_WITH([storage-rbd],
+ AC_HELP_STRING([--with-storage-rbd], [with RADOS Block Device backend for the storage
driver @<:@default=check@:>@]),[],[with_storage_rbd=check])
if test "$with_libvirtd" = "no"; then
with_storage_dir=no
@@ -1752,6 +1754,7 @@ if test "$with_libvirtd" = "no"; then
with_storage_scsi=no
with_storage_mpath=no
with_storage_disk=no
+ with_storage_rbd=no
fi
if test "$with_storage_dir" = "yes" ; then
AC_DEFINE_UNQUOTED([WITH_STORAGE_DIR], 1, [whether directory backend for storage
driver is enabled])
@@ -1910,6 +1913,22 @@ if test "$with_storage_mpath" = "check"; then
fi
AM_CONDITIONAL([WITH_STORAGE_MPATH], [test "$with_storage_mpath" =
"yes"])
+if test "$with_storage_rbd" = "yes" || test
"$with_storage_rbd" = "check"; then
+ AC_CHECK_HEADER([rbd/librbd.h], [LIBRBD_FOUND=yes; break;])
+
+ LIBRBD_LIBS="-lrbd -lrados -lcrypto"
+
+ if test "$LIBRBD_FOUND" = "yes"; then
+ with_storage_rbd=yes
+ LIBS="$LIBS $LIBRBD_LIBS"
+ else
+ with_storage_rbd=no
+ fi
+
+ AC_DEFINE_UNQUOTED([WITH_STORAGE_RBD], 1, [wether RBD backend for storage driver is
enabled])
+fi
+AM_CONDITIONAL([WITH_STORAGE_RBD], [test "$with_storage_rbd" =
"yes"])
+
LIBPARTED_CFLAGS=
LIBPARTED_LIBS=
if test "$with_storage_disk" = "yes" ||
@@ -2673,6 +2692,7 @@ AC_MSG_NOTICE([ iSCSI: $with_storage_iscsi])
AC_MSG_NOTICE([ SCSI: $with_storage_scsi])
AC_MSG_NOTICE([ mpath: $with_storage_mpath])
AC_MSG_NOTICE([ Disk: $with_storage_disk])
+AC_MSG_NOTICE([ RBD: $with_storage_rbd])
AC_MSG_NOTICE([])
AC_MSG_NOTICE([Security Drivers])
AC_MSG_NOTICE([])
Can you also add RBD logic to the libvirt.spec.in RPM specfile. We should
have it turned on for any Fedora >= 16, but not for RHEL.
diff --git a/include/libvirt/libvirt.h.in
b/include/libvirt/libvirt.h.in
index 499dcd4..ee1d5ec 100644
--- a/include/libvirt/libvirt.h.in
+++ b/include/libvirt/libvirt.h.in
@@ -2324,6 +2324,7 @@ typedef enum {
VIR_STORAGE_VOL_FILE = 0, /* Regular file based volumes */
VIR_STORAGE_VOL_BLOCK = 1, /* Block based volumes */
VIR_STORAGE_VOL_DIR = 2, /* Directory-passthrough based volume */
+ VIR_STORAGE_VOL_NETWORK = 3, /* Network volumes like RBD (RADOS Block Device) */
So the RBD volumes don't ever appear as normal block devices in /dev ?
Could you also add some content to docs/storage.html to give an example
of the pool XML, and an example of the volume XML you will see, and any
other information that you think would be relevant.
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index bdf6218..2a0b5eb 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -395,6 +416,27 @@ virStoragePoolDefParseAuthChap(xmlXPathContextPtr ctxt,
}
static int
+virStoragePoolDefParseAuthCephx(xmlXPathContextPtr ctxt,
+ virStoragePoolAuthCephxPtr auth) {
+ auth->username = virXPathString("string(./auth/@username)", ctxt);
+ if (auth->username == NULL) {
+ virStorageReportError(VIR_ERR_XML_ERROR,
+ "%s", _("missing auth username
attribute"));
+ return -1;
+ }
+
+ auth->secret.uuid = virXPathString("string(./auth/secret/@uuid)",
ctxt);
+ auth->secret.usage = virXPathString("string(./auth/secret/@usage)",
ctxt);
+ if (auth->secret.uuid == NULL && auth->secret.usage == NULL) {
+ virStorageReportError(VIR_ERR_XML_ERROR,
+ "%s", _("missing auth secret uuid or usage
attribute"));
+ return -1;
+ }
If would be desirable to use virUUIDParse to validate the UUID in the
XML has correct syntax.
@@ -414,6 +456,12 @@ virStoragePoolDefParseSource(xmlXPathContextPtr
ctxt,
}
source->name = virXPathString("string(./name)", ctxt);
+ if (pool_type == VIR_STORAGE_POOL_RBD && source->name == NULL) {
+ virStorageReportError(VIR_ERR_XML_ERROR,
+ _("%s"), "missing mandatory
'name' field for RBD pool name");
+ VIR_FREE(source->name);
+ goto cleanup;
+ }
You have the _(..) bit around the wrong string here :-)
@@ -907,25 +1015,28 @@ virStoragePoolDefFormat(virStoragePoolDefPtr
def) {
if (virStoragePoolSourceFormat(&buf, options, &def->source) < 0)
goto cleanup;
- virBufferAddLit(&buf," <target>\n");
+ /* RBD devices are no local block devs nor files, so it doesn't have a target */
Interesting, that answers my earlier question. I'd like to see what the XML
looks like for a volume, and how you would then put that info in a guest
<disk> section.
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index 1ef9295..6b91ca5 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -120,6 +120,7 @@ enum virStoragePoolType {
VIR_STORAGE_POOL_ISCSI, /* iSCSI targets */
VIR_STORAGE_POOL_SCSI, /* SCSI HBA */
VIR_STORAGE_POOL_MPATH, /* Multipath devices */
+ VIR_STORAGE_POOL_RBD, /* RADOS Block Device */
Slight indentation bug.
VIR_STORAGE_POOL_LAST,
};
@@ -137,6 +138,7 @@ enum virStoragePoolDeviceType {
enum virStoragePoolAuthType {
VIR_STORAGE_POOL_AUTH_NONE,
VIR_STORAGE_POOL_AUTH_CHAP,
+ VIR_STORAGE_POOL_AUTH_CEPHX,
};
typedef struct _virStoragePoolAuthChap virStoragePoolAuthChap;
@@ -146,6 +148,15 @@ struct _virStoragePoolAuthChap {
char *passwd;
};
+typedef struct _virStoragePoolAuthCephx virStoragePoolAuthCephx;
+typedef virStoragePoolAuthCephx *virStoragePoolAuthCephxPtr;
+struct _virStoragePoolAuthCephx {
+ char *username;
+ struct {
+ char *uuid;
For UUID's we normally use
unsigned char uuid[VIR_UUID_BUFLEN];
and the virUUIDFormat/virUUIDParse APIs in the XML conversion steps
+ char *usage;
+ } secret;
+};
/*
* For remote pools, info on how to reach the host
@@ -215,6 +226,10 @@ struct _virStoragePoolSource {
/* An optional host */
virStoragePoolSourceHost host;
+ /* Or multiple hosts */
+ int nhost;
+ virStoragePoolSourceHostPtr hosts;
Can you use 'size_t' here instead of 'int', and while
you're at it, you can kjill off the existing 'host'
field and just make existing drivers use the new
'hosts' field.
It would be desirable if you created a completely separate
patch to just change 'host' to 'hosts' + 'nhosts', and update
the existing drivers. Then let you RBD patch depend on it.
diff --git a/src/storage/storage_backend_rbd.c
b/src/storage/storage_backend_rbd.c
new file mode 100644
index 0000000..056059a
--- /dev/null
+++ b/src/storage/storage_backend_rbd.c
+
+struct _virStorageBackendRBDState {
+ rados_t cluster;
+ rados_ioctx_t ioctx;
+ time_t starttime;
+};
+
+typedef struct _virStorageBackendRBDState virStorageBackendRBDState;
+typedef virStorageBackendRBDState virStorageBackendRBDStatePtr;
+
+static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr *ptr,
+ virConnectPtr conn,
+ virStoragePoolObjPtr pool)
+{
+ int ret = -1;
+ unsigned char *secret_value;
+ size_t secret_value_size;
+ char *rados_key;
+ virBuffer mon_host = VIR_BUFFER_INITIALIZER;
+ virSecretPtr secret = NULL;
+
+ VIR_DEBUG("Found Cephx username: %s",
+ pool->def->source.auth.cephx.username);
+
+ if (pool->def->source.auth.cephx.username != NULL) {
+ VIR_DEBUG("Using cephx authorization");
+ if (rados_create(&ptr->cluster,
+ pool->def->source.auth.cephx.username) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to initialize RADOS"));
+ goto cleanup;
+ }
+
+ if (pool->def->source.auth.cephx.secret.uuid != NULL) {
+ VIR_DEBUG("Looking up secret by UUID: %s",
+ pool->def->source.auth.cephx.secret.uuid);
+ secret = virSecretLookupByUUIDString(conn,
+
pool->def->source.auth.cephx.secret.uuid);
+ }
+
+ if (pool->def->source.auth.cephx.secret.usage != NULL) {
+ VIR_DEBUG("Looking up secret by usage: %s",
+ pool->def->source.auth.cephx.secret.usage);
+ secret = virSecretLookupByUsage(conn, VIR_SECRET_USAGE_TYPE_CEPH,
+
pool->def->source.auth.cephx.secret.usage);
+ }
+
+ if (secret == NULL) {
+ virStorageReportError(VIR_ERR_NO_SECRET,
+ _("failed to find the secret"));
+ goto cleanup;
+ }
+
+ secret_value = virSecretGetValue(secret, &secret_value_size, 0);
+ base64_encode_alloc((char *)secret_value,
+ secret_value_size, &rados_key);
+ memset(secret_value, 0, secret_value_size);
+
+ if (rados_key == NULL) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to decode the RADOS key"));
+ goto cleanup;
+ }
+
+ VIR_DEBUG("Found cephx key: %s", rados_key);
+ if (rados_conf_set(ptr->cluster, "key", rados_key) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to set RADOS option: %s"),
+ "rados_key");
+ goto cleanup;
+ }
+
+ memset(rados_key, 0, strlen(rados_key));
+
+ if (rados_conf_set(ptr->cluster, "auth_supported",
"cephx") < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to set RADOS option: %s"),
+ "auth_supported");
+ goto cleanup;
+ }
+ } else {
+ VIR_DEBUG("Not using cephx authorization");
+ if (rados_conf_set(ptr->cluster, "auth_supported",
"none") < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to set RADOS option: %s"),
+ "auth_supported");
+ goto cleanup;
+ }
+ if (rados_create(&ptr->cluster, NULL) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to create the RADOS cluster"));
+ goto cleanup;
+ }
+ }
+
+ VIR_DEBUG("Found %d RADOS cluster monitors in the pool configuration",
+ pool->def->source.nhost);
+
+ int i;
+ for (i = 0; i < pool->def->source.nhost; i++) {
+ if (pool->def->source.hosts[i].name != NULL &&
+ !pool->def->source.hosts[i].port) {
+ virBufferAsprintf(&mon_host, "%s:6789,",
+ pool->def->source.hosts[i].name);
+ } else if (pool->def->source.hosts[i].name != NULL &&
+ pool->def->source.hosts[i].port) {
+ virBufferAsprintf(&mon_host, "%s:%d,",
+ pool->def->source.hosts[i].name,
+ pool->def->source.hosts[i].port);
+ } else {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("received malformed monitor, check the XML
definition"));
+ }
+ }
+
+ char *mon_buff = virBufferContentAndReset(&mon_host);
Before calling this you are required to do
if (virBufferError(&mon_host)) {
virReportOOMError();
goto cleanup;
}
otherwise 'mon_buff' could be NULL.
+ VIR_DEBUG("RADOS mon_host has been set to: %s",
mon_buff);
+ if (rados_conf_set(ptr->cluster, "mon_host", mon_buff) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to set RADOS option: %s"),
+ "mon_host");
+ goto cleanup;
+ }
+
+ ptr->starttime = time(0);
+ if (rados_connect(ptr->cluster) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to connect to the RADOS monitor on:
%s"),
+ virBufferContentAndReset(&mon_host));
+ goto cleanup;
+ }
+
+ ret = 0;
+
+cleanup:
+ VIR_FREE(secret_value);
+ VIR_FREE(rados_key);
+ virSecretFree(secret);
+ virBufferFreeAndReset(&mon_host);
Does 'mon_buff' need to be free()d here, or does 'ptr->cluster'
now own the buffer you gave it ?
+static int
virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr)
+{
+ int ret = 0;
+
+ if (ptr.ioctx != NULL) {
+ VIR_DEBUG("Closing RADOS IoCTX");
+ rados_ioctx_destroy(ptr.ioctx);
+ ret = -1;
+ }
+
+ if (ptr.cluster != NULL) {
+ VIR_DEBUG("Closing RADOS connection");
+ rados_shutdown(ptr.cluster);
+ ret = -2;
+ }
+
+ time_t runtime = time(0) - ptr.starttime;
+ VIR_DEBUG("RADOS connection existed for %ld seconds", runtime);
I think you want to set ptr.cluster & ptr.ioctx to NULL here, to protect
against being called twice.
+static int virStorageBackendRBDRefreshPool(virConnectPtr conn
ATTRIBUTE_UNUSED,
+ virStoragePoolObjPtr pool)
+{
+ size_t max_size = 1024;
+ int ret = -1;
+ virStorageBackendRBDStatePtr ptr;
+ ptr.cluster = NULL;
+ ptr.ioctx = NULL;
+
+ if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, pool) < 0) {
+ goto cleanup;
+ }
+
+ if (rados_ioctx_create(ptr.cluster,
+ pool->def->source.name, &ptr.ioctx) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to create the RBD IoCTX. Does the pool
'%s' exist?"),
+ pool->def->source.name);
+ goto cleanup;
+ }
+
+ struct rados_cluster_stat_t stat;
+ if (rados_cluster_stat(ptr.cluster, &stat) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("failed to stat the RADOS cluster"));
+ goto cleanup;
+ }
+
+ struct rados_pool_stat_t poolstat;
+ if (rados_ioctx_pool_stat(ptr.ioctx, &poolstat) < 0) {
+ virStorageReportError(VIR_ERR_INTERNAL_ERROR,
+ _("failed to stat the RADOS pool
'%s'"),
+ pool->def->source.name);
+ goto cleanup;
+ }
+
+ pool->def->capacity = stat.kb * 1024;
+ pool->def->available = stat.kb_avail * 1024;
+ pool->def->allocation = poolstat.num_bytes;
+
+ int num_images, i;
+ char *names, *name = NULL;
+
Our preference is for variables to be declared at the start of the
block, since it gives clear initialization semantics when you consider
the use of 'goto'.
eg if one of those earlier 'goto cleanup' statements run, you can't
know whether 'name' was initialized to NULL or not. IIRC we have
compiler flags that should have generated a warning on this.
+ if (VIR_ALLOC_N(names, 1024) < 0)
+ goto cleanup;
Need a virReportOOMError() here
+
+ int len = rbd_list(ptr.ioctx, names, &max_size);
+
+ for (i = 0, num_images = 0, name = names; name < names + len; i++) {
+
+ if (VIR_REALLOC_N(pool->volumes.objs, pool->volumes.count + 1) < 0) {
+ virStoragePoolObjClearVols(pool);
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ virStorageVolDefPtr vol;
+ if (VIR_ALLOC(vol) < 0)
+ goto cleanup;
And virReportOOMError() here.
+
+ vol->name = strdup(name);
+ if (vol->name == NULL)
+ goto cleanup;
And here.
You might find it preferable to add a block at the end
no_memory:
virReportOOMError();
goto cleanup;
+ name += strlen(name) + 1;
+
+ if (volStorageBackendRBDRefreshVolInfo(vol, pool, ptr) < 0)
+ goto cleanup;
+
+ pool->volumes.objs[pool->volumes.count++] = vol;
+ }
+
+ VIR_DEBUG("Refreshed RBD pool %s (kb: %llu kb_avail: %llu num_bytes: %llu
num_images: %d)",
+ pool->def->source.name, (unsigned long long)stat.kb,
+ (unsigned long long)stat.kb_avail,
+ (unsigned long long)poolstat.num_bytes, pool->volumes.count);
+
+ ret = 0;
+
+cleanup:
+ VIR_FREE(names);
+ virStorageBackendRBDCloseRADOSConn(ptr);
+ return ret;
+}
Overall I think your patch looks pretty good & look forward to updated
versions
Regards,
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|