On Mon, Nov 11, 2013 at 09:19:30PM -0700, Eric Blake wrote:
Actually put gfapi to use, by allowing the creation of a gluster
pool. Right now, all volumes are treated as raw; further patches
will allow peering into files to allow for qcow2 files and backing
chains, and reporting proper volume allocation.
I've reported a couple of glusterfs bugs; if we were to require a
minimum of (not-yet-released) glusterfs 3.5, we could use the new
glfs_readdir [1] and not worry about the bogus return value of
glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs
3.4.1.
[1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html
[2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html
* src/storage/storage_backend_gluster.c
(virStorageBackendGlusterRefreshPool): Initial implementation.
(virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New
helper functions.
Signed-off-by: Eric Blake <eblake(a)redhat.com>
---
src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++-
1 file changed, 215 insertions(+), 5 deletions(-)
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
index 2863c73..bc90de9 100644
--- a/src/storage/storage_backend_gluster.c
+++ b/src/storage/storage_backend_gluster.c
@@ -23,20 +23,230 @@
#include <glusterfs/api/glfs.h>
-#include "virerror.h"
#include "storage_backend_gluster.h"
#include "storage_conf.h"
+#include "viralloc.h"
+#include "virerror.h"
+#include "virlog.h"
+#include "virstoragefile.h"
+#include "virstring.h"
+#include "viruri.h"
#define VIR_FROM_THIS VIR_FROM_STORAGE
+struct _virStorageBackendGlusterState {
+ glfs_t *vol;
+
+ /* Accept the same URIs as qemu's block/gluster.c:
+ * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */
+ virURI *uri;
+
+ char *volname; /* vol from URI */
+ char *dir; /* dir from URI, or "." */
+};
+
+typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
+typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
+
+static void
+virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
+{
+ if (!state)
+ return;
+
+ /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for
+ * glfs_fini, with errno containing random data, so there's no way
+ * to tell if it succeeded. 3.4.2 is supposed to fix this.*/
+ if (state->vol && glfs_fini(state->vol) < 0)
+ VIR_DEBUG("shutdown of gluster volume %s failed with errno %d",
+ state->volname, errno);
+
+ virURIFree(state->uri);
+ VIR_FREE(state->volname);
+ VIR_FREE(state->dir);
+ VIR_FREE(state);
+}
+
+static virStorageBackendGlusterStatePtr
+virStorageBackendGlusterOpen(virStoragePoolObjPtr pool)
+{
+ virStorageBackendGlusterStatePtr ret = NULL;
+ char *sub;
+ const char *name = pool->def->source.name;
+
+ if (VIR_ALLOC(ret) < 0)
+ return NULL;
+
+ if (*name == '/')
+ name++;
+ if ((sub = strchr(name, '/'))) {
+ if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 ||
+ VIR_STRDUP(ret->dir, sub) < 0)
+ goto error;
+ } else {
+ if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 ||
+ VIR_STRDUP(ret->dir, ".") < 0)
+ goto error;
+ }
+
+ /* FIXME: Currently hard-coded to tcp transport; XML needs to be
+ * extended to allow alternate transport */
+ if (VIR_ALLOC(ret->uri) < 0 ||
+ VIR_STRDUP(ret->uri->scheme, "gluster") < 0 ||
+        VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 ||
+        virAsprintf(&ret->uri->path, "%s%s",
+                    *pool->def->source.name == '/' ? "" : "/",
+                    pool->def->source.name) < 0)
+ goto error;
I'm not a huge fan of chaining together statements like this,
since some gdb versions are pretty awful at telling you which
one of these 4 statements has the problem on segv.
+ ret->uri->port = pool->def->source.hosts[0].port;
+
+ if (!(ret->vol = glfs_new(ret->volname))) {
+ virReportOOMError();
+ goto error;
+ }
+
+ if (glfs_set_volfile_server(ret->vol, "tcp",
+ ret->uri->server, ret->uri->port) < 0 ||
+ glfs_init(ret->vol) < 0) {
+ char *uri = virURIFormat(ret->uri);
+
+ virReportSystemError(errno, _("failed to connect to %s"),
+ NULLSTR(uri));
+ VIR_FREE(uri);
+ goto error;
+ }
+
+ return ret;
+
+error:
+ virStorageBackendGlusterClose(ret);
+ return NULL;
+}
+
+
+/* Populate *volptr for the given name and stat information, or leave
+ * it NULL if the entry should be skipped (such as "."). Return 0 on
+ * success, -1 on failure. */
+static int
+virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state,
+ const char *pool,
+ const char *name,
+ struct stat *st,
+ virStorageVolDefPtr *volptr)
+{
+ char *tmp;
+ int ret = -1;
+ virStorageVolDefPtr vol = NULL;
+
+ /* Silently skip directories, including '.' and '..'. FIXME:
+ * should non-'.' subdirectories be listed as type dir? */
What behaviour does the 'fs' pool have for non-'.' subdirs?
Seems we perhaps ought to match that.
+ *volptr = NULL;
+ if (S_ISDIR(st->st_mode))
+ return 0;
+
+ if (VIR_ALLOC(vol) < 0 ||
+ VIR_STRDUP(vol->name, name) < 0 ||
+        virAsprintf(&vol->key, "%s%s/%s",
+                    *pool == '/' ? "" : "/", pool, vol->name) < 0)
+ goto cleanup;
+
+ /* FIXME - must open files to determine if they are non-raw */
+ vol->type = VIR_STORAGE_VOL_NETWORK;
+ vol->target.format = VIR_STORAGE_FILE_RAW;
+ vol->capacity = vol->allocation = st->st_size;
So gluster has no concept of sparse volumes ?
+ tmp = state->uri->path;
+ state->uri->path = vol->key;
+ if (!(vol->target.path = virURIFormat(state->uri))) {
+ state->uri->path = tmp;
+ goto cleanup;
+ }
+ state->uri->path = tmp;
+
+ *volptr = vol;
+ vol = NULL;
+ ret = 0;
+cleanup:
+ virStorageVolDefFree(vol);
+ return ret;
+}
static int
virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED,
- virStoragePoolObjPtr pool ATTRIBUTE_UNUSED)
+ virStoragePoolObjPtr pool)
{
- virReportError(VIR_ERR_NO_SUPPORT, "%s",
- _("gluster pool type not fully supported yet"));
- return -1;
+ int ret = -1;
+ virStorageBackendGlusterStatePtr state = NULL;
+ struct {
+ struct dirent ent;
+ /* See comment below about readdir_r needing padding */
+ char padding[MAX(1, 256 - (int) (sizeof(struct dirent)
+ - offsetof(struct dirent, d_name)))];
+ } de;
+ struct dirent *ent;
+ glfs_fd_t *dir = NULL;
+ struct stat st;
+ struct statvfs sb;
+
+ if (!(state = virStorageBackendGlusterOpen(pool)))
+ goto cleanup;
What kind of overhead is there in opening / closing gluster connections ?
Is it something we should just do in the pool start/stop methods, instead
of on every operation like refresh.
+
+ /* Why oh why did glfs 3.4 decide to expose only readdir_r rather
+ * than readdir? POSIX admits that readdir_r is inherently a
+ * flawed design, because systems are not required to define
+     * NAME_MAX: http://austingroupbugs.net/view.php?id=696
+     * http://womble.decadent.org.uk/readdir_r-advisory.html
+ *
+ * Fortunately, gluster appears to limit its underlying bricks to
+ * only use file systems such as XFS that have a NAME_MAX of 255;
+ * so we are currently guaranteed that if we provide 256 bytes of
+ * tail padding, then we should have enough space to avoid buffer
+ * overflow no matter whether the OS used d_name[], d_name[1], or
+ * d_name[256] in its 'struct dirent'.
+     * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html
+     */
+
+ if (!(dir = glfs_opendir(state->vol, state->dir))) {
+ virReportSystemError(errno, _("cannot open path '%s'"),
+ pool->def->name);
+ goto cleanup;
+ }
+    while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) {
+ virStorageVolDefPtr vol;
+ int okay = virStorageBackendGlusterRefreshVol(state,
+ pool->def->source.name,
+ ent->d_name, &st,
+ &vol);
+ if (okay < 0)
+ goto cleanup;
+        if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count,
+                                      vol) < 0)
+ goto cleanup;
+ }
+ if (errno) {
+        virReportSystemError(errno, _("failed to read directory '%s'"),
+                             pool->def->source.name);
+ goto cleanup;
+ }
+
+ if (glfs_statvfs(state->vol, state->dir, &sb) < 0) {
+        virReportSystemError(errno, _("cannot statvfs path '%s'"), pool->def->source.name);
Seems you lost the line break here.
+ goto cleanup;
+ }
+
+ pool->def->capacity = ((unsigned long long)sb.f_frsize *
+ (unsigned long long)sb.f_blocks);
+ pool->def->available = ((unsigned long long)sb.f_bfree *
+ (unsigned long long)sb.f_frsize);
+    pool->def->allocation = pool->def->capacity - pool->def->available;
+
+ ret = 0;
+cleanup:
+ if (dir)
+ glfs_closedir(dir);
+ virStorageBackendGlusterClose(state);
+ if (ret < 0)
+ virStoragePoolObjClearVols(pool);
+ return ret;
}
Daniel
--
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|