[libvirt] [PATCHv3 0/4] glusterfs storage pool

v2 at: https://www.redhat.com/archives/libvir-list/2013-November/msg00099.html
Since then: minor improvements to patch 1 and 3 based on testing; patch 4 is new

Eric Blake (4):
  storage: initial support for linking with libgfapi
  storage: document gluster pool
  storage: implement rudimentary glusterfs pool refresh
  storage: probe qcow2 volumes in gluster pool

 configure.ac                                 |  21 ++
 docs/formatstorage.html.in                   |  11 +-
 docs/schemas/storagepool.rng                 |  21 ++
 docs/storage.html.in                         |  90 ++++++-
 libvirt.spec.in                              |  15 ++
 m4/virt-gluster.m4                           |  26 ++
 po/POTFILES.in                               |   1 +
 src/Makefile.am                              |   9 +
 src/conf/storage_conf.c                      |  20 +-
 src/conf/storage_conf.h                      |   3 +-
 src/storage/storage_backend.c                |   6 +
 src/storage/storage_backend.h                |   6 +-
 src/storage/storage_backend_gluster.c        | 343 +++++++++++++++++++++++++++
 src/storage/storage_backend_gluster.h        |  29 +++
 tests/storagepoolxml2xmlin/pool-gluster.xml  |   8 +
 tests/storagepoolxml2xmlout/pool-gluster.xml |  11 +
 tests/storagepoolxml2xmltest.c               |   1 +
 17 files changed, 608 insertions(+), 13 deletions(-)
 create mode 100644 m4/virt-gluster.m4
 create mode 100644 src/storage/storage_backend_gluster.c
 create mode 100644 src/storage/storage_backend_gluster.h
 create mode 100644 tests/storagepoolxml2xmlin/pool-gluster.xml
 create mode 100644 tests/storagepoolxml2xmlout/pool-gluster.xml

--
1.8.3.1

We support gluster volumes in domain XML, so we also ought to support
them as a storage pool.  Besides, a future patch will want to take
advantage of libgfapi to handle the case of a gluster device holding
qcow2 rather than raw storage, and for that to work, we need a storage
backend that can read gluster storage volume contents.  This sets up
the framework.

* configure.ac (WITH_STORAGE_GLUSTER): New conditional.
* m4/virt-gluster.m4: new file.
* libvirt.spec.in (BuildRequires): Support gluster in spec file.
* src/conf/storage_conf.h (VIR_STORAGE_POOL_GLUSTER): New pool type.
* src/conf/storage_conf.c (poolTypeInfo): Treat similar to sheepdog
and rbd.
(virStoragePoolDefFormat): Don't output target for gluster.
* src/storage/storage_backend_gluster.h: New file.
* src/storage/storage_backend_gluster.c: Likewise.
* po/POTFILES.in: Add new file.
* src/storage/storage_backend.c (backends): Register new type.
* src/Makefile.am (STORAGE_DRIVER_GLUSTER_SOURCES): Build new files.
* src/storage/storage_backend.h (_virStorageBackend): Document
assumption.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 configure.ac                          | 21 ++++++++++++++++
 libvirt.spec.in                       | 15 ++++++++++++
 m4/virt-gluster.m4                    | 26 ++++++++++++++++++++
 po/POTFILES.in                        |  1 +
 src/Makefile.am                       |  9 +++++++
 src/conf/storage_conf.c               | 20 ++++++++++++---
 src/conf/storage_conf.h               |  3 ++-
 src/storage/storage_backend.c         |  6 +++++
 src/storage/storage_backend.h         |  6 +++--
 src/storage/storage_backend_gluster.c | 46 +++++++++++++++++++++++++++++++++++
 src/storage/storage_backend_gluster.h | 29 ++++++++++++++++++++++
 11 files changed, 175 insertions(+), 7 deletions(-)
 create mode 100644 m4/virt-gluster.m4
 create mode 100644 src/storage/storage_backend_gluster.c
 create mode 100644 src/storage/storage_backend_gluster.h

diff --git a/configure.ac b/configure.ac
index 6003871..b193bc1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -228,6 +228,7 @@ LIBVIRT_CHECK_CAPNG
 LIBVIRT_CHECK_CURL
 LIBVIRT_CHECK_DBUS
 LIBVIRT_CHECK_FUSE
+LIBVIRT_CHECK_GLUSTER
 LIBVIRT_CHECK_HAL
 LIBVIRT_CHECK_NETCF
 LIBVIRT_CHECK_NUMACTL
@@ -1643,6 +1644,10 @@ AC_ARG_WITH([storage-sheepdog],
   [AS_HELP_STRING([--with-storage-sheepdog],
     [with Sheepdog backend for the storage driver @<:@default=check@:>@])],
   [],[with_storage_sheepdog=check])
+AC_ARG_WITH([storage-gluster],
+  [AS_HELP_STRING([--with-storage-gluster],
+    [with Gluster backend for the storage driver @<:@default=check@:>@])],
+  [],[with_storage_gluster=check])

 if test "$with_libvirtd" = "no"; then
   with_storage_dir=no
@@ -1654,6 +1659,7 @@ if test "$with_libvirtd" = "no"; then
   with_storage_disk=no
   with_storage_rbd=no
   with_storage_sheepdog=no
+  with_storage_gluster=no
 fi
 if test "$with_storage_dir" = "yes" ; then
   AC_DEFINE_UNQUOTED([WITH_STORAGE_DIR], 1, [whether directory backend for storage driver is enabled])
@@ -1855,6 +1861,19 @@ fi
 AM_CONDITIONAL([WITH_STORAGE_SHEEPDOG], [test "$with_storage_sheepdog" = "yes"])

+LIBGLUSTER_LIBS=
+if test "$with_storage_gluster" = "check"; then
+  with_storage_gluster=$with_glfs
+fi
+if test "$with_storage_gluster" = "yes"; then
+  if test "$with_glfs" = no; then
+    AC_MSG_ERROR([Need glfs (libgfapi) for gluster storage driver])
+  fi
+  AC_DEFINE_UNQUOTED([WITH_STORAGE_GLUSTER], [1],
+    [whether Gluster backend for storage driver is enabled])
+fi
+AM_CONDITIONAL([WITH_STORAGE_GLUSTER], [test "$with_storage_gluster" = "yes"])
+
 LIBPARTED_CFLAGS=
 LIBPARTED_LIBS=
@@ -2667,6 +2686,7 @@ AC_MSG_NOTICE([   mpath: $with_storage_mpath])
 AC_MSG_NOTICE([    Disk: $with_storage_disk])
 AC_MSG_NOTICE([     RBD: $with_storage_rbd])
 AC_MSG_NOTICE([Sheepdog: $with_storage_sheepdog])
+AC_MSG_NOTICE([ Gluster: $with_storage_gluster])
 AC_MSG_NOTICE([])
 AC_MSG_NOTICE([Security Drivers])
 AC_MSG_NOTICE([])
@@ -2692,6 +2712,7 @@ LIBVIRT_RESULT_CAPNG
 LIBVIRT_RESULT_CURL
 LIBVIRT_RESULT_DBUS
 LIBVIRT_RESULT_FUSE
+LIBVIRT_RESULT_GLUSTER
 LIBVIRT_RESULT_HAL
 LIBVIRT_RESULT_NETCF
 LIBVIRT_RESULT_NUMACTL
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 72815f4..a90ee2b 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -98,6 +98,11 @@
 %else
     %define with_storage_sheepdog 0
 %endif
+%if 0%{?fedora} >= 19
+    %define with_storage_gluster 0%{!?_without_storage_gluster:%{server_drivers}}
+%else
+    %define with_storage_gluster 0
+%endif
 %define with_numactl 0%{!?_without_numactl:%{server_drivers}}
 %define with_selinux 0%{!?_without_selinux:%{server_drivers}}
@@ -281,6 +286,7 @@
     %define with_storage_mpath 0
     %define with_storage_rbd 0
     %define with_storage_sheepdog 0
+    %define with_storage_gluster 0
     %define with_storage_disk 0
 %endif
@@ -555,6 +561,10 @@ BuildRequires: device-mapper-devel
 BuildRequires: ceph-devel
 %endif
 %endif
+%if %{with_storage_gluster}
+BuildRequires: glusterfs-api-devel
+BuildRequires: glusterfs-devel
+%endif
 %if %{with_numactl}
 # For QEMU/LXC numa info
 BuildRequires: numactl-devel
@@ -1274,6 +1284,10 @@ of recent versions of Linux (and other OSes).
     %define _without_storage_sheepdog --without-storage-sheepdog
 %endif

+%if ! %{with_storage_gluster}
+    %define _without_storage_gluster --without-storage-gluster
+%endif
+
 %if ! %{with_numactl}
     %define _without_numactl --without-numactl
 %endif
@@ -1396,6 +1410,7 @@ of recent versions of Linux (and other OSes).
            %{?_without_storage_mpath} \
            %{?_without_storage_rbd} \
            %{?_without_storage_sheepdog} \
+           %{?_without_storage_gluster} \
            %{?_without_numactl} \
            %{?_without_numad} \
            %{?_without_capng} \
diff --git a/m4/virt-gluster.m4 b/m4/virt-gluster.m4
new file mode 100644
index 0000000..4851e17
--- /dev/null
+++ b/m4/virt-gluster.m4
@@ -0,0 +1,26 @@
+dnl The gluster libgfapi.so library
+dnl
+dnl Copyright (C) 2013 Red Hat, Inc.
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library.  If not, see
+dnl <http://www.gnu.org/licenses/>.
+dnl
+
+AC_DEFUN([LIBVIRT_CHECK_GLUSTER],[
+  LIBVIRT_CHECK_PKG([GLFS], [glusterfs-api], [3.0])
+])
+
+AC_DEFUN([LIBVIRT_RESULT_GLUSTER],[
+  LIBVIRT_RESULT_LIB([GLFS])
+])
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 15afdec..0835b52 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -133,6 +133,7 @@ src/storage/parthelper.c
 src/storage/storage_backend.c
 src/storage/storage_backend_disk.c
 src/storage/storage_backend_fs.c
+src/storage/storage_backend_gluster.c
 src/storage/storage_backend_iscsi.c
 src/storage/storage_backend_logical.c
 src/storage/storage_backend_mpath.c
diff --git a/src/Makefile.am b/src/Makefile.am
index ad39b74..be9978a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -819,6 +819,9 @@ STORAGE_DRIVER_RBD_SOURCES = \
 STORAGE_DRIVER_SHEEPDOG_SOURCES = \
 		storage/storage_backend_sheepdog.h storage/storage_backend_sheepdog.c

+STORAGE_DRIVER_GLUSTER_SOURCES = \
+		storage/storage_backend_gluster.h storage/storage_backend_gluster.c
+
 STORAGE_HELPER_DISK_SOURCES = \
 		storage/parthelper.c

@@ -1434,6 +1437,11 @@ if WITH_STORAGE_SHEEPDOG
 libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_SHEEPDOG_SOURCES)
 endif WITH_STORAGE_SHEEPDOG

+if WITH_STORAGE_GLUSTER
+libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_GLUSTER_SOURCES)
+libvirt_driver_storage_impl_la_LIBADD += $(GLFS_LIBS)
+endif WITH_STORAGE_GLUSTER
+
 if WITH_NODE_DEVICES
 # Needed to keep automake quiet about conditionals
 if WITH_DRIVER_MODULES
@@ -1634,6 +1642,7 @@ EXTRA_DIST += \
 		$(STORAGE_DRIVER_DISK_SOURCES) \
 		$(STORAGE_DRIVER_RBD_SOURCES) \
 		$(STORAGE_DRIVER_SHEEPDOG_SOURCES) \
+		$(STORAGE_DRIVER_GLUSTER_SOURCES) \
 		$(NODE_DEVICE_DRIVER_SOURCES) \
 		$(NODE_DEVICE_DRIVER_HAL_SOURCES) \
 		$(NODE_DEVICE_DRIVER_UDEV_SOURCES) \
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index 975e662..8299f2a 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -55,7 +55,7 @@ VIR_ENUM_IMPL(virStoragePool,
               VIR_STORAGE_POOL_LAST,
               "dir", "fs", "netfs",
               "logical", "disk", "iscsi",
-              "scsi", "mpath", "rbd", "sheepdog")
+              "scsi", "mpath", "rbd", "sheepdog", "gluster")

 VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
               VIR_STORAGE_POOL_FS_LAST,
@@ -244,6 +244,17 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
          .formatToString = virStoragePoolFormatDiskTypeToString,
      }
     },
+    {.poolType = VIR_STORAGE_POOL_GLUSTER,
+     .poolOptions = {
+         .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+                   VIR_STORAGE_POOL_SOURCE_NETWORK |
+                   VIR_STORAGE_POOL_SOURCE_NAME),
+     },
+     .volOptions = {
+         .defaultFormat = VIR_STORAGE_FILE_RAW,
+         .formatToString = virStorageFileFormatTypeToString,
+     }
+    },
     {.poolType = VIR_STORAGE_POOL_MPATH,
      .volOptions = {
          .formatToString = virStoragePoolFormatDiskTypeToString,
@@ -1196,10 +1207,11 @@ virStoragePoolDefFormat(virStoragePoolDefPtr def)
     if (virStoragePoolSourceFormat(&buf, options, &def->source) < 0)
         goto cleanup;

-    /* RBD and Sheepdog devices are not local block devs nor files, so it
-     * doesn't have a target */
+    /* RBD, Sheepdog, and Gluster devices are not local block devs nor
+     * files, so they don't have a target */
     if (def->type != VIR_STORAGE_POOL_RBD &&
-        def->type != VIR_STORAGE_POOL_SHEEPDOG) {
+        def->type != VIR_STORAGE_POOL_SHEEPDOG &&
+        def->type != VIR_STORAGE_POOL_GLUSTER) {
         virBufferAddLit(&buf,"  <target>\n");

         if (def->target.path)
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index f062bd8..d13cb99 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -1,7 +1,7 @@
 /*
  * storage_conf.h: config handling for storage driver
  *
- * Copyright (C) 2006-2008, 2010-2012 Red Hat, Inc.
+ * Copyright (C) 2006-2008, 2010-2013 Red Hat, Inc.
  * Copyright (C) 2006-2008 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -128,6 +128,7 @@ enum virStoragePoolType {
     VIR_STORAGE_POOL_MPATH,    /* Multipath devices */
     VIR_STORAGE_POOL_RBD,      /* RADOS Block Device */
     VIR_STORAGE_POOL_SHEEPDOG, /* Sheepdog device */
+    VIR_STORAGE_POOL_GLUSTER,  /* Gluster device */

     VIR_STORAGE_POOL_LAST,
 };
diff --git a/src/storage/storage_backend.c b/src/storage/storage_backend.c
index 1e3292b..c94e89a 100644
--- a/src/storage/storage_backend.c
+++ b/src/storage/storage_backend.c
@@ -80,6 +80,9 @@
 #if WITH_STORAGE_SHEEPDOG
 # include "storage_backend_sheepdog.h"
 #endif
+#if WITH_STORAGE_GLUSTER
+# include "storage_backend_gluster.h"
+#endif

 #define VIR_FROM_THIS VIR_FROM_STORAGE

@@ -112,6 +115,9 @@ static virStorageBackendPtr backends[] = {
 #if WITH_STORAGE_SHEEPDOG
     &virStorageBackendSheepdog,
 #endif
+#if WITH_STORAGE_GLUSTER
+    &virStorageBackendGluster,
+#endif
     NULL
 };
diff --git a/src/storage/storage_backend.h b/src/storage/storage_backend.h
index d8d3097..9e07dd8 100644
--- a/src/storage/storage_backend.h
+++ b/src/storage/storage_backend.h
@@ -1,7 +1,7 @@
 /*
  * storage_backend.h: internal storage driver backend contract
  *
- * Copyright (C) 2007-2010, 2012 Red Hat, Inc.
+ * Copyright (C) 2007-2010, 2012-2013 Red Hat, Inc.
  * Copyright (C) 2007-2008 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -70,6 +70,8 @@ virStorageBackendFSImageToolTypeToFunc(int tool_type);
 typedef struct _virStorageBackend virStorageBackend;
 typedef virStorageBackend *virStorageBackendPtr;

+/* Callbacks are optional unless documented otherwise; but adding more
+ * callbacks provides better pool support. */
 struct _virStorageBackend {
     int type;

@@ -77,7 +79,7 @@ struct _virStorageBackend {
     virStorageBackendCheckPool checkPool;
     virStorageBackendStartPool startPool;
     virStorageBackendBuildPool buildPool;
-    virStorageBackendRefreshPool refreshPool;
+    virStorageBackendRefreshPool refreshPool; /* Must be non-NULL */
     virStorageBackendStopPool stopPool;
     virStorageBackendDeletePool deletePool;
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
new file mode 100644
index 0000000..2863c73
--- /dev/null
+++ b/src/storage/storage_backend_gluster.c
@@ -0,0 +1,46 @@
+/*
+ * storage_backend_gluster.c: storage backend for Gluster handling
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <config.h>
+
+#include <glusterfs/api/glfs.h>
+
+#include "virerror.h"
+#include "storage_backend_gluster.h"
+#include "storage_conf.h"
+
+#define VIR_FROM_THIS VIR_FROM_STORAGE
+
+
+static int
+virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED,
+                                    virStoragePoolObjPtr pool ATTRIBUTE_UNUSED)
+{
+    virReportError(VIR_ERR_NO_SUPPORT, "%s",
+                   _("gluster pool type not fully supported yet"));
+    return -1;
+}
+
+virStorageBackend virStorageBackendGluster = {
+    .type = VIR_STORAGE_POOL_GLUSTER,
+
+    .refreshPool = virStorageBackendGlusterRefreshPool,
+};
diff --git a/src/storage/storage_backend_gluster.h b/src/storage/storage_backend_gluster.h
new file mode 100644
index 0000000..b21bda7
--- /dev/null
+++ b/src/storage/storage_backend_gluster.h
@@ -0,0 +1,29 @@
+/*
+ * storage_backend_gluster.h: storage backend for Gluster handling
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef __VIR_STORAGE_BACKEND_GLUSTER_H__
+# define __VIR_STORAGE_BACKEND_GLUSTER_H__
+
+# include "storage_backend.h"
+
+extern virStorageBackend virStorageBackendGluster;
+
+#endif /* __VIR_STORAGE_BACKEND_GLUSTER_H__ */
--
1.8.3.1

On 11/12/13 05:19, Eric Blake wrote:
We support gluster volumes in domain XML, so we also ought to support them as a storage pool. Besides, a future patch will want to take advantage of libgfapi to handle the case of a gluster device holding qcow2 rather than raw storage, and for that to work, we need a storage backend that can read gluster storage volume contents. This sets up the framework.
* configure.ac (WITH_STORAGE_GLUSTER): New conditional.
* m4/virt-gluster.m4: new file.
* libvirt.spec.in (BuildRequires): Support gluster in spec file.
* src/conf/storage_conf.h (VIR_STORAGE_POOL_GLUSTER): New pool type.
* src/conf/storage_conf.c (poolTypeInfo): Treat similar to sheepdog
and rbd.
(virStoragePoolDefFormat): Don't output target for gluster.
* src/storage/storage_backend_gluster.h: New file.
* src/storage/storage_backend_gluster.c: Likewise.
* po/POTFILES.in: Add new file.
* src/storage/storage_backend.c (backends): Register new type.
* src/Makefile.am (STORAGE_DRIVER_GLUSTER_SOURCES): Build new files.
* src/storage/storage_backend.h (_virStorageBackend): Document
assumption.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 configure.ac                          | 21 ++++++++++++++++
 libvirt.spec.in                       | 15 ++++++++++++
 m4/virt-gluster.m4                    | 26 ++++++++++++++++++++
 po/POTFILES.in                        |  1 +
 src/Makefile.am                       |  9 +++++++
 src/conf/storage_conf.c               | 20 ++++++++++++---
 src/conf/storage_conf.h               |  3 ++-
 src/storage/storage_backend.c         |  6 +++++
 src/storage/storage_backend.h         |  6 +++--
 src/storage/storage_backend_gluster.c | 46 +++++++++++++++++++++++++++++++++++
 src/storage/storage_backend_gluster.h | 29 ++++++++++++++++++++++
 11 files changed, 175 insertions(+), 7 deletions(-)
 create mode 100644 m4/virt-gluster.m4
 create mode 100644 src/storage/storage_backend_gluster.c
 create mode 100644 src/storage/storage_backend_gluster.h
diff --git a/configure.ac b/configure.ac
index 6003871..b193bc1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -228,6 +228,7 @@ LIBVIRT_CHECK_CAPNG
 LIBVIRT_CHECK_CURL
 LIBVIRT_CHECK_DBUS
 LIBVIRT_CHECK_FUSE
+LIBVIRT_CHECK_GLUSTER
 LIBVIRT_CHECK_HAL
 LIBVIRT_CHECK_NETCF
 LIBVIRT_CHECK_NUMACTL
@@ -1643,6 +1644,10 @@ AC_ARG_WITH([storage-sheepdog],
   [AS_HELP_STRING([--with-storage-sheepdog],
     [with Sheepdog backend for the storage driver @<:@default=check@:>@])],
   [],[with_storage_sheepdog=check])
+AC_ARG_WITH([storage-gluster],
+  [AS_HELP_STRING([--with-storage-gluster],
+    [with Gluster backend for the storage driver @<:@default=check@:>@])],
+  [],[with_storage_gluster=check])
if test "$with_libvirtd" = "no"; then with_storage_dir=no @@ -1654,6 +1659,7 @@ if test "$with_libvirtd" = "no"; then with_storage_disk=no with_storage_rbd=no with_storage_sheepdog=no + with_storage_gluster=no fi if test "$with_storage_dir" = "yes" ; then AC_DEFINE_UNQUOTED([WITH_STORAGE_DIR], 1, [whether directory backend for storage driver is enabled]) @@ -1855,6 +1861,19 @@ fi AM_CONDITIONAL([WITH_STORAGE_SHEEPDOG], [test "$with_storage_sheepdog" = "yes"])
+LIBGLUSTER_LIBS=
+if test "$with_storage_gluster" = "check"; then
+  with_storage_gluster=$with_glfs
+fi
+if test "$with_storage_gluster" = "yes"; then
+  if test "$with_glfs" = no; then
+    AC_MSG_ERROR([Need glfs (libgfapi) for gluster storage driver])
+  fi
+  AC_DEFINE_UNQUOTED([WITH_STORAGE_GLUSTER], [1],
+    [whether Gluster backend for storage driver is enabled])
+fi
+AM_CONDITIONAL([WITH_STORAGE_GLUSTER], [test "$with_storage_gluster" = "yes"])
+
 LIBPARTED_CFLAGS=
 LIBPARTED_LIBS=
@@ -2667,6 +2686,7 @@ AC_MSG_NOTICE([   mpath: $with_storage_mpath])
 AC_MSG_NOTICE([    Disk: $with_storage_disk])
 AC_MSG_NOTICE([     RBD: $with_storage_rbd])
 AC_MSG_NOTICE([Sheepdog: $with_storage_sheepdog])
+AC_MSG_NOTICE([ Gluster: $with_storage_gluster])
 AC_MSG_NOTICE([])
 AC_MSG_NOTICE([Security Drivers])
 AC_MSG_NOTICE([])
@@ -2692,6 +2712,7 @@ LIBVIRT_RESULT_CAPNG
 LIBVIRT_RESULT_CURL
 LIBVIRT_RESULT_DBUS
 LIBVIRT_RESULT_FUSE
+LIBVIRT_RESULT_GLUSTER
 LIBVIRT_RESULT_HAL
 LIBVIRT_RESULT_NETCF
 LIBVIRT_RESULT_NUMACTL
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 72815f4..a90ee2b 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -98,6 +98,11 @@
 %else
     %define with_storage_sheepdog 0
 %endif
+%if 0%{?fedora} >= 19
+    %define with_storage_gluster 0%{!?_without_storage_gluster:%{server_drivers}}
+%else
+    %define with_storage_gluster 0
+%endif
 %define with_numactl 0%{!?_without_numactl:%{server_drivers}}
 %define with_selinux 0%{!?_without_selinux:%{server_drivers}}
@@ -281,6 +286,7 @@
     %define with_storage_mpath 0
     %define with_storage_rbd 0
     %define with_storage_sheepdog 0
+    %define with_storage_gluster 0
     %define with_storage_disk 0
 %endif
@@ -555,6 +561,10 @@ BuildRequires: device-mapper-devel
 BuildRequires: ceph-devel
 %endif
 %endif
+%if %{with_storage_gluster}
+BuildRequires: glusterfs-api-devel
+BuildRequires: glusterfs-devel
+%endif
 %if %{with_numactl}
 # For QEMU/LXC numa info
 BuildRequires: numactl-devel
@@ -1274,6 +1284,10 @@ of recent versions of Linux (and other OSes).
     %define _without_storage_sheepdog --without-storage-sheepdog
 %endif
+%if ! %{with_storage_gluster}
+    %define _without_storage_gluster --without-storage-gluster
+%endif
+
 %if ! %{with_numactl}
     %define _without_numactl --without-numactl
 %endif
@@ -1396,6 +1410,7 @@ of recent versions of Linux (and other OSes).
            %{?_without_storage_mpath} \
            %{?_without_storage_rbd} \
            %{?_without_storage_sheepdog} \
+           %{?_without_storage_gluster} \
            %{?_without_numactl} \
            %{?_without_numad} \
            %{?_without_capng} \
diff --git a/m4/virt-gluster.m4 b/m4/virt-gluster.m4
new file mode 100644
index 0000000..4851e17
--- /dev/null
+++ b/m4/virt-gluster.m4
@@ -0,0 +1,26 @@
+dnl The gluster libgfapi.so library
+dnl
+dnl Copyright (C) 2013 Red Hat, Inc.
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library.  If not, see
+dnl <http://www.gnu.org/licenses/>.
+dnl
+
+AC_DEFUN([LIBVIRT_CHECK_GLUSTER],[
+  LIBVIRT_CHECK_PKG([GLFS], [glusterfs-api], [3.0])
+])
+
+AC_DEFUN([LIBVIRT_RESULT_GLUSTER],[
+  LIBVIRT_RESULT_LIB([GLFS])
+])
For the hunks above, I can't provide any useful feedback as I don't
really have experience with automake and stuff ... :/

<snip>

Weak ACK based on compile testing the stuff above.

Peter

On Mon, Nov 11, 2013 at 09:19:28PM -0700, Eric Blake wrote:
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 72815f4..a90ee2b 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -555,6 +561,10 @@ BuildRequires: device-mapper-devel
 BuildRequires: ceph-devel
 %endif
 %endif
+%if %{with_storage_gluster}
+BuildRequires: glusterfs-api-devel
+BuildRequires: glusterfs-devel
+%endif
This accepts any version of the packages.
diff --git a/m4/virt-gluster.m4 b/m4/virt-gluster.m4
new file mode 100644
index 0000000..4851e17
--- /dev/null
+++ b/m4/virt-gluster.m4
+
+AC_DEFUN([LIBVIRT_CHECK_GLUSTER],[
+  LIBVIRT_CHECK_PKG([GLFS], [glusterfs-api], [3.0])
+])
But this suggests only version >= 3.0 is acceptable. Should we make
the RPM dep explicit for this?

I think I'd prefer s/GLFS/GLUSTERFS/ - it's only used in a few
places, so being more verbose doesn't hurt us much.
diff --git a/src/Makefile.am b/src/Makefile.am
index ad39b74..be9978a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1434,6 +1437,11 @@ if WITH_STORAGE_SHEEPDOG
 libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_SHEEPDOG_SOURCES)
 endif WITH_STORAGE_SHEEPDOG
+if WITH_STORAGE_GLUSTER
+libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_GLUSTER_SOURCES)
+libvirt_driver_storage_impl_la_LIBADD += $(GLFS_LIBS)
Hmm, I don't see CFLAGS ever mentioning GLFS_CFLAGS, which could cause
problems for anyone who has installed gluster outside of the /usr
hierarchy.
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index 975e662..8299f2a 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -55,7 +55,7 @@ VIR_ENUM_IMPL(virStoragePool,
               VIR_STORAGE_POOL_LAST,
               "dir", "fs", "netfs",
               "logical", "disk", "iscsi",
-              "scsi", "mpath", "rbd", "sheepdog")
+              "scsi", "mpath", "rbd", "sheepdog", "gluster")
Hmm, well that sucks - in some places we use 'gluster' and in others
we use 'glusterfs' :-( We've released code using both, so we can't
standardize everywhere now. So I guess we should stick with what you
have here.
+    {.poolType = VIR_STORAGE_POOL_GLUSTER,
+     .poolOptions = {
+         .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+                   VIR_STORAGE_POOL_SOURCE_NETWORK |
+                   VIR_STORAGE_POOL_SOURCE_NAME),
+     },
Are gluster volume names always well-formed paths? If so, is it worth
using SOURCE_PATH instead of SOURCE_NAME? I don't feel too strongly
either way.
+     .volOptions = {
+         .defaultFormat = VIR_STORAGE_FILE_RAW,
+         .formatToString = virStorageFileFormatTypeToString,
+     }
+    },
     {.poolType = VIR_STORAGE_POOL_MPATH,
      .volOptions = {
          .formatToString = virStoragePoolFormatDiskTypeToString,
ACK if you fix the nitpicks.

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 11/12/2013 09:48 AM, Daniel P. Berrange wrote:
On Mon, Nov 11, 2013 at 09:19:28PM -0700, Eric Blake wrote:
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 72815f4..a90ee2b 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -555,6 +561,10 @@ BuildRequires: device-mapper-devel
 BuildRequires: ceph-devel
 %endif
 %endif
+%if %{with_storage_gluster}
+BuildRequires: glusterfs-api-devel
+BuildRequires: glusterfs-devel
+%endif
This accepts any version of the packages.
diff --git a/m4/virt-gluster.m4 b/m4/virt-gluster.m4
new file mode 100644
index 0000000..4851e17
--- /dev/null
+++ b/m4/virt-gluster.m4
+
+AC_DEFUN([LIBVIRT_CHECK_GLUSTER],[
+  LIBVIRT_CHECK_PKG([GLFS], [glusterfs-api], [3.0])
+])
But this suggests only version >= 3.0 is acceptable. Should we make
the RPM dep explicit for this?
Okay. For that matter, so far I have only tested on Fedora 19, with
3.4.1; and Peter reported an issue to me off-list where the 3.4.0
client has a rather nasty bug that caused glfs_open/glfs_read to hang
rather than diagnose a permission-denied error. So I'll go with the
conservative approach of requiring the minimum version that I've
tested with (and leave relaxing to earlier versions to a future
patch, if we ever decide to).
I think I'd prefer s/GLFS/GLUSTERFS/ - it's only used in a few
places, so being more verbose doesn't hurt us much.
Sure.
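Concretely, I'm thinking of something along these lines for
m4/virt-gluster.m4 (a sketch only - it assumes the rename is applied
everywhere the witness variable is used, and that 3.4.1 ends up as
the agreed floor):

  AC_DEFUN([LIBVIRT_CHECK_GLUSTER],[
    LIBVIRT_CHECK_PKG([GLUSTERFS], [glusterfs-api], [3.4.1])
  ])

  AC_DEFUN([LIBVIRT_RESULT_GLUSTER],[
    LIBVIRT_RESULT_LIB([GLUSTERFS])
  ])

plus a matching explicit version in the spec file (BuildRequires:
glusterfs-api-devel >= 3.4.1), so the RPM dependency agrees with what
configure checks for.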
diff --git a/src/Makefile.am b/src/Makefile.am
index ad39b74..be9978a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1434,6 +1437,11 @@ if WITH_STORAGE_SHEEPDOG
 libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_SHEEPDOG_SOURCES)
 endif WITH_STORAGE_SHEEPDOG
+if WITH_STORAGE_GLUSTER
+libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_GLUSTER_SOURCES)
+libvirt_driver_storage_impl_la_LIBADD += $(GLFS_LIBS)
Hmm, I don't see CFLAGS ever mentioning GLFS_CFLAGS, which could cause
problems for anyone who has installed gluster outside of the /usr
hierarchy.
Will fix.
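Presumably one more line in the conditional block, mirroring how the
other backends pull in their flags (a sketch; variable names still
pre-rename here):

  if WITH_STORAGE_GLUSTER
  libvirt_driver_storage_impl_la_SOURCES += $(STORAGE_DRIVER_GLUSTER_SOURCES)
  libvirt_driver_storage_impl_la_CFLAGS += $(GLFS_CFLAGS)
  libvirt_driver_storage_impl_la_LIBADD += $(GLFS_LIBS)
  endif WITH_STORAGE_GLUSTER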
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index 975e662..8299f2a 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -55,7 +55,7 @@ VIR_ENUM_IMPL(virStoragePool,
               VIR_STORAGE_POOL_LAST,
               "dir", "fs", "netfs",
               "logical", "disk", "iscsi",
-              "scsi", "mpath", "rbd", "sheepdog")
+              "scsi", "mpath", "rbd", "sheepdog", "gluster")
Hmm, well that sucks - in some places we use 'gluster' and in others
we use 'glusterfs' :-( We've released code using both, so we can't
standardize everywhere now.
I had to do some searching to see what you mean: the 'netfs' pool
supports three format types ('nfs', 'cifs', and 'glusterfs'), but
that applies to FUSE mounting. The <disk type='network'><source
protocol='...'> element supports 'nbd', 'rbd', 'sheepdog', and
'gluster' (among others); the other protocols already have a storage
pool named with the same protocol name.
So I guess we should stick with what you have here.
Yeah, having protocol='gluster' map to storage pool 'gluster' is more
consistent with what the other protocols do, while the 'netfs' pool
format is more a selection of which mounting type is used (where the
mount command really uses -t glusterfs). Also, going with the name
'gluster' for the new pool (gfapi, no mount involved) distinguishes
it as more efficient than the 'netfs' use of glusterfs (which
requires a mount point and has FUSE overhead).
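To make the contrast concrete, the same gluster volume could be
exposed either way (an illustrative sketch with a made-up volume
name; the netfs variant needs a local mount point, the native variant
does not):

  <pool type='netfs'>
    <name>gluster-via-fuse</name>
    <source>
      <host name='localhost'/>
      <dir path='/volname'/>
      <format type='glusterfs'/>
    </source>
    <target>
      <path>/mnt/gluster</path>
    </target>
  </pool>

  <pool type='gluster'>
    <name>gluster-native</name>
    <source>
      <host name='localhost'/>
      <name>volname</name>
    </source>
  </pool>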
+    {.poolType = VIR_STORAGE_POOL_GLUSTER,
+     .poolOptions = {
+         .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+                   VIR_STORAGE_POOL_SOURCE_NETWORK |
+                   VIR_STORAGE_POOL_SOURCE_NAME),
+     },
Are gluster volume names always well-formed paths? If so, is it worth
using SOURCE_PATH instead of SOURCE_NAME? I don't feel too strongly
either way.
Qemu supports the following URIs for using gfapi:

gluster://host/volume/image
gluster://host/volume/dir/image

Internally, use of gfapi requires that 'volume' be used without a
leading slash; below there, you can use either 'dir/image' or
'/dir/image' to refer to an image within the gluster 'volume'. But
from libvirt's perspective, I was exposing this as pool name 'volume'
when operating on the top-level directory (libvirt volume 'image'
within the pool named 'volume'), and pool name 'volume/dir' when
operating on a sub-directory (libvirt volume 'image' within the pool
named 'volume/dir'). Since use of the 'gluster(1)' command line tool
does NOT use a leading slash, I didn't want to require a leading
slash in the libvirt pool name.

Does this mean I'm overloading the <name> XML element with too much
information? Should I split it into two elements, one for the volume
name (no slash allowed), and an optional one for the subdirectory
name (leading slash required)?

Another way of looking at this problem is what the volume XML should
look like. With this round of patches, you get:

<volume>
  <name>img4</name>
  <key>/vol1/sub/img4</key>
  <target>
    <path>gluster://localhost/vol1/sub/img4</path>

I tried to make <key> encode both the volume name and subdirectory,
so that the image name is uniquely resolvable to a storage pool (that
is, I want unique keys for vol1/img, vol2/img, and vol2/sub/img,
which are necessarily accessed through three different pools, even
though all three volumes would have the same name of 'img'). This is
similar to how keys are unique for different directory pools.
Meanwhile, the <target> path is the URI that qemu will eventually use
(and which you can copy and paste into command lines where you are
using qemu-img).

I did NOT want to encode the host name into <key>, because gluster
allows you to access the volume from any host in its cluster (that
is, gluster://hosta/vol/file and gluster://hostb/vol/file can be the
same file, so reflecting 'hosta' or 'hostb' into <key> would get in
the way of using key lookup purely to differentiate between distinct
storage). Of course, my current choice of <key> naming means that a
gluster volume name and an absolute path of a regular directory pool
can collide. We still have time to tweak the scheme, if you can think
of any better plan for the choice of volume <key>s and pool <name>s.
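To restate the scheme with hypothetical names, a file 'img' reachable
through pools named 'vol1' and 'vol1/sub' would come out as:

  pool <name>  volume <name>  volume <key>   volume <target> <path>
  vol1         img            /vol1/img      gluster://localhost/vol1/img
  vol1/sub     img            /vol1/sub/img  gluster://localhost/vol1/sub/img

so the key stays host-independent but unique per file, while the
target path is directly usable by qemu.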
ACK if you fix the nitpicks.
I'll wait for feedback on my reply before committing anything.

--
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org

Add support for a new <pool type='gluster'>, similar to RBD and
Sheepdog.  Terminology-wise, a gluster volume forms a libvirt storage
pool; within the gluster volume, individual files are treated as
libvirt storage volumes.

* docs/schemas/storagepool.rng (poolgluster): New pool type.
* docs/formatstorage.html.in: Document gluster.
* docs/storage.html.in: Likewise, and contrast it with netfs.
* tests/storagepoolxml2xmlin/pool-gluster.xml: New test.
* tests/storagepoolxml2xmlout/pool-gluster.xml: Likewise.
* tests/storagepoolxml2xmltest.c (mymain): Likewise.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 docs/formatstorage.html.in                   | 11 ++--
 docs/schemas/storagepool.rng                 | 21 +++++++
 docs/storage.html.in                         | 90 +++++++++++++++++++++++++++-
 tests/storagepoolxml2xmlin/pool-gluster.xml  |  8 +++
 tests/storagepoolxml2xmlout/pool-gluster.xml | 11 ++++
 tests/storagepoolxml2xmltest.c               |  1 +
 6 files changed, 136 insertions(+), 6 deletions(-)
 create mode 100644 tests/storagepoolxml2xmlin/pool-gluster.xml
 create mode 100644 tests/storagepoolxml2xmlout/pool-gluster.xml

diff --git a/docs/formatstorage.html.in b/docs/formatstorage.html.in
index 90eeaa3..e74ad27 100644
--- a/docs/formatstorage.html.in
+++ b/docs/formatstorage.html.in
@@ -21,8 +21,10 @@
       <code>iscsi</code>, <code>logical</code>, <code>scsi</code>
       (all <span class="since">since 0.4.1</span>), <code>mpath</code>
       (<span class="since">since 0.7.1</span>), <code>rbd</code>
-      (<span class="since">since 0.9.13</span>), or <code>sheepdog</code>
-      (<span class="since">since 0.10.0</span>). This corresponds to the
+      (<span class="since">since 0.9.13</span>), <code>sheepdog</code>
+      (<span class="since">since 0.10.0</span>),
+      or <code>gluster</code> (<span class="since">since
+      1.1.4</span>). This corresponds to the
       storage backend drivers listed further along in this document.
     </p>
     <h3><a name="StoragePoolFirst">General metadata</a></h3>
@@ -129,7 +131,7 @@
       <dt><code>host</code></dt>
       <dd>Provides the source for pools backed by storage from a
         remote server (pool types <code>netfs</code>, <code>iscsi</code>,
-        <code>rbd</code>, <code>sheepdog</code>). Will be
+        <code>rbd</code>, <code>sheepdog</code>, <code>gluster</code>). Will be
        used in combination with a <code>directory</code> or <code>device</code>
        element. Contains an attribute <code>name</code> which is the hostname
        or IP address of the server. May optionally
@@ -160,7 +162,8 @@
       <dt><code>name</code></dt>
       <dd>Provides the source for pools backed by storage from a
        named element (pool types <code>logical</code>, <code>rbd</code>,
-       <code>sheepdog</code>). Contains a string identifier.
+       <code>sheepdog</code>, <code>gluster</code>). Contains a
+       string identifier.
        <span class="since">Since 0.4.5</span></dd>
       <dt><code>format</code></dt>
       <dd>Provides information about the format of the pool (pool
diff --git a/docs/schemas/storagepool.rng b/docs/schemas/storagepool.rng
index 66d3c22..17a3ae8 100644
--- a/docs/schemas/storagepool.rng
+++ b/docs/schemas/storagepool.rng
@@ -21,6 +21,7 @@
         <ref name='poolmpath'/>
         <ref name='poolrbd'/>
         <ref name='poolsheepdog'/>
+        <ref name='poolgluster'/>
       </choice>
     </element>
   </define>
@@ -145,6 +146,17 @@
     </interleave>
   </define>

+  <define name='poolgluster'>
+    <attribute name='type'>
+      <value>gluster</value>
+    </attribute>
+    <interleave>
+      <ref name='commonmetadata'/>
+      <ref name='sizing'/>
+      <ref name='sourcegluster'/>
+    </interleave>
+  </define>
+
   <define name='sourceinfovendor'>
     <interleave>
       <optional>
@@ -555,6 +567,15 @@
     </element>
   </define>

+  <define name='sourcegluster'>
+    <element name='source'>
+      <interleave>
+        <ref name='sourceinfohost'/>
+        <ref name='sourceinfoname'/>
+      </interleave>
+    </element>
+  </define>
+
   <define name='IscsiQualifiedName'>
     <data type='string'>
       <param name="pattern">iqn\.[0-9]{4}-(0[1-9]|1[0-2])\.[a-zA-Z0-9\.\-]+(:.+)?</param>
diff --git a/docs/storage.html.in b/docs/storage.html.in
index 1181444..339759d 100644
--- a/docs/storage.html.in
+++ b/docs/storage.html.in
@@ -114,6 +114,9 @@
       <li>
         <a href="#StorageBackendSheepdog">Sheepdog backend</a>
       </li>
+      <li>
+        <a href="#StorageBackendGluster">Gluster backend</a>
+      </li>
     </ul>

     <h2><a name="StorageBackendDir">Directory pool</a></h2>
@@ -275,10 +278,12 @@
       <code>nfs</code>
       </li>
       <li>
-      <code>glusterfs</code>
+      <code>glusterfs</code> - use the glusterfs FUSE file system
+      (to bypass the file system completely, see
+      the <a href="#StorageBackendGluster">gluster</a> pool).
       </li>
       <li>
-      <code>cifs</code>
+      <code>cifs</code> - use the SMB (samba) or CIFS file system
       </li>
     </ul>

@@ -647,5 +652,86 @@
       The Sheepdog pool does not use the volume format type element.
     </p>

+    <h2><a name="StorageBackendGluster">Gluster pools</a></h2>
+    <p>
+      This provides a pool based on native Gluster access.  Gluster is
+      a distributed file system that can be exposed to the user via
+      FUSE, NFS or SMB (see the <a href="#StorageBackendNetfs">netfs</a>
+      pool for that usage); but for minimal overhead, the ideal access
+      is via native access (only possible for QEMU/KVM compiled with
+      libgfapi support).
+
+      The cluster and storage volume must already be running, and it
+      is recommended that the volume be configured with <code>gluster
+      volume set $volname storage.owner-uid=$uid</code>
+      and <code>gluster volume set $volname
+      storage.owner-gid=$gid</code> for the uid and gid that qemu will
+      be run as.  It may also be necessary to
+      set <code>rpc-auth-allow-insecure on</code> for the glusterd
+      service, as well as <code>gluster set $volname
+      server.allow-insecure on</code>, to allow access to the gluster
+      volume.
+
+      <span class="since">Since 1.1.4</span>
+    </p>
+
+    <h3>Example pool input</h3>
+    <p>A gluster volume corresponds to a libvirt storage pool.  If a
+      gluster volume could be mounted as <code>mount -t glusterfs
+      localhost:/volname /some/path</code>, then the following example
+      will describe the same pool without having to create a local
+      mount point.  Remember that with gluster, the mount point can be
+      through any machine in the cluster, and gluster will
+      automatically pick the ideal transport to the actual bricks
+      backing the gluster volume, even if on a different host than the
+      one named in the <code>host</code> designation.  It is also
+      permitted to
+      use <code><name>volume/dir</name></code> to limit
+      the pool to a subdirectory within the gluster volume.</p>
+    <pre>
+      <pool type="gluster">
+        <name>myglusterpool</name>
+        <source>
+          <name>volname</name>
+          <host name='localhost'/>
+        </source>
+      </pool></pre>
+
+    <h3>Example volume output</h3>
+    <p>Libvirt storage volumes associated with a gluster pool
+      correspond to the files that can be found when mounting the
+      gluster volume.  The <code>name</code> is the path relative to
+      the effective mount specified for the pool; and
+      the <code>key</code> is the path including the gluster volume
+      name and any subdirectories specified by the pool.</p>
+    <pre>
+       <volume>
+         <name>myfile</name>
+         <key>volname/myfile</key>
+         <source>
+         </source>
+         <capacity unit='bytes'>53687091200</capacity>
+         <allocation unit='bytes'>53687091200</allocation>
+       </volume></pre>
+
+    <h3>Example disk attachment</h3>
+    <p>Files within a gluster volume can be attached to Qemu guests.
+      Information about attaching a Gluster image to a
+      guest can be found
+      at the <a href="formatdomain.html#elementsDisks">format domain</a>
+      page.</p>
+
+    <h3>Valid pool format types</h3>
+    <p>
+      The Gluster pool does not use the pool format type element.
+    </p>
+
+    <h3>Valid volume format types</h3>
+    <p>
+      The Gluster pool does not use the volume format type element;
+      for now, all files within a gluster pool are assumed to have raw
+      format.
+    </p>
+
   </body>
 </html>
diff --git a/tests/storagepoolxml2xmlin/pool-gluster.xml b/tests/storagepoolxml2xmlin/pool-gluster.xml
new file mode 100644
index 0000000..ae9401f
--- /dev/null
+++ b/tests/storagepoolxml2xmlin/pool-gluster.xml
@@ -0,0 +1,8 @@
+<pool type='gluster'>
+  <source>
+    <name>volume</name>
+    <host name='localhost'/>
+  </source>
+  <name>mygluster</name>
+  <uuid>65fcba04-5b13-bd93-cff3-52ce48e11ad8</uuid>
+</pool>
diff --git a/tests/storagepoolxml2xmlout/pool-gluster.xml b/tests/storagepoolxml2xmlout/pool-gluster.xml
new file mode 100644
index 0000000..5844c1a
--- /dev/null
+++ b/tests/storagepoolxml2xmlout/pool-gluster.xml
@@ -0,0 +1,11 @@
+<pool type='gluster'>
+  <name>mygluster</name>
+  <uuid>65fcba04-5b13-bd93-cff3-52ce48e11ad8</uuid>
+  <capacity unit='bytes'>0</capacity>
+  <allocation unit='bytes'>0</allocation>
+  <available unit='bytes'>0</available>
+  <source>
+    <host name='localhost'/>
+    <name>volume</name>
+  </source>
+</pool>
diff --git a/tests/storagepoolxml2xmltest.c b/tests/storagepoolxml2xmltest.c
index 0ae9b29..0ab72e4 100644
--- a/tests/storagepoolxml2xmltest.c
+++ b/tests/storagepoolxml2xmltest.c
@@ -100,6 +100,7 @@ mymain(void)
     DO_TEST("pool-iscsi-multiiqn");
     DO_TEST("pool-iscsi-vendor-product");
     DO_TEST("pool-sheepdog");
+    DO_TEST("pool-gluster");

     return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE;
 }
--
1.8.3.1
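As a quick usage sketch (not part of the patch: it assumes the
example pool XML above is saved as pool-gluster.xml, and volume
listing needs the refresh implementation from the next patch):

  # virsh pool-define pool-gluster.xml
  # virsh pool-start myglusterpool
  # virsh vol-list myglusterpool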

On 11/12/13 05:19, Eric Blake wrote:
Add support for a new <pool type='gluster'>, similar to RBD and Sheepdog. Terminology-wise, a gluster volume forms a libvirt storage pool; within the gluster volume, individual files are treated as libvirt storage volumes.
* docs/schemas/storagepool.rng (poolgluster): New pool type.
* docs/formatstorage.html.in: Document gluster.
* docs/storage.html.in: Likewise, and contrast it with netfs.
* tests/storagepoolxml2xmlin/pool-gluster.xml: New test.
* tests/storagepoolxml2xmlout/pool-gluster.xml: Likewise.
* tests/storagepoolxml2xmltest.c (mymain): Likewise.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 docs/formatstorage.html.in                   | 11 ++--
 docs/schemas/storagepool.rng                 | 21 +++++++
 docs/storage.html.in                         | 90 +++++++++++++++++++++++++++-
 tests/storagepoolxml2xmlin/pool-gluster.xml  |  8 +++
 tests/storagepoolxml2xmlout/pool-gluster.xml | 11 ++++
 tests/storagepoolxml2xmltest.c               |  1 +
 6 files changed, 136 insertions(+), 6 deletions(-)
 create mode 100644 tests/storagepoolxml2xmlin/pool-gluster.xml
 create mode 100644 tests/storagepoolxml2xmlout/pool-gluster.xml
diff --git a/docs/formatstorage.html.in b/docs/formatstorage.html.in
index 90eeaa3..e74ad27 100644
--- a/docs/formatstorage.html.in
+++ b/docs/formatstorage.html.in
@@ -21,8 +21,10 @@
       <code>iscsi</code>, <code>logical</code>, <code>scsi</code>
       (all <span class="since">since 0.4.1</span>), <code>mpath</code>
       (<span class="since">since 0.7.1</span>), <code>rbd</code>
-      (<span class="since">since 0.9.13</span>), or <code>sheepdog</code>
-      (<span class="since">since 0.10.0</span>). This corresponds to the
+      (<span class="since">since 0.9.13</span>), <code>sheepdog</code>
+      (<span class="since">since 0.10.0</span>),
+      or <code>gluster</code> (<span class="since">since
+      1.1.4</span>). This corresponds to the
Now 1.1.5.
       storage backend drivers listed further along in this document.
     </p>
     <h3><a name="StoragePoolFirst">General metadata</a></h3>
...
diff --git a/docs/schemas/storagepool.rng b/docs/schemas/storagepool.rng
index 66d3c22..17a3ae8 100644
--- a/docs/schemas/storagepool.rng
+++ b/docs/schemas/storagepool.rng
@@ -21,6 +21,7 @@
         <ref name='poolmpath'/>
         <ref name='poolrbd'/>
         <ref name='poolsheepdog'/>
+        <ref name='poolgluster'/>
       </choice>
     </element>
   </define>
@@ -145,6 +146,17 @@
     </interleave>
   </define>
+  <define name='poolgluster'>
+    <attribute name='type'>
+      <value>gluster</value>
+    </attribute>
+    <interleave>
+      <ref name='commonmetadata'/>
+      <ref name='sizing'/>
+      <ref name='sourcegluster'/>
+    </interleave>
+  </define>
+
   <define name='sourceinfovendor'>
     <interleave>
       <optional>
@@ -555,6 +567,15 @@
     </element>
   </define>
+  <define name='sourcegluster'>
+    <element name='source'>
+      <interleave>
+        <ref name='sourceinfohost'/>
+        <ref name='sourceinfoname'/>
+      </interleave>
+    </element>
+  </define>
+
   <define name='IscsiQualifiedName'>
     <data type='string'>
       <param name="pattern">iqn\.[0-9]{4}-(0[1-9]|1[0-2])\.[a-zA-Z0-9\.\-]+(:.+)?</param>
diff --git a/docs/storage.html.in b/docs/storage.html.in
index 1181444..339759d 100644
--- a/docs/storage.html.in
+++ b/docs/storage.html.in
@@ -114,6 +114,9 @@
       <li>
         <a href="#StorageBackendSheepdog">Sheepdog backend</a>
       </li>
+      <li>
+        <a href="#StorageBackendGluster">Gluster backend</a>
+      </li>
     </ul>
<h2><a name="StorageBackendDir">Directory pool</a></h2> @@ -275,10 +278,12 @@ <code>nfs</code> </li> <li> - <code>glusterfs</code> + <code>glusterfs</code> - use the glusterfs FUSE file system + (to bypass the file system completely, see + the <a href="#StorageBackendGluster">gluster</a> pool). </li> <li> - <code>cifs</code> + <code>cifs</code> - use the SMB (samba) or CIFS file system </li> </ul>
@@ -647,5 +652,86 @@
       The Sheepdog pool does not use the volume format type element.
     </p>
+ <h2><a name="StorageBackendGluster">Gluster pools</a></h2> + <p> + This provides a pool based on native Gluster access. Gluster is + a distributed file system that can be exposed to the user via + FUSE, NFS or SMB (see the <a href="#StorageBackendNetfs">netfs</a> + pool for that usage); but for minimal overhead, the ideal access + is via native access (only possible for QEMU/KVM compiled with + libgfapi support). + + The cluster and storage volume must already be running, and it + is recommended that the volume be configured with <code>gluster + volume set $volname storage.owner-uid=$uid</code> + and <code>gluster volume set $volname + storage.owner-gid=$gid</code> for the uid and gid that qemu will + be run as. It may also be necessary to + set <code>rpc-auth-allow-insecure on</code> for the glusterd + service, as well as <code>gluster set $volname + server.allow-insecure on</code>, to allow access to the gluster + volume. + + <span class="since">Since 1.1.4</span>
1.1.5
+    </p>
+
ACK with release number changed.

Peter

On Mon, Nov 11, 2013 at 09:19:29PM -0700, Eric Blake wrote:
Add support for a new <pool type='gluster'>, similar to RBD and Sheepdog. Terminology-wise, a gluster volume forms a libvirt storage pool; within the gluster volume, individual files are treated as libvirt storage volumes.
* docs/schemas/storagepool.rng (poolgluster): New pool type.
* docs/formatstorage.html.in: Document gluster.
* docs/storage.html.in: Likewise, and contrast it with netfs.
* tests/storagepoolxml2xmlin/pool-gluster.xml: New test.
* tests/storagepoolxml2xmlout/pool-gluster.xml: Likewise.
* tests/storagepoolxml2xmltest.c (mymain): Likewise.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 docs/formatstorage.html.in                   | 11 ++--
 docs/schemas/storagepool.rng                 | 21 +++++++
 docs/storage.html.in                         | 90 +++++++++++++++++++++++++++-
 tests/storagepoolxml2xmlin/pool-gluster.xml  |  8 +++
 tests/storagepoolxml2xmlout/pool-gluster.xml | 11 ++++
 tests/storagepoolxml2xmltest.c               |  1 +
 6 files changed, 136 insertions(+), 6 deletions(-)
 create mode 100644 tests/storagepoolxml2xmlin/pool-gluster.xml
 create mode 100644 tests/storagepoolxml2xmlout/pool-gluster.xml
ACK

Daniel
--
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Actually put gfapi to use, by allowing the creation of a gluster
pool.  Right now, all volumes are treated as raw; further patches
will allow peering into files to allow for qcow2 files and backing
chains, and reporting proper volume allocation.

I've reported a couple of glusterfs bugs; if we were to require a
minimum of (not-yet-released) glusterfs 3.5, we could use the new
glfs_readdir [1] and not worry about the bogus return value of
glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs
3.4.1.

[1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html
[2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html

* src/storage/storage_backend_gluster.c
(virStorageBackendGlusterRefreshPool): Initial implementation.
(virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New
helper functions.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++-
 1 file changed, 215 insertions(+), 5 deletions(-)

diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
index 2863c73..bc90de9 100644
--- a/src/storage/storage_backend_gluster.c
+++ b/src/storage/storage_backend_gluster.c
@@ -23,20 +23,230 @@

 #include <glusterfs/api/glfs.h>

-#include "virerror.h"
 #include "storage_backend_gluster.h"
 #include "storage_conf.h"
+#include "viralloc.h"
+#include "virerror.h"
+#include "virlog.h"
+#include "virstoragefile.h"
+#include "virstring.h"
+#include "viruri.h"

 #define VIR_FROM_THIS VIR_FROM_STORAGE

+struct _virStorageBackendGlusterState {
+    glfs_t *vol;
+
+    /* Accept the same URIs as qemu's block/gluster.c:
+     * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */
+    virURI *uri;
+
+    char *volname; /* vol from URI */
+    char *dir; /* dir from URI, or "." */
+};
+
+typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
+typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
+
+static void
+virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
+{
+    if (!state)
+        return;
+
+    /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for
+     * glfs_fini, with errno containing random data, so there's no way
+     * to tell if it succeeded.  3.4.2 is supposed to fix this. */
+    if (state->vol && glfs_fini(state->vol) < 0)
+        VIR_DEBUG("shutdown of gluster volume %s failed with errno %d",
+                  state->volname, errno);
+
+    virURIFree(state->uri);
+    VIR_FREE(state->volname);
+    VIR_FREE(state->dir);
+    VIR_FREE(state);
+}
+
+static virStorageBackendGlusterStatePtr
+virStorageBackendGlusterOpen(virStoragePoolObjPtr pool)
+{
+    virStorageBackendGlusterStatePtr ret = NULL;
+    char *sub;
+    const char *name = pool->def->source.name;
+
+    if (VIR_ALLOC(ret) < 0)
+        return NULL;
+
+    if (*name == '/')
+        name++;
+    if ((sub = strchr(name, '/'))) {
+        if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 ||
+            VIR_STRDUP(ret->dir, sub) < 0)
+            goto error;
+    } else {
+        if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 ||
+            VIR_STRDUP(ret->dir, ".") < 0)
+            goto error;
+    }
+
+    /* FIXME: Currently hard-coded to tcp transport; XML needs to be
+     * extended to allow alternate transport */
+    if (VIR_ALLOC(ret->uri) < 0 ||
+        VIR_STRDUP(ret->uri->scheme, "gluster") < 0 ||
+        VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 ||
+        virAsprintf(&ret->uri->path, "%s%s",
+                    *pool->def->source.name == '/' ? "" : "/",
+                    pool->def->source.name) < 0)
+        goto error;
+    ret->uri->port = pool->def->source.hosts[0].port;
+
+    if (!(ret->vol = glfs_new(ret->volname))) {
+        virReportOOMError();
+        goto error;
+    }
+
+    if (glfs_set_volfile_server(ret->vol, "tcp",
+                                ret->uri->server, ret->uri->port) < 0 ||
+        glfs_init(ret->vol) < 0) {
+        char *uri = virURIFormat(ret->uri);
+
+        virReportSystemError(errno, _("failed to connect to %s"),
+                             NULLSTR(uri));
+        VIR_FREE(uri);
+        goto error;
+    }
+
+    return ret;
+
+error:
+    virStorageBackendGlusterClose(ret);
+    return NULL;
+}
+
+
+/* Populate *volptr for the given name and stat information, or leave
+ * it NULL if the entry should be skipped (such as ".").  Return 0 on
+ * success, -1 on failure. */
+static int
+virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state,
+                                   const char *pool,
+                                   const char *name,
+                                   struct stat *st,
+                                   virStorageVolDefPtr *volptr)
+{
+    char *tmp;
+    int ret = -1;
+    virStorageVolDefPtr vol = NULL;
+
+    /* Silently skip directories, including '.' and '..'.  FIXME:
+     * should non-'.' subdirectories be listed as type dir? */
+    *volptr = NULL;
+    if (S_ISDIR(st->st_mode))
+        return 0;
+
+    if (VIR_ALLOC(vol) < 0 ||
+        VIR_STRDUP(vol->name, name) < 0 ||
+        virAsprintf(&vol->key, "%s%s/%s",
+                    *pool == '/' ? "" : "/", pool, vol->name) < 0)
+        goto cleanup;
+
+    /* FIXME - must open files to determine if they are non-raw */
+    vol->type = VIR_STORAGE_VOL_NETWORK;
+    vol->target.format = VIR_STORAGE_FILE_RAW;
+    vol->capacity = vol->allocation = st->st_size;
+    tmp = state->uri->path;
+    state->uri->path = vol->key;
+    if (!(vol->target.path = virURIFormat(state->uri))) {
+        state->uri->path = tmp;
+        goto cleanup;
+    }
+    state->uri->path = tmp;
+
+    *volptr = vol;
+    vol = NULL;
+    ret = 0;
+cleanup:
+    virStorageVolDefFree(vol);
+    return ret;
+}

 static int
 virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED,
-                                    virStoragePoolObjPtr pool ATTRIBUTE_UNUSED)
+                                    virStoragePoolObjPtr pool)
 {
-    virReportError(VIR_ERR_NO_SUPPORT, "%s",
-                   _("gluster pool type not fully supported yet"));
-    return -1;
+    int ret = -1;
+    virStorageBackendGlusterStatePtr state = NULL;
+    struct {
+        struct dirent ent;
+        /* See comment below about readdir_r needing padding */
+        char padding[MAX(1, 256 - (int) (sizeof(struct dirent)
+                                         - offsetof(struct dirent, d_name)))];
+    } de;
+    struct dirent *ent;
+    glfs_fd_t *dir = NULL;
+    struct stat st;
+    struct statvfs sb;
+
+    if (!(state = virStorageBackendGlusterOpen(pool)))
+        goto cleanup;
+
+    /* Why oh why did glfs 3.4 decide to expose only readdir_r rather
+     * than readdir?  POSIX admits that readdir_r is inherently a
+     * flawed design, because systems are not required to define
+     * NAME_MAX: http://austingroupbugs.net/view.php?id=696
+     * http://womble.decadent.org.uk/readdir_r-advisory.html
+     *
+     * Fortunately, gluster appears to limit its underlying bricks to
+     * only use file systems such as XFS that have a NAME_MAX of 255;
+     * so we are currently guaranteed that if we provide 256 bytes of
+     * tail padding, then we should have enough space to avoid buffer
+     * overflow no matter whether the OS used d_name[], d_name[1], or
+     * d_name[256] in its 'struct dirent'.
+     * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html
+     */
+
+    if (!(dir = glfs_opendir(state->vol, state->dir))) {
+        virReportSystemError(errno, _("cannot open path '%s'"),
+                             pool->def->name);
+        goto cleanup;
+    }
+    while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) {
+        virStorageVolDefPtr vol;
+        int okay = virStorageBackendGlusterRefreshVol(state,
+                                                      pool->def->source.name,
+                                                      ent->d_name, &st,
+                                                      &vol);
+
+        if (okay < 0)
+            goto cleanup;
+        if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count,
+                                      vol) < 0)
+            goto cleanup;
+    }
+    if (errno) {
+        virReportSystemError(errno, _("failed to read directory '%s'"),
+                             pool->def->source.name);
+        goto cleanup;
+    }
+
+    if (glfs_statvfs(state->vol, state->dir, &sb) < 0) {
+        virReportSystemError(errno, _("cannot statvfs path '%s'"),
+                             pool->def->source.name);
+        goto cleanup;
+    }
+
+    pool->def->capacity = ((unsigned long long)sb.f_frsize *
+                           (unsigned long long)sb.f_blocks);
+    pool->def->available = ((unsigned long long)sb.f_bfree *
+                            (unsigned long long)sb.f_frsize);
+    pool->def->allocation = pool->def->capacity - pool->def->available;
+
+    ret = 0;
+cleanup:
+    if (dir)
+        glfs_closedir(dir);
+    virStorageBackendGlusterClose(state);
+    if (ret < 0)
+        virStoragePoolObjClearVols(pool);
+    return ret;
 }

 virStorageBackend virStorageBackendGluster = {
--
1.8.3.1
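For comparison, here is roughly how the readdir loop could shrink
once glusterfs >= 3.5 can be required (hypothetical until that
release exists; glfs_readdirplus per the gluster-devel proposal in
[1] returns the next entry directly and fills in the stat buffer,
making the padded 'de' buffer unnecessary):

      struct dirent *ent;

      /* Sketch only: NULL return with errno still 0 means end of
       * directory; a non-zero errno means a real read failure. */
      errno = 0;
      while ((ent = glfs_readdirplus(dir, &st))) {
          virStorageVolDefPtr vol;

          if (virStorageBackendGlusterRefreshVol(state,
                                                 pool->def->source.name,
                                                 ent->d_name, &st, &vol) < 0)
              goto cleanup;
          if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs,
                                        pool->volumes.count, vol) < 0)
              goto cleanup;
          errno = 0;
      }
      if (errno) {
          virReportSystemError(errno, _("failed to read directory '%s'"),
                               pool->def->source.name);
          goto cleanup;
      }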

On 11/12/13 05:19, Eric Blake wrote:
Actually put gfapi to use, by allowing the creation of a gluster pool. Right now, all volumes are treated as raw; further patches will allow peering into files to allow for qcow2 files and backing chains, and reporting proper volume allocation.
I've reported a couple of glusterfs bugs; if we were to require a minimum of (not-yet-released) glusterfs 3.5, we could use the new glfs_readdir [1] and not worry about the bogus return value of glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs 3.4.1.
I'm not sure if this paragraph is suitable in a commit message.
[1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html
[2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html
* src/storage/storage_backend_gluster.c
(virStorageBackendGlusterRefreshPool): Initial implementation.
(virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New
helper functions.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
 src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++-
 1 file changed, 215 insertions(+), 5 deletions(-)
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
index 2863c73..bc90de9 100644
--- a/src/storage/storage_backend_gluster.c
+++ b/src/storage/storage_backend_gluster.c
@@ -23,20 +23,230 @@
#include <glusterfs/api/glfs.h>
-#include "virerror.h" #include "storage_backend_gluster.h" #include "storage_conf.h" +#include "viralloc.h" +#include "virerror.h" +#include "virlog.h" +#include "virstoragefile.h" +#include "virstring.h" +#include "viruri.h"
#define VIR_FROM_THIS VIR_FROM_STORAGE
+struct _virStorageBackendGlusterState { + glfs_t *vol; + + /* Accept the same URIs as qemu's block/gluster.c: + * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */ + virURI *uri; + + char *volname; /* vol from URI */ + char *dir; /* dir from URI, or "." */ +}; + +typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState; +typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr; + +static void +virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) +{ + if (!state) + return; + + /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for + * glfs_fini, with errno containing random data, so there's no way + * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ + if (state->vol && glfs_fini(state->vol) < 0) + VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", + state->volname, errno);
I'm not sure we can do anything else if closing the volume fails. The only change would be to report something more critical.
+ + virURIFree(state->uri); + VIR_FREE(state->volname); + VIR_FREE(state->dir); + VIR_FREE(state); +} + +static virStorageBackendGlusterStatePtr +virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) +{ + virStorageBackendGlusterStatePtr ret = NULL; + char *sub; + const char *name = pool->def->source.name; + + if (VIR_ALLOC(ret) < 0) + return NULL; + + if (*name == '/') + name++;
Wouldn't it be better just to forbid names with slashes? Or trim them away when creating the pool? That would avoid multiple such hacks.
+ if ((sub = strchr(name, '/'))) { + if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 || + VIR_STRDUP(ret->dir, sub) < 0) + goto error; + } else { + if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 || + VIR_STRDUP(ret->dir, ".") < 0) + goto error; + } + + /* FIXME: Currently hard-coded to tcp transport; XML needs to be + * extended to allow alternate transport */
Okay for now IMHO.
+ if (VIR_ALLOC(ret->uri) < 0 || + VIR_STRDUP(ret->uri->scheme, "gluster") < 0 || + VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 || + virAsprintf(&ret->uri->path, "%s%s", + *pool->def->source.name == '/' ? "" : "/", + pool->def->source.name) < 0)
Same as above: if we trimmed away the slash, we could add it back unconditionally without needing to check for it.
+ goto error; + ret->uri->port = pool->def->source.hosts[0].port; + + if (!(ret->vol = glfs_new(ret->volname))) { + virReportOOMError(); + goto error; + } + + if (glfs_set_volfile_server(ret->vol, "tcp", + ret->uri->server, ret->uri->port) < 0 || + glfs_init(ret->vol) < 0) { + char *uri = virURIFormat(ret->uri); + + virReportSystemError(errno, _("failed to connect to %s"), + NULLSTR(uri));
I'd suggest adding apostrophes around %s to denote the URI as we do in other places.
+ VIR_FREE(uri); + goto error; + } + + return ret; + +error: + virStorageBackendGlusterClose(ret); + return NULL; +} + + +/* Populate *volptr for the given name and stat information, or leave + * it NULL if the entry should be skipped (such as "."). Return 0 on + * success, -1 on failure. */ +static int +virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, + const char *pool, + const char *name, + struct stat *st, + virStorageVolDefPtr *volptr) +{ + char *tmp; + int ret = -1; + virStorageVolDefPtr vol = NULL; + + /* Silently skip directories, including '.' and '..'. FIXME: + * should non-'.' subdirectories be listed as type dir? */
Is it even possible to use them with qemu as dir-type volumes?
+ *volptr = NULL; + if (S_ISDIR(st->st_mode)) + return 0; + + if (VIR_ALLOC(vol) < 0 || + VIR_STRDUP(vol->name, name) < 0 || + virAsprintf(&vol->key, "%s%s/%s", + *pool == '/' ? "" : "/", pool, vol->name) < 0)
Yet another place that could be cleaned up by explicitly forbidding or stripping the slash.
+ goto cleanup; + + /* FIXME - must open files to determine if they are non-raw */ + vol->type = VIR_STORAGE_VOL_NETWORK; + vol->target.format = VIR_STORAGE_FILE_RAW; + vol->capacity = vol->allocation = st->st_size; + tmp = state->uri->path; + state->uri->path = vol->key; + if (!(vol->target.path = virURIFormat(state->uri))) { + state->uri->path = tmp; + goto cleanup; + } + state->uri->path = tmp; + + *volptr = vol; + vol = NULL; + ret = 0; +cleanup: + virStorageVolDefFree(vol); + return ret; +}
static int virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED, - virStoragePoolObjPtr pool ATTRIBUTE_UNUSED) + virStoragePoolObjPtr pool) { - virReportError(VIR_ERR_NO_SUPPORT, "%s", - _("gluster pool type not fully supported yet")); - return -1; + int ret = -1; + virStorageBackendGlusterStatePtr state = NULL; + struct { + struct dirent ent; + /* See comment below about readdir_r needing padding */ + char padding[MAX(1, 256 - (int) (sizeof(struct dirent) + - offsetof(struct dirent, d_name)))]; + } de; + struct dirent *ent; + glfs_fd_t *dir = NULL; + struct stat st; + struct statvfs sb; + + if (!(state = virStorageBackendGlusterOpen(pool))) + goto cleanup; + + /* Why oh why did glfs 3.4 decide to expose only readdir_r rather + * than readdir? POSIX admits that readdir_r is inherently a + * flawed design, because systems are not required to define + * NAME_MAX: http://austingroupbugs.net/view.php?id=696 + * http://womble.decadent.org.uk/readdir_r-advisory.html + * + * Fortunately, gluster appears to limit its underlying bricks to + * only use file systems such as XFS that have a NAME_MAX of 255;
Basically any usable filesystem to date has a NAME_MAX of 255. There are a few exceptions, like Reiser4, but those are not production ready yet. It's also questionable whether gluster would work correctly on such a filesystem.
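In isolation, the padding idiom the patch relies on boils down to something like this sketch (the struct name is illustrative; libvirt gets MAX() from internal.h, but it is spelled out here so the fragment stands alone):

    #include <dirent.h>
    #include <stddef.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    struct paddedDirent {
        struct dirent ent;
        /* guarantee 256 bytes are addressable through ent.d_name
         * (255-byte name plus NUL), whether this platform declares
         * d_name as d_name[], d_name[1], or d_name[256] */
        char padding[MAX(1, 256 - (int)(sizeof(struct dirent)
                                        - offsetof(struct dirent, d_name)))];
    };

A struct paddedDirent then serves as the buffer handed to glfs_readdirplus_r(), with &buf.ent as the entry pointer.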
+ * so we are currently guaranteed that if we provide 256 bytes of + * tail padding, then we should have enough space to avoid buffer + * overflow no matter whether the OS used d_name[], d_name[1], or + * d_name[256] in its 'struct dirent'. + * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html + */ + + if (!(dir = glfs_opendir(state->vol, state->dir))) { + virReportSystemError(errno, _("cannot open path '%s'"), + pool->def->name); + goto cleanup; + } + while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) { + virStorageVolDefPtr vol; + int okay = virStorageBackendGlusterRefreshVol(state, + pool->def->source.name, + ent->d_name, &st, + &vol); + if (okay < 0) + goto cleanup; + if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count, + vol) < 0) + goto cleanup; + } + if (errno) { + virReportSystemError(errno, _("failed to read directory '%s'"), + pool->def->source.name); + goto cleanup; + } + + if (glfs_statvfs(state->vol, state->dir, &sb) < 0) { + virReportSystemError(errno, _("cannot statvfs path '%s'"), pool->def->source.name);
Indentation is off.
+ goto cleanup; + } + + pool->def->capacity = ((unsigned long long)sb.f_frsize * + (unsigned long long)sb.f_blocks); + pool->def->available = ((unsigned long long)sb.f_bfree * + (unsigned long long)sb.f_frsize); + pool->def->allocation = pool->def->capacity - pool->def->available; + + ret = 0; +cleanup: + if (dir) + glfs_closedir(dir); + virStorageBackendGlusterClose(state); + if (ret < 0) + virStoragePoolObjClearVols(pool); + return ret; }
virStorageBackend virStorageBackendGluster = {
ACK with some nits fixed and questions answered. Peter

On 11/12/2013 08:35 AM, Peter Krempa wrote:
On 11/12/13 05:19, Eric Blake wrote:
Actually put gfapi to use, by allowing the creation of a gluster pool. Right now, all volumes are treated as raw; further patches will allow peering into files to allow for qcow2 files and backing chains, and reporting proper volume allocation.
I've reported a couple of glusterfs bugs; if we were to require a minimum of (not-yet-released) glusterfs 3.5, we could use the new glfs_readdir [1] and not worry about the bogus return value of glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs 3.4.1.
I'm not sure if this paragraph is suitable in a commit message.
Yes, because it justifies why we require a minimum version of gluster, and what it would take to bump that minimum version (whether up or down). But perhaps it belongs better in 1/4 when we actually do autoconf probing for a particular glusterfs version. I'll move it.
+ + /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for + * glfs_fini, with errno containing random data, so there's no way + * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ + if (state->vol && glfs_fini(state->vol) < 0) + VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", + state->volname, errno);
I'm not sure if we can do anything else if close of the volume fails. The only change would be to report something more critical.
The problem is that gluster 3.4.1 _always_ reports failure, even when it succeeds. If we were to require a minimum of (not-yet-released) 3.4.2, where that gluster bug is fixed, then yes we could log something more critical on failure to close.
+static virStorageBackendGlusterStatePtr +virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) +{ + virStorageBackendGlusterStatePtr ret = NULL; + char *sub; + const char *name = pool->def->source.name; + + if (VIR_ALLOC(ret) < 0) + return NULL; + + if (*name == '/') + name++;
Wouldn't it be better just to forbid names with slashes? or trim them away when creating the pool? This would avoid multiple of such hacks.
Dan asked a similar question on 1/4; I guess we need to make sure we are happy with the XML representation (or whether I need to revisit things to make the XML specify volume separately from subdirectory, rather than my current attempt to hack volume/subdir into a single name).
+ /* FIXME: Currently hard-coded to tcp transport; XML needs to be + * extended to allow alternate transport */
Okay for now IMHO.
+ if (VIR_ALLOC(ret->uri) < 0 || + VIR_STRDUP(ret->uri->scheme, "gluster") < 0 || + VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 || + virAsprintf(&ret->uri->path, "%s%s", + *pool->def->source.name == '/' ? "" : "/", + pool->def->source.name) < 0)
Same as above, if we would trim away the slash, we could add it without the need to check for it.
And maybe I should just canonicalize what the user passed in, so that the rest of the code doesn't have to keep checking. But one thing I did do well in this patch was isolating the '/' games to just the open method; the rest of the code reuses details learned here, rather than repeating the conditionals.
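Something like this hypothetical helper is the kind of canonicalization I have in mind (illustrative only - neither the name nor the exact splitting rules are in the patch; VIR_STRDUP/VIR_STRNDUP are libvirt's virstring.h helpers, returning < 0 on OOM):

    /* Split a pool source name such as "/vol/some/dir" or "vol" into
     * its gluster volume name and a directory relative to the volume
     * root, stripping leading slashes up front. */
    static int
    glusterSplitSourceName(const char *name, char **volname, char **dir)
    {
        const char *sub;

        while (*name == '/')
            name++;

        if ((sub = strchr(name, '/'))) {
            if (VIR_STRNDUP(*volname, name, sub - name) < 0 ||
                VIR_STRDUP(*dir, sub + 1) < 0)
                return -1;
        } else {
            if (VIR_STRDUP(*volname, name) < 0 ||
                VIR_STRDUP(*dir, ".") < 0)
                return -1;
        }
        return 0;
    }

With that in place, the URI path could always be built as "/%s" without conditionals.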
+ virReportSystemError(errno, _("failed to connect to %s"), + NULLSTR(uri));
I'd suggest adding apostrophes around %s to denote the URI as we do in other places.
Done.
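That is, only the quoting in the format string changes:

    virReportSystemError(errno, _("failed to connect to '%s'"),
                         NULLSTR(uri));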
+ /* Silently skip directories, including '.' and '..'. FIXME: + * should non-'.' subdirectories be listed as type dir? */
Is it even possible to use them with qemu as dir-type volumes?
Yes, a 'directory' pool lists dir-type volumes for all subdirectories, skipping only '.' and '..'. I'll see about wiring that in, and whether it's easier to respin this patch or treat it as a followup. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Mon, Nov 11, 2013 at 09:19:30PM -0700, Eric Blake wrote:
Actually put gfapi to use, by allowing the creation of a gluster pool. Right now, all volumes are treated as raw; further patches will allow peering into files to allow for qcow2 files and backing chains, and reporting proper volume allocation.
I've reported a couple of glusterfs bugs; if we were to require a minimum of (not-yet-released) glusterfs 3.5, we could use the new glfs_readdir [1] and not worry about the bogus return value of glfs_fini [2], but for now I'm testing with Fedora 19's glusterfs 3.4.1.
[1] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00085.html [2] http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00086.html
* src/storage/storage_backend_gluster.c (virStorageBackendGlusterRefreshPool): Initial implementation. (virStorageBackendGlusterOpen, virStorageBackendGlusterClose): New helper functions.
Signed-off-by: Eric Blake <eblake@redhat.com> --- src/storage/storage_backend_gluster.c | 220 +++++++++++++++++++++++++++++++++- 1 file changed, 215 insertions(+), 5 deletions(-)
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c index 2863c73..bc90de9 100644 --- a/src/storage/storage_backend_gluster.c +++ b/src/storage/storage_backend_gluster.c @@ -23,20 +23,230 @@
#include <glusterfs/api/glfs.h>
-#include "virerror.h" #include "storage_backend_gluster.h" #include "storage_conf.h" +#include "viralloc.h" +#include "virerror.h" +#include "virlog.h" +#include "virstoragefile.h" +#include "virstring.h" +#include "viruri.h"
#define VIR_FROM_THIS VIR_FROM_STORAGE
+struct _virStorageBackendGlusterState { + glfs_t *vol; + + /* Accept the same URIs as qemu's block/gluster.c: + * gluster[+transport]://[server[:port]]/vol/[dir/]image[?socket=...] */ + virURI *uri; + + char *volname; /* vol from URI */ + char *dir; /* dir from URI, or "." */ +}; + +typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState; +typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr; + +static void +virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state) +{ + if (!state) + return; + + /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for + * glfs_fini, with errno containing random data, so there's no way + * to tell if it succeeded. 3.4.2 is supposed to fix this.*/ + if (state->vol && glfs_fini(state->vol) < 0) + VIR_DEBUG("shutdown of gluster volume %s failed with errno %d", + state->volname, errno); + + virURIFree(state->uri); + VIR_FREE(state->volname); + VIR_FREE(state->dir); + VIR_FREE(state); +} + +static virStorageBackendGlusterStatePtr +virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) +{ + virStorageBackendGlusterStatePtr ret = NULL; + char *sub; + const char *name = pool->def->source.name; + + if (VIR_ALLOC(ret) < 0) + return NULL; + + if (*name == '/') + name++; + if ((sub = strchr(name, '/'))) { + if (VIR_STRNDUP(ret->volname, name, sub - name) < 0 || + VIR_STRDUP(ret->dir, sub) < 0) + goto error; + } else { + if (VIR_STRDUP(ret->volname, pool->def->source.name) < 0 || + VIR_STRDUP(ret->dir, ".") < 0) + goto error; + } + + /* FIXME: Currently hard-coded to tcp transport; XML needs to be + * extended to allow alternate transport */ + if (VIR_ALLOC(ret->uri) < 0 || + VIR_STRDUP(ret->uri->scheme, "gluster") < 0 || + VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 || + virAsprintf(&ret->uri->path, "%s%s", + *pool->def->source.name == '/' ? "" : "/", + pool->def->source.name) < 0) + goto error;
I'm not a huge fan of chaining together statements like this, since some gdb versions are pretty awful at telling you which one of these 4 statements has the problem on segv.
+ ret->uri->port = pool->def->source.hosts[0].port; + + if (!(ret->vol = glfs_new(ret->volname))) { + virReportOOMError(); + goto error; + } + + if (glfs_set_volfile_server(ret->vol, "tcp", + ret->uri->server, ret->uri->port) < 0 || + glfs_init(ret->vol) < 0) { + char *uri = virURIFormat(ret->uri); + + virReportSystemError(errno, _("failed to connect to %s"), + NULLSTR(uri)); + VIR_FREE(uri); + goto error; + } + + return ret; + +error: + virStorageBackendGlusterClose(ret); + return NULL; +} + + +/* Populate *volptr for the given name and stat information, or leave + * it NULL if the entry should be skipped (such as "."). Return 0 on + * success, -1 on failure. */ +static int +virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, + const char *pool, + const char *name, + struct stat *st, + virStorageVolDefPtr *volptr) +{ + char *tmp; + int ret = -1; + virStorageVolDefPtr vol = NULL; + + /* Silently skip directories, including '.' and '..'. FIXME: + * should non-'.' subdirectories be listed as type dir? */
What behaviour does the 'fs' pool have for non-'.' subdirs ? Seems we perhaps ought to match that.
+ *volptr = NULL; + if (S_ISDIR(st->st_mode)) + return 0; + + if (VIR_ALLOC(vol) < 0 || + VIR_STRDUP(vol->name, name) < 0 || + virAsprintf(&vol->key, "%s%s/%s", + *pool == '/' ? "" : "/", pool, vol->name) < 0) + goto cleanup; + + /* FIXME - must open files to determine if they are non-raw */ + vol->type = VIR_STORAGE_VOL_NETWORK; + vol->target.format = VIR_STORAGE_FILE_RAW; + vol->capacity = vol->allocation = st->st_size;
So gluster has no concept of sparse volumes ?
+ tmp = state->uri->path; + state->uri->path = vol->key; + if (!(vol->target.path = virURIFormat(state->uri))) { + state->uri->path = tmp; + goto cleanup; + } + state->uri->path = tmp; + + *volptr = vol; + vol = NULL; + ret = 0; +cleanup: + virStorageVolDefFree(vol); + return ret; +}
static int virStorageBackendGlusterRefreshPool(virConnectPtr conn ATTRIBUTE_UNUSED, - virStoragePoolObjPtr pool ATTRIBUTE_UNUSED) + virStoragePoolObjPtr pool) { - virReportError(VIR_ERR_NO_SUPPORT, "%s", - _("gluster pool type not fully supported yet")); - return -1; + int ret = -1; + virStorageBackendGlusterStatePtr state = NULL; + struct { + struct dirent ent; + /* See comment below about readdir_r needing padding */ + char padding[MAX(1, 256 - (int) (sizeof(struct dirent) + - offsetof(struct dirent, d_name)))]; + } de; + struct dirent *ent; + glfs_fd_t *dir = NULL; + struct stat st; + struct statvfs sb; + + if (!(state = virStorageBackendGlusterOpen(pool))) + goto cleanup;
What kind of overhead is there in opening / closing gluster connections ? Is it something we should just do in the pool start/stop methods, instead of on every operation like refresh?
+ + /* Why oh why did glfs 3.4 decide to expose only readdir_r rather + * than readdir? POSIX admits that readdir_r is inherently a + * flawed design, because systems are not required to define + * NAME_MAX: http://austingroupbugs.net/view.php?id=696 + * http://womble.decadent.org.uk/readdir_r-advisory.html + * + * Fortunately, gluster appears to limit its underlying bricks to + * only use file systems such as XFS that have a NAME_MAX of 255; + * so we are currently guaranteed that if we provide 256 bytes of + * tail padding, then we should have enough space to avoid buffer + * overflow no matter whether the OS used d_name[], d_name[1], or + * d_name[256] in its 'struct dirent'. + * http://lists.gnu.org/archive/html/gluster-devel/2013-10/msg00083.html + */ + + if (!(dir = glfs_opendir(state->vol, state->dir))) { + virReportSystemError(errno, _("cannot open path '%s'"), + pool->def->name); + goto cleanup; + } + while (!(errno = glfs_readdirplus_r(dir, &st, &de.ent, &ent)) && ent) { + virStorageVolDefPtr vol; + int okay = virStorageBackendGlusterRefreshVol(state, + pool->def->source.name, + ent->d_name, &st, + &vol); + if (okay < 0) + goto cleanup; + if (vol && VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count, + vol) < 0) + goto cleanup; + } + if (errno) { + virReportSystemError(errno, _("failed to read directory '%s'"), + pool->def->source.name); + goto cleanup; + } + + if (glfs_statvfs(state->vol, state->dir, &sb) < 0) { + virReportSystemError(errno, _("cannot statvfs path '%s'"), pool->def->source.name);
Seems you lost the line break here.
+ goto cleanup; + } + + pool->def->capacity = ((unsigned long long)sb.f_frsize * + (unsigned long long)sb.f_blocks); + pool->def->available = ((unsigned long long)sb.f_bfree * + (unsigned long long)sb.f_frsize); + pool->def->allocation = pool->def->capacity - pool->def->available; + + ret = 0; +cleanup: + if (dir) + glfs_closedir(dir); + virStorageBackendGlusterClose(state); + if (ret < 0) + virStoragePoolObjClearVols(pool); + return ret; }
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 11/12/2013 09:58 AM, Daniel P. Berrange wrote:
On Mon, Nov 11, 2013 at 09:19:30PM -0700, Eric Blake wrote:
Actually put gfapi to use, by allowing the creation of a gluster pool. Right now, all volumes are treated as raw; further patches will allow peering into files to allow for qcow2 files and backing chains, and reporting proper volume allocation.
+ /* FIXME: Currently hard-coded to tcp transport; XML needs to be + * extended to allow alternate transport */ + if (VIR_ALLOC(ret->uri) < 0 || + VIR_STRDUP(ret->uri->scheme, "gluster") < 0 || + VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0 || + virAsprintf(&ret->uri->path, "%s%s", + *pool->def->source.name == '/' ? "" : "/", + pool->def->source.name) < 0) + goto error;
I'm not a huge fan of chaining together statements like this, since some gdb versions are pretty awful at telling you which one of these 4 statements has the problem on segv.
Easy enough to split; so consider it done.
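That is, the same four calls, just unchained:

    if (VIR_ALLOC(ret->uri) < 0)
        goto error;
    if (VIR_STRDUP(ret->uri->scheme, "gluster") < 0)
        goto error;
    if (VIR_STRDUP(ret->uri->server, pool->def->source.hosts[0].name) < 0)
        goto error;
    if (virAsprintf(&ret->uri->path, "%s%s",
                    *pool->def->source.name == '/' ? "" : "/",
                    pool->def->source.name) < 0)
        goto error;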
+ /* Silently skip directories, including '.' and '..'. FIXME: + * should non-'.' subdirectories be listed as type dir? */
What behaviour does the 'fs' pool have in for non-'.' subdirs ? Seems we perhaps ought to match that.
Peter asked the same question - guess I'm stuck doing that :)
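Roughly, the directory branch in virStorageBackendGlusterRefreshVol would grow into something like this sketch (an assumption at this point - I haven't written it yet, and key/target.path setup is omitted here; the dir-type constants mirror what the fs backend uses):

    if (S_ISDIR(st->st_mode)) {
        if (STREQ(name, ".") || STREQ(name, ".."))
            return 0;                /* still skip the dot entries */
        if (VIR_ALLOC(vol) < 0 ||
            VIR_STRDUP(vol->name, name) < 0)
            goto cleanup;
        vol->type = VIR_STORAGE_VOL_DIR;
        vol->target.format = VIR_STORAGE_FILE_DIR;
        *volptr = vol;
        vol = NULL;
        ret = 0;
        goto cleanup;
    }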
+ + /* FIXME - must open files to determine if they are non-raw */ + vol->type = VIR_STORAGE_VOL_NETWORK; + vol->target.format = VIR_STORAGE_FILE_RAW; + vol->capacity = vol->allocation = st->st_size;
So gluster has no concept of sparse volumes ?
Gluster has sparse support; I need to flesh this out a bit more, based on how directory pools do it (both pools have a struct stat to work with); I also need to populate ownership details.
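Concretely, I'd expect the fleshed-out accounting to mirror what the directory backend derives from its struct stat - a sketch only (POSIX defines st_blocks in 512-byte units):

    vol->capacity = st->st_size;                  /* apparent file size */
    vol->allocation = st->st_blocks * 512ULL;     /* bytes actually on disk */
    vol->target.perms.uid = st->st_uid;           /* ownership details */
    vol->target.perms.gid = st->st_gid;
    vol->target.perms.mode = st->st_mode & (S_IRWXU | S_IRWXG | S_IRWXO);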
+ + if (!(state = virStorageBackendGlusterOpen(pool))) + goto cleanup;
What kind of overhead is there in opening / closing gluster connections ? Is it something we should just do in the pool start/stop methods, instead of on every operation like refresh.
I was just copying the rbd backend. In gdb, I see several threads spawned for every refresh operation; but I can certainly try to rework things to open at pool start instead. Should I do it in this patch, or save it for a followup? The other consideration is that eventually, the src/util/virstoragefile.c code will need to call into the src/storage/storage_driver.c code (indirectly, via the virConnectPtr) when doing recursive backing chain checking; in that case, I'm envisioning a new callback (internal-only, no public API needed) that roughly says: "given this URI for a backing file, tell me if you have a backend for parsing the file, and if so, grab the header from that file". That means my ultimate goal is a reusable method that serves both the pool XML (given host and name, [or host, volume, and subdir, based on review of 1/4]) and the qemu URI (given a string "gluster://host/vol/..."). So while pool start may be better than pool refresh for opening a glfs connection, the ability to open a glfs connection also needs to be modular enough to reuse for one-off parsing of a URI.
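To make that concrete, the callback I'm imagining would be shaped roughly like this (the name and signature are purely hypothetical - nothing like this exists in libvirt yet):

    typedef int (*virStorageFileFetchHeaderFn)(virConnectPtr conn,
                                               const char *uri,   /* e.g. "gluster://host/vol/img" */
                                               char **header,     /* allocated; first bytes of file */
                                               ssize_t *headerlen);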
+ if (glfs_statvfs(state->vol, state->dir, &sb) < 0) { + virReportSystemError(errno, _("cannot statvfs path '%s'"), pool->def->source.name);
Seems you lost the line break here.
Yep, Peter caught it too. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

Putting together pieces from previous patches, it is now possible for 'virsh dumpxml --pool gluster volname' to report metadata about a qcow2 file stored on gluster. The backing file is still treated as raw; to fix that, more patches are needed to make the storage backing chain analysis recursive rather than halting at a network protocol name, but that work will not need any further calls into libgfapi so much as just reusing this code, and that should be the only code outside of the storage driver that needs any help from libgfapi. Any additional use of libgfapi within libvirt should only be needed for implementing storage pool APIs such as volume creation or resizing, where backing chain analysis should be unaffected. * src/storage/storage_backend_gluster.c (virStorageBackendGlusterOpen): Make relative name handling easier. (virStorageBackendGlusterReadHeader): New helper function. (virStorageBackendGlusterRefreshVol): Probe non-raw files. Signed-off-by: Eric Blake <eblake@redhat.com> --- src/storage/storage_backend_gluster.c | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c index bc90de9..69e8e61 100644 --- a/src/storage/storage_backend_gluster.c +++ b/src/storage/storage_backend_gluster.c @@ -116,6 +116,12 @@ virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) goto error; } + if (glfs_chdir(ret->vol, ret->dir) < 0) { + virReportSystemError(errno, _("failed to change to directory '%s'"), + ret->dir); + goto error; + } + return ret; error: @@ -124,6 +130,37 @@ error: } +static int +virStorageBackendGlusterReadHeader(glfs_fd_t *fd, + const char *name, + int maxlen, + char **buf) +{ + char *s; + size_t nread = 0; + + if (VIR_ALLOC_N(*buf, maxlen) < 0) + return -1; + + s = *buf; + while (maxlen) { + ssize_t r = glfs_read(fd, s, maxlen, 0); + if (r < 0 && errno == EINTR) + continue; + if (r < 0) { + VIR_FREE(*buf); + virReportSystemError(errno, _("unable to read '%s'"), name); + return r; + } + if (r == 0) + return nread; + buf += r; + maxlen -= r; + nread += r; + } + return nread; +} + /* Populate *volptr for the given name and stat information, or leave * it NULL if the entry should be skipped (such as "."). Return 0 on * success, -1 on failure. */ @@ -137,6 +174,10 @@ virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, char *tmp; int ret = -1; virStorageVolDefPtr vol = NULL; + glfs_fd_t *fd = NULL; + virStorageFileMetadata *meta = NULL; + char *header = NULL; + ssize_t len = VIR_STORAGE_MAX_HEADER; /* Silently skip directories, including '.' and '..'. FIXME: * should non-'.' subdirectories be listed as type dir? 
*/ @@ -162,11 +203,57 @@ virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, } state->uri->path = tmp; + if (!(fd = glfs_open(state->vol, name, O_RDONLY| O_NONBLOCK | O_NOCTTY))) { + if ((errno == ENOENT || errno == ELOOP) && + S_ISLNK(st->st_mode)) { + VIR_WARN("ignoring dangling symlink '%s'", name); + ret = 0; + } else { + virReportSystemError(errno, _("cannot open volume '%s'"), name); + } + goto cleanup; + } + + if ((len = virStorageBackendGlusterReadHeader(fd, name, len, &header)) < 0) + goto cleanup; + + if ((vol->target.format = virStorageFileProbeFormatFromBuf(name, + header, + len)) < 0 || + !(meta = virStorageFileGetMetadataFromBuf(name, header, len, + vol->target.format))) + goto cleanup; + + if (meta->backingStore) { + vol->backingStore.path = meta->backingStore; + meta->backingStore = NULL; + vol->backingStore.format = meta->backingStoreFormat; + if (vol->backingStore.format < 0) + vol->backingStore.format = VIR_STORAGE_FILE_RAW; + } + if (meta->capacity) + vol->capacity = meta->capacity; + if (meta->encrypted) { + if (VIR_ALLOC(vol->target.encryption) < 0) + goto cleanup; + if (vol->target.format == VIR_STORAGE_FILE_QCOW || + vol->target.format == VIR_STORAGE_FILE_QCOW2) + vol->target.encryption->format = VIR_STORAGE_ENCRYPTION_FORMAT_QCOW; + } + vol->target.features = meta->features; + meta->features = NULL; + vol->target.compat = meta->compat; + meta->compat = NULL; + *volptr = vol; vol = NULL; ret = 0; cleanup: + virStorageFileFreeMetadata(meta); virStorageVolDefFree(vol); + if (fd) + glfs_close(fd); + VIR_FREE(header); return ret; } -- 1.8.3.1

On 11/12/13 05:19, Eric Blake wrote:
Putting together pieces from previous patches, it is now possible for 'virsh dumpxml --pool gluster volname' to report metadata
Did you mean 'virsh vol-dumpxml --pool ... '?
about a qcow2 file stored on gluster. The backing file is still treated as raw; to fix that, more patches are needed to make the storage backing chain analysis recursive rather than halting at a network protocol name, but that work will not need any further calls into libgfapi so much as just reusing this code, and that should be the only code outside of the storage driver that needs any help from libgfapi. Any additional use of libgfapi within libvirt should only be needed for implementing storage pool APIs such as volume creation or resizing, where backing chain analysis should be unaffected.
* src/storage/storage_backend_gluster.c (virStorageBackendGlusterOpen): Make relative name handling easier. (virStorageBackendGlusterReadHeader): New helper function. (virStorageBackendGlusterRefreshVol): Probe non-raw files.
Signed-off-by: Eric Blake <eblake@redhat.com> --- src/storage/storage_backend_gluster.c | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+)
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c index bc90de9..69e8e61 100644 --- a/src/storage/storage_backend_gluster.c +++ b/src/storage/storage_backend_gluster.c
...
@@ -124,6 +130,37 @@ error: }
+static int
This function is declared as an int, but ...
+virStorageBackendGlusterReadHeader(glfs_fd_t *fd, + const char *name, + int maxlen, + char **buf) +{ + char *s; + size_t nread = 0;
.. returns a size_t. In principle nread could overflow the int return type, but it is bounded by maxlen. Okay.
+ + if (VIR_ALLOC_N(*buf, maxlen) < 0) + return -1; + + s = *buf; + while (maxlen) { + ssize_t r = glfs_read(fd, s, maxlen, 0); + if (r < 0 && errno == EINTR) + continue; + if (r < 0) { + VIR_FREE(*buf); + virReportSystemError(errno, _("unable to read '%s'"), name); + return r; + } + if (r == 0) + return nread; + buf += r; + maxlen -= r; + nread += r; + } + return nread; +} + /* Populate *volptr for the given name and stat information, or leave * it NULL if the entry should be skipped (such as "."). Return 0 on * success, -1 on failure. */
...
@@ -162,11 +203,57 @@ virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, } state->uri->path = tmp;
+ if (!(fd = glfs_open(state->vol, name, O_RDONLY| O_NONBLOCK | O_NOCTTY))) { + if ((errno == ENOENT || errno == ELOOP) && + S_ISLNK(st->st_mode)) { + VIR_WARN("ignoring dangling symlink '%s'", name); + ret = 0; + } else { + virReportSystemError(errno, _("cannot open volume '%s'"), name); + } + goto cleanup; + }
Now that you actually open the volume file and probe it, you need to kill the comment added in the previous patch: + if (VIR_ALLOC(vol) < 0 || + VIR_STRDUP(vol->name, name) < 0 || + virAsprintf(&vol->key, "%s%s/%s", + *pool == '/' ? "" : "/", pool, vol->name) < 0) + goto cleanup; + + /* FIXME - must open files to determine if they are non-raw */ + vol->type = VIR_STORAGE_VOL_NETWORK; + vol->target.format = VIR_STORAGE_FILE_RAW; + vol->capacity = vol->allocation = st->st_size; + tmp = state->uri->path; + state->uri->path = vol->key; + if (!(vol->target.path = virURIFormat(state->uri))) { + state->uri->path = tmp; + goto cleanup
+ + if ((len = virStorageBackendGlusterReadHeader(fd, name, len, &header)) < 0) + goto cleanup; + + if ((vol->target.format = virStorageFileProbeFormatFromBuf(name, + header, + len)) < 0 || + !(meta = virStorageFileGetMetadataFromBuf(name, header, len, + vol->target.format))) + goto cleanup;
Also, since you error out on failure, you can remove the lines that set some of the defaults.
+ + if (meta->backingStore) { + vol->backingStore.path = meta->backingStore; + meta->backingStore = NULL; + vol->backingStore.format = meta->backingStoreFormat; + if (vol->backingStore.format < 0) + vol->backingStore.format = VIR_STORAGE_FILE_RAW; + } + if (meta->capacity) + vol->capacity = meta->capacity; + if (meta->encrypted) { + if (VIR_ALLOC(vol->target.encryption) < 0) + goto cleanup; + if (vol->target.format == VIR_STORAGE_FILE_QCOW || + vol->target.format == VIR_STORAGE_FILE_QCOW2) + vol->target.encryption->format = VIR_STORAGE_ENCRYPTION_FORMAT_QCOW; + } + vol->target.features = meta->features; + meta->features = NULL; + vol->target.compat = meta->compat; + meta->compat = NULL; + *volptr = vol; vol = NULL; ret = 0; cleanup: + virStorageFileFreeMetadata(meta); virStorageVolDefFree(vol); + if (fd) + glfs_close(fd); + VIR_FREE(header); return ret; }
ACK with the comment nit fixed. Peter

On 11/12/2013 09:00 AM, Peter Krempa wrote:
On 11/12/13 05:19, Eric Blake wrote:
Putting together pieces from previous patches, it is now possible for 'virsh dumpxml --pool gluster volname' to report metadata
Did you mean 'virsh vol-dumpxml --pool ... '?
Yes.
+static int
This function is declared as an int, but ...
+virStorageBackendGlusterReadHeader(glfs_fd_t *fd, + const char *name, + int maxlen, + char **buf) +{ + char *s; + size_t nread = 0;
.. returns size_t. This could overflow normally, but is guarded by maxlen. Okay.
I'll improve the types.
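For reference, a sketch of the helper with the types straightened out along the lines both reviews suggest - note it also advances 's' in the loop, where the posted version increments 'buf' (the char ** itself), which looks like a latent bug:

    static ssize_t
    virStorageBackendGlusterReadHeader(glfs_fd_t *fd,
                                       const char *name,
                                       size_t maxlen,
                                       char **buf)
    {
        char *s;
        size_t nread = 0;

        if (VIR_ALLOC_N(*buf, maxlen) < 0)
            return -1;

        s = *buf;
        while (maxlen) {
            ssize_t r = glfs_read(fd, s, maxlen, 0);
            if (r < 0 && errno == EINTR)
                continue;
            if (r < 0) {
                VIR_FREE(*buf);
                virReportSystemError(errno, _("unable to read '%s'"), name);
                return r;
            }
            if (r == 0)
                break;              /* EOF before maxlen bytes - fine */
            s += r;                 /* advance the data cursor, not buf */
            maxlen -= r;
            nread += r;
        }
        return nread;
    }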
+ if (!(fd = glfs_open(state->vol, name, O_RDONLY| O_NONBLOCK | O_NOCTTY))) { + if ((errno == ENOENT || errno == ELOOP) && + S_ISLNK(st->st_mode)) { + VIR_WARN("ignoring dangling symlink '%s'", name); + ret = 0; + } else { + virReportSystemError(errno, _("cannot open volume '%s'"), name); + } + goto cleanup; + }
Now that you actually open the volume file and probe it, you need to kill the comment added in previous patch:
+ if (VIR_ALLOC(vol) < 0 || + VIR_STRDUP(vol->name, name) < 0 || + virAsprintf(&vol->key, "%s%s/%s", + *pool == '/' ? "" : "/", pool, vol->name) < 0) + goto cleanup; + + /* FIXME - must open files to determine if they are non-raw */ + vol->type = VIR_STORAGE_VOL_NETWORK; + vol->target.format = VIR_STORAGE_FILE_RAW; + vol->capacity = vol->allocation = st->st_size;
Good catch. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Mon, Nov 11, 2013 at 09:19:31PM -0700, Eric Blake wrote:
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c index bc90de9..69e8e61 100644 --- a/src/storage/storage_backend_gluster.c +++ b/src/storage/storage_backend_gluster.c @@ -116,6 +116,12 @@ virStorageBackendGlusterOpen(virStoragePoolObjPtr pool) goto error; }
+ if (glfs_chdir(ret->vol, ret->dir) < 0) { + virReportSystemError(errno, _("failed to change to directory '%s'"), + ret->dir); + goto error; + } + return ret;
error: @@ -124,6 +130,37 @@ error: }
+static int
s/int/ssize_t/ ?
+virStorageBackendGlusterReadHeader(glfs_fd_t *fd, + const char *name, + int maxlen,
s/int/size_t/ ?
+ char **buf) +{ + char *s; + size_t nread = 0; + + if (VIR_ALLOC_N(*buf, maxlen) < 0) + return -1; + + s = *buf; + while (maxlen) { + ssize_t r = glfs_read(fd, s, maxlen, 0); + if (r < 0 && errno == EINTR) + continue; + if (r < 0) { + VIR_FREE(*buf); + virReportSystemError(errno, _("unable to read '%s'"), name); + return r; + } + if (r == 0) + return nread; + buf += r; + maxlen -= r; + nread += r; + } + return nread; +} + /* Populate *volptr for the given name and stat information, or leave * it NULL if the entry should be skipped (such as "."). Return 0 on * success, -1 on failure. */ @@ -137,6 +174,10 @@ virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, char *tmp; int ret = -1; virStorageVolDefPtr vol = NULL; + glfs_fd_t *fd = NULL; + virStorageFileMetadata *meta = NULL; + char *header = NULL; + ssize_t len = VIR_STORAGE_MAX_HEADER;
/* Silently skip directories, including '.' and '..'. FIXME: * should non-'.' subdirectories be listed as type dir? */ @@ -162,11 +203,57 @@ virStorageBackendGlusterRefreshVol(virStorageBackendGlusterStatePtr state, } state->uri->path = tmp;
+ if (!(fd = glfs_open(state->vol, name, O_RDONLY| O_NONBLOCK | O_NOCTTY))) { + if ((errno == ENOENT || errno == ELOOP) && + S_ISLNK(st->st_mode)) { + VIR_WARN("ignoring dangling symlink '%s'", name); + ret = 0; + } else { + virReportSystemError(errno, _("cannot open volume '%s'"), name); + } + goto cleanup; + } + + if ((len = virStorageBackendGlusterReadHeader(fd, name, len, &header)) < 0) + goto cleanup; + + if ((vol->target.format = virStorageFileProbeFormatFromBuf(name, + header, + len)) < 0 || + !(meta = virStorageFileGetMetadataFromBuf(name, header, len, + vol->target.format))) + goto cleanup;
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|