This patch offer,
1. Optimizing the calls to glfs_init() and friends
2. Temporarily reduce the memory leak appear in libvirt process account,
even if the root cause for the leaks are glfs_fini() (7 - 10MB per object)
[Hopefully gluster should address this in its future releases, not very near]
Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create
glfs object, once for stat, read headers and next to chown) and then will fork
qemu process which will call once again (for actual read write IO).
Not that all, in case if we are have 4 extra attached disks, then the total
calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in
qemu space i.e 15 calls. Since we don't have control over qemu process as that
executes in a different process environment, lets do not bother much about it.
This patch shrinks these 10 calls (i.e objects from above example) to just
one, by maintaining a cache of glfs objects.
The glfs object is shared across other only if volume name and all the
volfile servers match. In case of hit glfs object takes a ref and
only on close unref happens.
Thanks to 'Peter Krempa' for the discussion.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever(a)redhat.com>
---
WORK IN PROGRESS: (WIP)
----------------
While initially caching the glfs object, i.e. in
virStorageBackendGlusterSetPreopened() I have took a ref=2, so on the following
virStorageBackendGlusterClosePreopened() --ref will make ref=1, which will
help not cleaning up the object which has to be shared with next coming disks
(if conditions meat).
Given some context, the idea is that on time-out (or after all disks are
initiallized) some one should call virStorageBackendGlusterClosePreopened()
which will ideally make ref=0, hence cached object will be cleaned/deleted
calling glfs_fini()
I had a thought of doing the time-out cleanup call in
virSecurityManagerSetAllLabel() or similar, but that looks too odd for me?
Some help ?
Thanks in advance.
---
src/storage/storage_backend_gluster.c | 136 ++++++++++++++++++++++++++++++++--
src/storage/storage_backend_gluster.h | 33 ++++++++-
2 files changed, 160 insertions(+), 9 deletions(-)
diff --git a/src/storage/storage_backend_gluster.c
b/src/storage/storage_backend_gluster.c
index 8e86704..4f53ebc 100644
--- a/src/storage/storage_backend_gluster.c
+++ b/src/storage/storage_backend_gluster.c
@@ -47,19 +47,132 @@ struct _virStorageBackendGlusterState {
char *dir; /* dir from URI, or "/"; always starts and ends in '/'
*/
};
+virGlusterDefPtr ConnCache = {0,};
+
typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
+void
+virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs)
+{
+ size_t i;
+ virStorageBackendGlusterStatePtrPreopened entry = NULL;
+
+ if (ConnCache == NULL && (VIR_ALLOC(ConnCache) < 0))
+ return;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ if (STREQ(ConnCache->Conn[i]->volname, src->volume))
+ return;
+ }
+
+ if (VIR_ALLOC(entry) < 0)
+ goto L1;
+
+ if (VIR_STRDUP(entry->volname, src->volume) < 0)
+ goto L1;
+
+ entry->nhosts = src->nhosts;
+ for (i = 0; i < src->nhosts; i++) {
+ if (VIR_ALLOC_N(entry->hosts[i], strlen(src->hosts[i].name) + 1) < 0)
+ goto L2;
+ strcpy(entry->hosts[i], src->hosts[i].name);
+ }
+
+ entry->fs = fs;
+ entry->ref = 2; /* persist glfs obj per volume until a final timeout
+ virStorageBackendGlusterClosePreopened() is called */
+
+ if (VIR_INSERT_ELEMENT(ConnCache->Conn, -1, ConnCache->nConn, entry) < 0)
+ goto L2;
+
+ return;
+
+L2:
+ for (i = 0; i < entry->nhosts; i++)
+ VIR_FREE(entry->hosts[i]);
+L1:
+ if(ConnCache->nConn == 0)
+ VIR_FREE(ConnCache);
+ VIR_FREE(entry->volname);
+ VIR_FREE(entry);
+}
+
+glfs_t *
+virStorageBackendGlusterFindPreopened(virStorageSourcePtr src)
+{
+ size_t i, j, k, ret = 0;
+ size_t min, max;
+
+ if (ConnCache == NULL)
+ return NULL;
+
+ virStorageBackendGlusterStatePtrPreopened entry;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ entry = ConnCache->Conn[i];
+ if (STREQ(entry->volname, src->volume)) {
+ min = entry->nhosts < src->nhosts ? entry->nhosts :
src->nhosts;
+ max = entry->nhosts >= src->nhosts ? entry->nhosts :
src->nhosts;
+ for (j = 0; j< min; j++) {
+ if (entry->nhosts == min) {
+ for (k = 0; k < max; k++) {
+ if (STREQ(entry->hosts[j], src->hosts[k].name)) {
+ ret = 1;
+ break;
+ }
+ }
+ if (!ret)
+ return NULL;
+ } else {
+ for (k = 0; k < max; k++) {
+ if (STREQ(src->hosts[j].name, entry->hosts[k])) {
+ ret = 1;
+ break;
+ }
+ }
+ if (!ret)
+ return NULL;
+ }
+ }
+ entry->ref++;
+ return entry->fs;
+ }
+ }
+ return NULL;
+}
+
+int
+virStorageBackendGlusterClosePreopened(glfs_t *fs)
+{
+ size_t i;
+ int ret = 0;
+
+ if (fs == NULL)
+ return ret;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ if (ConnCache->Conn[i]->fs == fs) {
+ if (--ConnCache->Conn[i]->ref)
+ return ret;
+
+ ret = glfs_fini(ConnCache->Conn[i]->fs);
+ VIR_FREE(ConnCache->Conn[i]->volname);
+ VIR_FREE(ConnCache->Conn[i]);
+
+ VIR_DELETE_ELEMENT(ConnCache->Conn, i, ConnCache->nConn);
+ }
+ }
+ return ret;
+}
+
static void
virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
{
if (!state)
return;
- /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for
- * glfs_fini, with errno containing random data, so there's no way
- * to tell if it succeeded. 3.4.2 is supposed to fix this.*/
- if (state->vol && glfs_fini(state->vol) < 0)
+ if (state->vol && virStorageBackendGlusterClosePreopened(state->vol)
< 0)
VIR_DEBUG("shutdown of gluster volume %s failed with errno %d",
state->volname, errno);
@@ -556,8 +669,7 @@ virStorageFileBackendGlusterDeinit(virStorageSourcePtr src)
src, src->hosts->name, src->hosts->port ?
src->hosts->port : "0",
src->volume, src->path);
- if (priv->vol)
- glfs_fini(priv->vol);
+ virStorageBackendGlusterClosePreopened(priv->vol);
VIR_FREE(priv->canonpath);
VIR_FREE(priv);
@@ -630,11 +742,20 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
src, priv, src->volume, src->path,
(unsigned int)src->drv->uid, (unsigned int)src->drv->gid);
+
+ priv->vol = virStorageBackendGlusterFindPreopened(src);
+ if (priv->vol) {
+ src->drv->priv = priv;
+ return 0;
+ }
+
if (!(priv->vol = glfs_new(src->volume))) {
virReportOOMError();
goto error;
}
+ virStorageBackendGlusterSetPreopened(src, priv->vol);
+
for (i = 0; i < src->nhosts; i++) {
if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
goto error;
@@ -652,8 +773,7 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
return 0;
error:
- if (priv->vol)
- glfs_fini(priv->vol);
+ virStorageBackendGlusterClosePreopened(priv->vol);
VIR_FREE(priv);
return -1;
diff --git a/src/storage/storage_backend_gluster.h
b/src/storage/storage_backend_gluster.h
index 6796016..a0326aa 100644
--- a/src/storage/storage_backend_gluster.h
+++ b/src/storage/storage_backend_gluster.h
@@ -22,9 +22,40 @@
#ifndef __VIR_STORAGE_BACKEND_GLUSTER_H__
# define __VIR_STORAGE_BACKEND_GLUSTER_H__
-# include "storage_backend.h"
+#include "storage_backend.h"
+#include <glusterfs/api/glfs.h>
extern virStorageBackend virStorageBackendGluster;
extern virStorageFileBackend virStorageFileBackendGluster;
+
+struct _virStorageBackendGlusterStatePreopened {
+ char *volname;
+ size_t nhosts;
+ char *hosts[1024]; /* FIXME: 1024 ? */
+ glfs_t *fs;
+ int ref;
+};
+
+typedef struct _virStorageBackendGlusterStatePreopened
virStorageBackendGlusterStatePreopened;
+typedef virStorageBackendGlusterStatePreopened
*virStorageBackendGlusterStatePtrPreopened;
+
+struct _virGlusterDef {
+ size_t nConn;
+ virStorageBackendGlusterStatePtrPreopened *Conn;
+};
+
+typedef struct _virGlusterDef virGlusterDef;
+typedef virGlusterDef *virGlusterDefPtr;
+
+extern virGlusterDefPtr ConnCache;
+
+void
+virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs);
+
+glfs_t*
+virStorageBackendGlusterFindPreopened(virStorageSourcePtr src);
+
+int
+virStorageBackendGlusterClosePreopened(glfs_t *fs);
#endif /* __VIR_STORAGE_BACKEND_GLUSTER_H__ */
--
2.7.4