[libvirt] [RFC] [PATCH v2 1/6] add configure option --with-fuse for libvirt

add a configure option --with-fuse to prepare introduction of fuse support for libvirt lxc. With help from Daniel Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- configure.ac | 36 ++++++++++++++++++++++++++++++++++++ libvirt.spec.in | 9 +++++++++ 2 files changed, 45 insertions(+), 0 deletions(-) diff --git a/configure.ac b/configure.ac index 3cc7b3c..e6d207e 100644 --- a/configure.ac +++ b/configure.ac @@ -1695,6 +1695,37 @@ AC_SUBST([CAPNG_CFLAGS]) AC_SUBST([CAPNG_LIBS]) +dnl libfuse +AC_ARG_WITH([fuse], + AC_HELP_STRING([--with-fuse], [use libfuse to proivde fuse filesystem support for libvirt lxc]), + [], + [with_fuse=check]) + +dnl +dnl This check looks for 'fuse' +dnl +FUSE_CFLAGS= +FUSE_LIBS= +if test "x$with_fuse" != "xno"; then + PKG_CHECK_MODULES([FUSE], [fuse], + [with_fuse=yes], [ + if test "x$with_fuse" = "xcheck" ; then + with_fuse=no + else + AC_MSG_ERROR( + [You must install fuse-devel to compile libvirt]) + fi + ]) + if test "x$with_fuse" = "xyes" ; then + FUSE_LIBS="-lfuse" + FUSE_CFLAGS="-D_FILE_OFFSET_BITS=64" + AC_DEFINE_UNQUOTED([HAVE_FUSE], 1, [whether fuse is available for libvirt lxc]) + fi +fi +AM_CONDITIONAL([HAVE_FUSE], [test "x$with_fuse" = "xyes"]) +AC_SUBST([FUSE_CFLAGS]) +AC_SUBST([FUSE_LIBS]) + dnl virsh libraries AC_CHECK_HEADERS([readline/readline.h]) @@ -2944,6 +2975,11 @@ AC_MSG_NOTICE([ capng: $CAPNG_CFLAGS $CAPNG_LIBS]) else AC_MSG_NOTICE([ capng: no]) fi +if test "$with_fuse" = "yes" ; then +AC_MSG_NOTICE([ fuse: $FUSE_CFLAGS $FUSE_LIBS]) +else +AC_MSG_NOTICE([ fuse: no]) +fi if test "$with_xen" = "yes" ; then AC_MSG_NOTICE([ xen: $XEN_CFLAGS $XEN_LIBS]) else diff --git a/libvirt.spec.in b/libvirt.spec.in index 140a182..939569e 100644 --- a/libvirt.spec.in +++ b/libvirt.spec.in @@ -91,6 +91,7 @@ # A few optional bits off by default, we enable later %define with_polkit 0%{!?_without_polkit:0} %define with_capng 0%{!?_without_capng:0} +%define with_fuse 0%{!?_without_fuse:0} %define with_netcf 0%{!?_without_netcf:0} %define 
with_udev 0%{!?_without_udev:0} %define with_hal 0%{!?_without_hal:0} @@ -466,6 +467,9 @@ BuildRequires: numactl-devel %if %{with_capng} BuildRequires: libcap-ng-devel >= 0.5.0 %endif +%if %{with_fuse} +BuildRequires: fuse-devel +%endif %if %{with_phyp} BuildRequires: libssh2-devel %endif @@ -1116,6 +1120,10 @@ of recent versions of Linux (and other OSes). %define _without_capng --without-capng %endif +%if ! %{with_fuse} +%define _without_fuse --without-fuse +%endif + %if ! %{with_netcf} %define _without_netcf --without-netcf %endif @@ -1206,6 +1214,7 @@ autoreconf -if %{?_without_numactl} \ %{?_without_numad} \ %{?_without_capng} \ + %{?_without_fuse} \ %{?_without_netcf} \ %{?_without_selinux} \ %{?_without_hal} \ -- 1.7.7.6

this patch addes fuse support for libvirt lxc. we can use fuse filesystem to generate sysinfo dynamically, So we can isolate /proc/meminfo,cpuinfo and so on through fuse filesystem. we mount fuse filesystem for every container.the mount name is Lxc-containename-fuse. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- po/POTFILES.in | 1 + src/Makefile.am | 9 ++- src/lxc/lxc_controller.c | 10 +++ src/lxc/lxc_fuse.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++ src/lxc/lxc_fuse.h | 40 ++++++++++++ src/lxc/lxc_process.c | 2 + 6 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 src/lxc/lxc_fuse.c create mode 100644 src/lxc/lxc_fuse.h diff --git a/po/POTFILES.in b/po/POTFILES.in index 7587c61..dedf83e 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -43,6 +43,7 @@ src/libvirt-qemu.c src/locking/lock_driver_sanlock.c src/locking/lock_manager.c src/lxc/lxc_cgroup.c +src/lxc/lxc_fuse.c src/lxc/lxc_container.c src/lxc/lxc_conf.c src/lxc/lxc_controller.c diff --git a/src/Makefile.am b/src/Makefile.am index a9f8d94..5282a06 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -353,12 +353,14 @@ LXC_DRIVER_SOURCES = \ lxc/lxc_cgroup.c lxc/lxc_cgroup.h \ lxc/lxc_domain.c lxc/lxc_domain.h \ lxc/lxc_process.c lxc/lxc_process.h \ + lxc/lxc_fuse.c lxc/lxc_fuse.h \ lxc/lxc_driver.c lxc/lxc_driver.h LXC_CONTROLLER_SOURCES = \ lxc/lxc_conf.c lxc/lxc_conf.h \ lxc/lxc_container.c lxc/lxc_container.h \ lxc/lxc_cgroup.c lxc/lxc_cgroup.h \ + lxc/lxc_fuse.c lxc/lxc_fuse.h \ lxc/lxc_controller.c SECURITY_DRIVER_APPARMOR_HELPER_SOURCES = \ @@ -827,8 +829,9 @@ endif libvirt_driver_lxc_impl_la_CFLAGS = \ $(LIBNL_CFLAGS) \ + $(FUSE_CFLAGS) \ -I$(top_srcdir)/src/conf $(AM_CFLAGS) -libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) +libvirt_driver_lxc_impl_la_LIBADD = $(CAPNG_LIBS) $(LIBNL_LIBS) $(FUSE_LIBS) if HAVE_LIBBLKID libvirt_driver_lxc_impl_la_CFLAGS += $(BLKID_CFLAGS) libvirt_driver_lxc_impl_la_LIBADD += $(BLKID_LIBS) @@ -1536,6 +1539,7 @@ 
libvirt_lxc_SOURCES = \ libvirt_lxc_LDFLAGS = $(WARN_CFLAGS) $(AM_LDFLAGS) libvirt_lxc_LDADD = \ $(NUMACTL_LIBS) \ + $(FUSE_LIBS) \ libvirt-net-rpc-server.la \ libvirt-net-rpc.la \ libvirt_driver_security.la \ @@ -1553,7 +1557,8 @@ libvirt_lxc_LDADD += $(APPARMOR_LIBS) endif libvirt_lxc_CFLAGS = \ -I$(top_srcdir)/src/conf \ - $(AM_CFLAGS) + $(AM_CFLAGS) \ + $(FUSE_CFLAGS) if HAVE_LIBBLKID libvirt_lxc_CFLAGS += $(BLKID_CFLAGS) libvirt_lxc_LDADD += $(BLKID_LIBS) diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c index 4777d51..ff3bb6d 100644 --- a/src/lxc/lxc_controller.c +++ b/src/lxc/lxc_controller.c @@ -59,6 +59,7 @@ #include "lxc_conf.h" #include "lxc_container.h" #include "lxc_cgroup.h" +#include "lxc_fuse.h" #include "virnetdev.h" #include "virnetdevveth.h" #include "memory.h" @@ -1329,6 +1330,7 @@ int main(int argc, char *argv[]) char *name = NULL; size_t nveths = 0; char **veths = NULL; + virThread thread; int handshakeFd = -1; int bg = 0; const struct option options[] = { @@ -1516,6 +1518,14 @@ int main(int argc, char *argv[]) } } + rc = virThreadCreate(&thread, true, lxcRegisterFuse, + (void *)ctrl->def); + if (rc < 0) { + virReportSystemError(-rc, "%s", + _("Create Fuse filesystem failed")); + goto cleanup; + } + rc = virLXCControllerRun(ctrl); cleanup: diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c new file mode 100644 index 0000000..3ffe82d --- /dev/null +++ b/src/lxc/lxc_fuse.c @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2012 Fujitsu Limited. + * + * lxc_fuse.c: fuse filesystem support for libvirt lxc + * + * Authors: + * Gao feng <gaofeng at cn.fujitsu.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <config.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <sys/mount.h> + +#include "virterror_internal.h" +#include "lxc_fuse.h" + + +#define VIR_FROM_THIS VIR_FROM_LXC + +#if HAVE_FUSE + +static int lxcProcGetattr(const char *path, struct stat *stbuf) +{ + int res = 0; + + memset(stbuf, 0, sizeof(struct stat)); + + if (STREQ(path, "/")) { + stbuf->st_mode = S_IFDIR | 0755; + stbuf->st_nlink = 2; + } else { + res = -ENOENT; + } + + return res; +} + +static int lxcProcReaddir(const char *path, void *buf, + fuse_fill_dir_t filler, + off_t offset ATTRIBUTE_UNUSED, + struct fuse_file_info *fi ATTRIBUTE_UNUSED) +{ + if (STREQ(path, "/")) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + + return 0; +} + +static int lxcProcOpen(const char *path ATTRIBUTE_UNUSED, + struct fuse_file_info *fi ATTRIBUTE_UNUSED) +{ + return -ENOENT; +} + +static int lxcProcRead(const char *path ATTRIBUTE_UNUSED, + char *buf ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED, + off_t offset ATTRIBUTE_UNUSED, + struct fuse_file_info *fi ATTRIBUTE_UNUSED) +{ + return -ENOENT; +} + +static struct fuse_operations lxcProcOper = { + .getattr = lxcProcGetattr, + .readdir = lxcProcReaddir, + .open = lxcProcOpen, + .read = lxcProcRead, +}; + +void lxcRegisterFuse(void *DomainDef) +{ + int argc = 4; + char *argv[argc]; + char *path = NULL; + char *name = NULL; + virDomainDefPtr def = (virDomainDefPtr) DomainDef; + + if (virAsprintf(&name, 
"Lxc-%s-fuse", def->name) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (virAsprintf(&path, "%s/%s/", LXC_STATE_DIR, def->name) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (virFileMakePath(path) < 0) { + virReportSystemError(errno, _("Cannot create %s"), path); + goto cleanup; + } + + argv[0] = name; + argv[1] = path; + argv[2] = (char *)"-odirect_io"; + argv[3] = (char *)"-f"; + + if (fuse_main(argc, argv, &lxcProcOper, def) < 0) + virReportSystemError(errno, "%s", _("Cannot start fuse\n")); + +cleanup: + VIR_FREE(name); + VIR_FREE(path); + return; +} + +void lxcUnregisterFuse(virDomainDefPtr def) +{ + char *path = NULL; + + if (virAsprintf(&path, "%s/%s/", LXC_STATE_DIR, def->name) < 0) { + virReportOOMError(); + return; + } + + if (umount(path) < 0) + virReportSystemError(errno, "%s", + _("umount fuse filesystem failed\n")); + + VIR_FREE(path); +} + +#else +void lxcRegisterFuse(void *DomainDef ATTRIBUTE_UNUSED) +{ +} + +void lxcUnregisterFuse(virDomainDefPtr def ATTRIBUTE_UNUSED) +{ +} +#endif diff --git a/src/lxc/lxc_fuse.h b/src/lxc/lxc_fuse.h new file mode 100644 index 0000000..d60c238 --- /dev/null +++ b/src/lxc/lxc_fuse.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2012 Fujitsu Limited. + * + * lxc_fuse.c: fuse filesystem support for libvirt lxc + * + * Authors: + * Gao feng <gaofeng at cn.fujitsu.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LXC_FUSE_H +#define LXC_FUSE_H + +#define FUSE_USE_VERSION 26 + +#include <config.h> +#if HAVE_FUSE +#include <fuse.h> +#endif + +#include "lxc_conf.h" +#include "util.h" +#include "memory.h" + +extern void lxcRegisterFuse(void *DomainDef); +extern void lxcUnregisterFuse(virDomainDefPtr def); +#endif diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 12f6ae6..341a2b8 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -28,6 +28,7 @@ #include "lxc_process.h" #include "lxc_domain.h" #include "lxc_container.h" +#include "lxc_fuse.h" #include "datatypes.h" #include "virfile.h" #include "virpidfile.h" @@ -542,6 +543,7 @@ int lxcVmTerminate(lxc_driver_t *driver, return -1; } + lxcUnregisterFuse(vm->def); virSecurityManagerRestoreAllLabel(driver->securityManager, vm->def, false); virSecurityManagerReleaseLabel(driver->securityManager, vm->def); -- 1.7.7.6

virCgroupGetMemSwapUsage is used to get container's swap usage, with this interface,we can get swap usage in fuse filesystem. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/cgroup.c | 20 ++++++++++++++++++++ src/util/cgroup.h | 1 + 3 files changed, 22 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 734c881..9cbdbb4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -82,6 +82,7 @@ virCgroupGetCpuacctStat; virCgroupGetCpuacctUsage; virCgroupGetCpusetMems; virCgroupGetFreezerState; +virCgroupGetMemSwapUsage; virCgroupGetMemSwapHardLimit; virCgroupGetMemoryHardLimit; virCgroupGetMemorySoftLimit; diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 5b32881..d6fcd61 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -1188,6 +1188,26 @@ int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb) } /** + * virCgroupGetMemSwapUsage: + * + * @group: The cgroup to get mem+swap usage for + * @kb: The mem+swap amount in kilobytes + * + * Returns: 0 on success + */ +int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb) +{ + long long unsigned int usage_in_bytes; + int ret; + ret = virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_MEMORY, + "memory.memsw.usage_in_bytes", &usage_in_bytes); + if (ret == 0) + *kb = usage_in_bytes >> 10; + return ret; +} + +/** * virCgroupGetMemSwapHardLimit: * * @group: The cgroup to get mem+swap hard limit for diff --git a/src/util/cgroup.h b/src/util/cgroup.h index 05325ae..caca362 100644 --- a/src/util/cgroup.h +++ b/src/util/cgroup.h @@ -70,6 +70,7 @@ int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb); int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb); int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb); int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb); +int 
virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb); enum { VIR_CGROUP_DEVICE_READ = 1, -- 1.7.7.6

because libvirt_lxc's cgroup mountpoint is what it shown in /proc/self/cgroup. we can get container's cgroup through virCgroupNew("/", &group), add interface virCgroupGetAppRoot to help container to get it's cgroup. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/libvirt_private.syms | 1 + src/util/cgroup.c | 18 ++++++++++++++++++ src/util/cgroup.h | 2 ++ 3 files changed, 21 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 9cbdbb4..53c35da 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -71,6 +71,7 @@ virCgroupDenyDeviceMajor; virCgroupDenyDevicePath; virCgroupForDomain; virCgroupForDriver; +virCgroupGetAppRoot; virCgroupForVcpu; virCgroupFree; virCgroupGetBlkioWeight; diff --git a/src/util/cgroup.c b/src/util/cgroup.c index d6fcd61..ee88aaf 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -846,6 +846,24 @@ int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED, /** + * virCgroupGetAppRoot: + * + * @group: Pointer to returned virCgroupPtr + * + * Returns 0 on success + */ + +int virCgroupGetAppRoot(virCgroupPtr *group) +{ +#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R + return virCgroupNew("/", group); +#else + return -ENXIO; +#endif +} + + +/** * virCgroupForDomain: * * @driver: group for driver owning the domain diff --git a/src/util/cgroup.h b/src/util/cgroup.h index caca362..f0e11fe 100644 --- a/src/util/cgroup.h +++ b/src/util/cgroup.h @@ -37,6 +37,8 @@ int virCgroupForDriver(const char *name, int privileged, int create); +int virCgroupGetAppRoot(virCgroupPtr *group); + int virCgroupForDomain(virCgroupPtr driver, const char *name, virCgroupPtr *group, -- 1.7.7.6

with this patch,container's meminfo will be shown based on containers' mem cgroup. Right now,it's impossible to virtualize all values in meminfo, I collect some values such as MemTotal,MemFree,Cached,Active, Inactive,Active(anon),Inactive(anon),Active(file),Inactive(anon), Active(file),Inactive(file),Unevictable,SwapTotal,SwapFree. if I miss something, please let me know. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_cgroup.c | 160 +++++++++++++++++++++++++++++++++++ src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_fuse.c | 230 +++++++++++++++++++++++++++++++++++++++++++++++--- src/lxc/lxc_fuse.h | 14 +++ 4 files changed, 393 insertions(+), 13 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index ddd4901..e93c4a2 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -23,10 +23,12 @@ #include "lxc_cgroup.h" #include "lxc_container.h" +#include "lxc_fuse.h" #include "virterror_internal.h" #include "logging.h" #include "memory.h" #include "cgroup.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_LXC @@ -138,6 +140,164 @@ cleanup: } +static int virLXCCgroupGetMemSwapUsage(virCgroupPtr cgroup, + unsigned long long *usage) +{ + return virCgroupGetMemSwapUsage(cgroup, usage); +} + + +static int virLXCCgroupGetMemSwapTotal(virCgroupPtr cgroup, + unsigned long long *total) +{ + return virCgroupGetMemSwapHardLimit(cgroup, total); +} + + +static int virLXCCgroupGetMemUsage(virCgroupPtr cgroup, + unsigned long long *usage) +{ + int ret; + unsigned long memUsage; + + ret = virCgroupGetMemoryUsage(cgroup, &memUsage); + *usage = (unsigned long long) memUsage; + + return ret; +} + + +static int virLXCCgroupGetMemTotal(virCgroupPtr cgroup, + unsigned long long *total) +{ + return virCgroupGetMemoryHardLimit(cgroup, total); +} + + +static int virLXCCgroupGetMemStat(virCgroupPtr cgroup, + unsigned long long *meminfo) +{ + int ret = 0; + FILE *statfd = NULL; + char *statFile = NULL; + char line[1024]; + + ret = 
virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY, + "memory.stat", &statFile); + if (ret < 0 ) { + virReportSystemError(-ret, "%s", + _("cannot get the path of MEMORY cgroup controller")); + return ret; + } + + statfd = fopen(statFile, "r"); + if (statfd == NULL) { + ret = -ENOENT; + goto out_free; + } + + while (fgets(line, sizeof(line), statfd) != NULL) { + char *value = strchr(line, ' '); + char *nl = value ? strchr(line, '\n') : NULL; + unsigned long long stat_value; + + if (!value) + continue; + + if (nl) + *nl = '\0'; + + *value = '\0'; + if (STREQ(line, "cache")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[CACHED] = stat_value >> 10; + } else if (STREQ(line, "inactive_anon")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[INACTIVE_ANON] = stat_value >> 10; + } else if (STREQ(line, "active_anon")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[ACTIVE_ANON] = stat_value >> 10; + } else if (STREQ(line, "inactive_file")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[INACTIVE_FILE] = stat_value >> 10; + } else if (STREQ(line, "active_file")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[ACTIVE_FILE] = stat_value >> 10; + } else if (STREQ(line, "unevictable")) { + if ((ret = virStrToLong_ull(value + 1, NULL, 10, &stat_value)) < 0) + goto out; + meminfo[UNEVICTABLE] = stat_value >> 10; + } + } + ret = 0; +out: + VIR_FORCE_FCLOSE(statfd); +out_free: + VIR_FREE(statFile); + return ret; +} + + +int virLXCCgroupGetMeminfo(unsigned long long *meminfo) +{ + int ret; + virCgroupPtr cgroup; + + ret = virCgroupGetAppRoot(&cgroup); + if (ret < 0) { + virReportSystemError(-ret, "%s", + _("Unable to get cgroup for container")); + return ret; + } + + ret = virLXCCgroupGetMemStat(cgroup, meminfo); + if (ret < 0) { + 
virReportSystemError(-ret, "%s", + _("Unable to get memory cgroup stat info")); + goto out; + } + + ret = virLXCCgroupGetMemTotal(cgroup, &meminfo[MEMTOTAL]); + if (ret < 0) { + virReportSystemError(-ret, "%s", + _("Unable to get memory cgroup total")); + goto out; + } + + ret = virLXCCgroupGetMemUsage(cgroup, &meminfo[MEMUSAGE]); + if (ret < 0) { + virReportSystemError(-ret, "%s", + _("Unable to get memory cgroup stat usage")); + goto out; + } + + ret = virLXCCgroupGetMemSwapTotal(cgroup, &meminfo[SWAPTOTAL]); + if (ret < 0) { + virReportSystemError(-ret, "%s", + _("Unable to get memory cgroup stat swaptotal")); + goto out; + } + + ret = virLXCCgroupGetMemSwapUsage(cgroup, &meminfo[SWAPUSAGE]); + if (ret < 0) { + virReportSystemError(-ret, "%s", + _("Unable to get memory cgroup stat swapusage")); + goto out; + } + + ret = 0; +out: + virCgroupFree(&cgroup); + + return ret; +} + + typedef struct _virLXCCgroupDevicePolicy virLXCCgroupDevicePolicy; typedef virLXCCgroupDevicePolicy *virLXCCgroupDevicePolicyPtr; diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index 97bb12a..55553c1 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -25,5 +25,5 @@ # include "domain_conf.h" int virLXCCgroupSetup(virDomainDefPtr def); - +int virLXCCgroupGetMeminfo(unsigned long long *meminfo); #endif /* __VIR_LXC_CGROUP_H__ */ diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c index 3ffe82d..6f6e9ef 100644 --- a/src/lxc/lxc_fuse.c +++ b/src/lxc/lxc_fuse.c @@ -30,25 +30,48 @@ #include "virterror_internal.h" #include "lxc_fuse.h" +#include "lxc_cgroup.h" +#include "virfile.h" #define VIR_FROM_THIS VIR_FROM_LXC #if HAVE_FUSE +static const char *meminfo_path = "/meminfo"; + static int lxcProcGetattr(const char *path, struct stat *stbuf) { - int res = 0; + int res; + char *mempath = NULL; + struct stat sb; memset(stbuf, 0, sizeof(struct stat)); + if ((res = virAsprintf(&mempath, "/proc/%s", path)) < 0) { + virReportOOMError(); + return res; + } + + res = 0; if 
(STREQ(path, "/")) { stbuf->st_mode = S_IFDIR | 0755; stbuf->st_nlink = 2; + } else if (STREQ(path, meminfo_path)) { + stat(mempath, &sb); + stbuf->st_mode = sb.st_mode; + stbuf->st_nlink = 1; + stbuf->st_blksize = sb.st_blksize; + stbuf->st_blocks = sb.st_blocks; + stbuf->st_size = sb.st_size; + stbuf->st_atime = sb.st_atime; + stbuf->st_ctime = sb.st_ctime; + stbuf->st_mtime = sb.st_mtime; } else { res = -ENOENT; } + VIR_FREE(mempath); return res; } @@ -57,28 +80,211 @@ static int lxcProcReaddir(const char *path, void *buf, off_t offset ATTRIBUTE_UNUSED, struct fuse_file_info *fi ATTRIBUTE_UNUSED) { - if (STREQ(path, "/")) + if (!STREQ(path, "/")) return -ENOENT; filler(buf, ".", NULL, 0); filler(buf, "..", NULL, 0); + filler(buf, meminfo_path + 1, NULL, 0); return 0; } -static int lxcProcOpen(const char *path ATTRIBUTE_UNUSED, - struct fuse_file_info *fi ATTRIBUTE_UNUSED) +static int lxcProcOpen(const char *path, + struct fuse_file_info *fi) { - return -ENOENT; + if (!STREQ(path, meminfo_path)) + return -ENOENT; + + if ((fi->flags & 3) != O_RDONLY) + return -EACCES; + + return 0; } -static int lxcProcRead(const char *path ATTRIBUTE_UNUSED, - char *buf ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED, - off_t offset ATTRIBUTE_UNUSED, +static int lxcProcHostRead(char *path, char *buf, size_t size, off_t offset) +{ + int fd; + int res; + + fd = open(path, O_RDONLY); + if (fd == -1) + return -errno; + + res = pread(fd, buf, size, offset); + if (res == -1) + res = -errno; + + VIR_FORCE_CLOSE(fd); + return res; +} + +static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, + char *buf, size_t size, off_t offset) +{ + int copied = 0; + int res = 0; + FILE *fd = NULL; + char line[1024]; + unsigned long long meminfo[MEMMAX]; + memset(meminfo, 0, sizeof(meminfo)); + + if ((res = virLXCCgroupGetMeminfo(meminfo)) < 0) + return res; + + fd = fopen(hostpath, "r"); + if (fd == NULL) { + virReportSystemError(errno, _("Cannot open %s"), hostpath); + res = -errno; + 
goto out; + } + + fseek(fd, offset, SEEK_SET); + + while (copied < size && fgets(line, sizeof(line), fd) != NULL) { + int len = 0; + char *new_line = NULL; + char *ptr = strchr(line, ':'); + if (ptr) { + *ptr = '\0'; + new_line = line; + + if (STREQ(line, "MemTotal") && + (def->mem.hard_limit || def->mem.max_balloon)) { + if ((res = virAsprintf(&new_line, "MemTotal: %8llu KB\n", + meminfo[MEMTOTAL])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "MemFree") && + (def->mem.hard_limit || def->mem.max_balloon)) { + if ((res = virAsprintf(&new_line, "MemFree: %8llu KB\n", + (meminfo[MEMTOTAL] - meminfo[MEMUSAGE]))) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Buffers")) { + if ((res = virAsprintf(&new_line, "Buffers: %8d KB\n", 0)) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Cached")) { + if ((res = virAsprintf(&new_line, "Cached: %8llu KB\n", + meminfo[CACHED])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Active")) { + if ((res = virAsprintf(&new_line, "Active: %8llu KB\n", + (meminfo[ACTIVE_ANON] + meminfo[ACTIVE_FILE]))) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Inactive")) { + if ((res = virAsprintf(&new_line, "Inactive: %8llu KB\n", + (meminfo[INACTIVE_ANON] + meminfo[INACTIVE_FILE]))) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Active(anon)")) { + if ((res = virAsprintf(&new_line, "Active(anon): %8llu KB\n", + meminfo[ACTIVE_ANON])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Inactive(anon)")) { + if ((res = virAsprintf(&new_line, "Inactive(anon): %8llu KB\n", + meminfo[INACTIVE_ANON])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Active(file)")) { + if ((res = virAsprintf(&new_line, "Active(file): %8llu KB\n", + meminfo[ACTIVE_FILE])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Inactive(file)")) { + if ((res = 
virAsprintf(&new_line, "Inactive(file): %8llu KB\n", + meminfo[INACTIVE_FILE])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "Unevictable")) { + if ((res = virAsprintf(&new_line, "Unevictable: %8llu KB\n", + meminfo[UNEVICTABLE])) < 0) { + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "SwapTotal") && def->mem.swap_hard_limit) { + if ((res = virAsprintf(&new_line, "SwapTotal: %8llu KB\n", + (meminfo[SWAPTOTAL] - meminfo[MEMTOTAL]))) < 0){ + virReportOOMError(); + goto out; + } + } else if (STREQ(line, "SwapFree") && def->mem.swap_hard_limit) { + if ((res = virAsprintf(&new_line, "SwapFree: %8llu KB\n", + (meminfo[SWAPTOTAL] - meminfo[MEMTOTAL] - + meminfo[SWAPUSAGE] + meminfo[MEMUSAGE]))) < 0) { + virReportOOMError(); + goto out; + } + } + *ptr=':'; + } + + len = strlen(new_line); + + if (copied + len > size) + len = size - copied; + + memcpy(buf + copied, new_line, len); + copied += len; + memset(line, 0, sizeof(line)); + if (new_line != line) + VIR_FREE(new_line); + } + res = copied; +out: + VIR_FORCE_FCLOSE(fd); + return res; +} + +static int lxcProcRead(const char *path, + char *buf, + size_t size, + off_t offset, struct fuse_file_info *fi ATTRIBUTE_UNUSED) { - return -ENOENT; + int res = 0; + char *hostpath = NULL; + struct fuse_context *context = NULL; + virDomainDefPtr def = NULL; + + if ((res = virAsprintf(&hostpath, "/proc/%s", path)) < 0) { + virReportOOMError(); + return res; + } + + context = fuse_get_context(); + def = (virDomainDefPtr)context->private_data; + + if (STREQ(path, meminfo_path)) { + res = lxcProcReadMeminfo(hostpath, def, buf, size, offset); + } else { + res = -ENOENT; + goto out; + } + + if (res < 0) { + if((res = lxcProcHostRead(hostpath, buf, size, offset)) < 0) + virReportSystemError(errno, "%s", + _("failed to show host's meminfo")); + } + +out: + VIR_FREE(hostpath); + return res; } static struct fuse_operations lxcProcOper = { @@ -117,7 +323,7 @@ void lxcRegisterFuse(void *DomainDef) argv[3] 
= (char *)"-f"; if (fuse_main(argc, argv, &lxcProcOper, def) < 0) - virReportSystemError(errno, "%s", _("Cannot start fuse\n")); + virReportSystemError(errno, "%s", _("Cannot start fuse")); cleanup: VIR_FREE(name); @@ -136,7 +342,7 @@ void lxcUnregisterFuse(virDomainDefPtr def) if (umount(path) < 0) virReportSystemError(errno, "%s", - _("umount fuse filesystem failed\n")); + _("umount fuse filesystem failed")); VIR_FREE(path); } diff --git a/src/lxc/lxc_fuse.h b/src/lxc/lxc_fuse.h index d60c238..7db534b 100644 --- a/src/lxc/lxc_fuse.h +++ b/src/lxc/lxc_fuse.h @@ -35,6 +35,20 @@ #include "util.h" #include "memory.h" +enum { + MEMTOTAL, + MEMUSAGE, + CACHED, + ACTIVE_ANON, + INACTIVE_ANON, + ACTIVE_FILE, + INACTIVE_FILE, + UNEVICTABLE, + SWAPTOTAL, + SWAPUSAGE, + MEMMAX, +}; + extern void lxcRegisterFuse(void *DomainDef); extern void lxcUnregisterFuse(virDomainDefPtr def); #endif -- 1.7.7.6

we already have virtualize meminfo for container through fuse filesystem, add function lxcContainerMountProcFuse to mount this meminfo file to the container's /proc/meminfo. So we can isolate container's /proc/meminfo from host now. Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com> --- src/lxc/lxc_container.c | 37 +++++++++++++++++++++++++++++++++++++ 1 files changed, 37 insertions(+), 0 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 6fdf359..6067cd2 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -517,6 +517,35 @@ cleanup: return rc; } +#if HAVE_FUSE +static int lxcContainerMountProcFuse(virDomainDefPtr def, + const char *srcprefix) +{ + int ret = 0; + char *meminfo_path = NULL; + + if ((ret = virAsprintf(&meminfo_path, + "%s/%s/%s/meminfo", + srcprefix, LXC_STATE_DIR, + def->name)) < 0) + return ret; + + if ((ret = mount(meminfo_path, "/proc/meminfo", + NULL, MS_BIND, NULL)) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /proc/meminfo"), + meminfo_path); + } + VIR_FREE(meminfo_path); + return ret; +} +#else +static int lxcContainerMountProcFuse(virDomainDefPtr def ATTRIBUTE_UNUSED, + const char *srcprefix ATTRIBUTE_UNUSED) +{ + return 0; +} +#endif static int lxcContainerMountFSDevPTS(virDomainFSDefPtr root) { @@ -1441,6 +1470,10 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, if (lxcContainerMountBasicFS(true, sec_mount_options) < 0) goto cleanup; + /* Mounts /proc/meminfo etc sysinfo */ + if (lxcContainerMountProcFuse(vmDef, "/.oldroot") < 0) + goto cleanup; + /* Now we can re-mount the cgroups controllers in the * same configuration as before */ if (lxcContainerMountCGroups(mounts, nmounts, sec_mount_options) < 0) @@ -1520,6 +1553,10 @@ static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef, if (lxcContainerMountBasicFS(false, sec_mount_options) < 0) goto cleanup; + /* Mounts /proc/meminfo etc sysinfo */ + if (lxcContainerMountProcFuse(vmDef, "/.oldroot") < 0) + 
goto cleanup; + /* Now we can re-mount the cgroups controllers in the * same configuration as before */ if (lxcContainerMountCGroups(mounts, nmounts, sec_mount_options) < 0) -- 1.7.7.6

于 2012年07月23日 17:49, Gao feng 写道:
add a configure option --with-fuse to prepare introduction of fuse support for libvirt lxc.
With help from Daniel
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Hi Daniel, can you take a look at this patchset and give me some suggestions? Thanks!

Hi Gao, I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers, which is exactly what your patch is looking at: https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys. Regards, Daniel On Tue, Jul 31, 2012 at 02:33:52PM +0800, Gao feng wrote:
于 2012年07月23日 17:49, Gao feng 写道:
add a configure option --with-fuse to prepare introduction of fuse support for libvirt lxc.
With help from Daniel
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Hi Daniel
Can you take a look at this patchset and give me some suggestions?
Thanks!
-- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Hi Daniel 于 2012年07月31日 17:27, Daniel P. Berrange 写道:
Hi Gao,
I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August ? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers which is exactly what your patch is looking at
https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc
I am very interested in this, actually I did consider making /proc virtualization. I think it's the best way to resolve the problems we face.
I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys.
Agree with you, Maybe some people have some good ideas. :) Thanks Gao
Regards, Daniel
On Tue, Jul 31, 2012 at 02:33:52PM +0800, Gao feng wrote:
于 2012年07月23日 17:49, Gao feng 写道:
add a configure option --with-fuse to prepare introduction of fuse support for libvirt lxc.
With help from Daniel
Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
Hi Daniel
Can you take a look at this patchset and give me some suggestions?
Thanks!

Hi Daniel & Glauber 于 2012年07月31日 17:27, Daniel P. Berrange 写道:
Hi Gao,
I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August ? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers which is exactly what your patch is looking at
https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc
I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys.
I did not attend the LPC, so can you tell me what the current situation of the /proc virtualization is? I think maybe we should just apply this patchset first, and wait for somebody to send patches to implement /proc virtualization. Thanks Gao

On Wed, Sep 05, 2012 at 05:41:40PM +0800, Gao feng wrote:
Hi Daniel & Glauber
于 2012年07月31日 17:27, Daniel P. Berrange 写道:
Hi Gao,
I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August ? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers which is exactly what your patch is looking at
https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc
I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys.
I did not attend the LPC,so can you tell me what's the situation of the /proc virtualization?
I think maybe we should just apply this patchset first,and wait for somebody sending patches to implement /proc virtualization.
So there were three main approaches discussed: 1. FUSE based /proc + a real hidden /.proc. The FUSE /proc provides custom handling of various files like meminfo, otherwise forwards I/O requests through to the hidden /.proc files. This was the original proof of concept. 2. One FUSE filesystem for all containers + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what Glauber has done. 3. One FUSE filesystem per container + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what your patch is doing. Options 2 & 3 have a clear win over option 1 in efficiency terms, since they avoid doubling the I/O required for the majority of files. Glauber thinks it is preferable to have a single FUSE filesystem that has one sub-directory for each container. Then bind mount the appropriate sub dir into each container. I kinda like the way you have done things, having a private FUSE filesystem per container, for security reasons. By having the FUSE backend be part of the libvirt_lxc process we have strictly isolated each container's environment. If we wanted a single shared FUSE for all containers, we'd need to have some single shared daemon to maintain it. This could not be libvirtd itself, since we need the containers & their filesystems to continue to work when libvirtd itself is not running. We could introduce a separate libvirt_fused which provided a shared filesystem, but this still has the downside that any flaw in its implementation could provide a way for one container to attack another container. So in summary, I think your patches which add a private FUSE per container in libvirt_lxc appear to be the best option at this time. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

于 2012年09月05日 20:42, Daniel P. Berrange 写道:
On Wed, Sep 05, 2012 at 05:41:40PM +0800, Gao feng wrote:
Hi Daniel & Glauber
于 2012年07月31日 17:27, Daniel P. Berrange 写道:
Hi Gao,
I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August ? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers which is exactly what your patch is looking at
https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc
I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys.
I did not attend the LPC,so can you tell me what's the situation of the /proc virtualization?
I think maybe we should just apply this patchset first,and wait for somebody sending patches to implement /proc virtualization.
So there were three main approaches discussed
1. FUSE based /proc + a real hidden /.proc. The FUSE /proc provides custom handling of various files like meminfo, otherwise forwards I/O requests through to the hidden /.proc files. This was the original proof of concept.
2. One FUSE filesystem for all containers + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what Glauber has done.
3. One FUSE filesystem per container + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what your patch is doing
Options 2 & 3 have a clear win over option 1 in efficiency terms, since they avoid doubling the I/O required for the majority of files.
Glauber thinks it is preferable to have a single FUSE filesystem that has one sub-directory for each container. Then bind mount the appropriate sub dir into each container.
I kinda like the way you have done things, having a private FUSE filesystem per container, for security reasons. By having the FUSE backend be part of the libvirt_lxc process we have strictly isolated each containers' environment.
If we wanted a single shared FUSE for all containers, we'd need to have some single shared daemon to maintain it. This could not be libvirtd itself, since we need the containers & their filesystems to continue to work when libvirtd itself is not running. We could introduce a separate libvirt_fused which provided a shared filesystem, but this still has the downside that any flaw in its impl could provide a way for one container to attack another container
Agreed. If we choose option 2, we have to organize a sub-directory for each container in fuse, which will make the fuse filesystem complicated.
So in summary, I think your patches which add a private FUSE per container in libvirt_lxc appear to be the best option at this time.
OK, I will rebase this patchset and send the v3 patchset. Thanks Gao

On 09/06/2012 05:53 AM, Gao feng wrote:
于 2012年09月05日 20:42, Daniel P. Berrange 写道:
On Wed, Sep 05, 2012 at 05:41:40PM +0800, Gao feng wrote:
Hi Daniel & Glauber
于 2012年07月31日 17:27, Daniel P. Berrange 写道:
Hi Gao,
I'm wondering if you are planning to attend the Linux Plumbers Conference in San Diego at the end of August ? Glauber is going to be giving a talk on precisely the subject of virtualizing /proc in containers which is exactly what your patch is looking at
https://blueprints.launchpad.net/lpc/+spec/lpc2012-cont-proc
I'll review your patches now, but I think I'd like to wait to hear what Glauber talks about at LPC before we try to merge this support in libvirt, so we have a broadly agreed long-term strategy for /proc between all the interested userspace & kernel guys.
I did not attend the LPC,so can you tell me what's the situation of the /proc virtualization?
I think maybe we should just apply this patchset first,and wait for somebody sending patches to implement /proc virtualization.
So there were three main approaches discussed
1. FUSE based /proc + a real hidden /.proc. The FUSE /proc provides custom handling of various files like meminfo, otherwise forwards I/O requests through to the hidden /.proc files. This was the original proof of concept.
2. One FUSE filesystem for all containers + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what Glauber has done.
3. One FUSE filesystem per container + a real /proc. Bind mount files from the FUSE filesystem into the container's /proc. This is what your patch is doing
Options 2 & 3 have a clear win over option 1 in efficiency terms, since they avoid doubling the I/O required for the majority of files.
Glauber thinks it is preferable to have a single FUSE filesystem that has one sub-directory for each container. Then bind mount the appropriate sub dir into each container.
I kinda like the way you have done things, having a private FUSE filesystem per container, for security reasons. By having the FUSE backend be part of the libvirt_lxc process we have strictly isolated each containers' environment.
If we wanted a single shared FUSE for all containers, we'd need to have some single shared daemon to maintain it. This could not be libvirtd itself, since we need the containers & their filesystems to continue to work when libvirtd itself is not running. We could introduce a separate libvirt_fused which provided a shared filesystem, but this still has the downside that any flaw in its impl could provide a way for one container to attack another container
Agree,if we choose the option 2,we have to organize the sub-directory for each container in fuse,it will make fuse filesystem complicated.
So, according to Daniel Lezcano, who tried it once, FUSE is very fork-intensive, and having one mount per container would lead to bad performance. But I have to admit I have never measured it myself. I would be curious to see any numbers for a large deployment, to see if that complication is worth the gain.
participants (3)
-
Daniel P. Berrange
-
Gao feng
-
Glauber Costa