O_DIRECT has stringent requirements. Rather than make lots of changes
at each site that wants to use O_DIRECT, it is easier to offload
the work through a helper process that mirrors the I/O between a
pipe and the actual direct fd, so that the other end of the pipe
no longer has to worry about constraints.
Plus, if the kernel ever gains better posix_fadvise support, then we
only have to touch a single file to let all callers benefit from a
more efficient way to avoid file system caching.
* src/util/virfile.h (virFileDirectFdFlag, virFileDirectFdNew)
(virFileDirectFdClose, virFileDirectFdFree): New prototypes.
* src/util/virdirect.c: Implement new wrapper object.
* src/libvirt_private.syms (virfile.h): Export new symbols.
* cfg.mk (useless_free_options): Add to list.
* po/POTFILES.in: Add new translations.
---
v2: move everything into virfile.c rather than creating a new file,
with renames everywhere. Fail rather than silently ignore systems
where O_DIRECT is 0. Add comments to take advantage of posix_fadvise
rather than O_DIRECT if the Linux kernel ever gets fixed. Add
virFileDirectFdFlag as a better probe of which cache-bypass mechanism
is actually being used. Better documentation.
cfg.mk | 1 +
po/POTFILES.in | 1 +
src/libvirt_private.syms | 4 +
src/util/virfile.c | 175 +++++++++++++++++++++++++++++++++++++++++++++-
src/util/virfile.h | 15 ++++
5 files changed, 195 insertions(+), 1 deletions(-)
diff --git a/cfg.mk b/cfg.mk
index 0a624f1..773a3df 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -119,6 +119,7 @@ useless_free_options = \
--name=virDomainSoundDefFree \
--name=virDomainVideoDefFree \
--name=virDomainWatchdogDefFree \
+ --name=virFileDirectFdFree \
--name=virHashFree \
--name=virInterfaceDefFree \
--name=virInterfaceIpDefFree \
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 5782cbf..7bb08b0 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -122,6 +122,7 @@ src/util/storage_file.c
src/util/sysinfo.c
src/util/util.c
src/util/viraudit.c
+src/util/virfile.c
src/util/virterror.c
src/util/xml.c
src/vbox/vbox_MSCOMGlue.c
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d42e0a0..0bab64d 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1082,6 +1082,10 @@ virAuditSend;
# virfile.h
virFileClose;
+virFileDirectFdClose;
+virFileDirectFdFlag;
+virFileDirectFdFree;
+virFileDirectFdNew;
virFileFclose;
virFileFdopen;
diff --git a/src/util/virfile.c b/src/util/virfile.c
index 6576921..8b32518 100644
--- a/src/util/virfile.c
+++ b/src/util/virfile.c
@@ -23,10 +23,24 @@
*/
#include <config.h>
+#include "internal.h"
+#include "virfile.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
#include <unistd.h>
-#include "virfile.h"
+#include "command.h"
+#include "configmake.h"
+#include "memory.h"
+#include "virterror_internal.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE
+#define virFileError(code, ...) \
+ virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \
+ __FUNCTION__, __LINE__, __VA_ARGS__)
+
int virFileClose(int *fdptr, bool preserve_errno)
{
@@ -78,3 +92,162 @@ FILE *virFileFdopen(int *fdptr, const char *mode)
return file;
}
+
+
+/* Opaque type for managing a wrapper around an O_DIRECT fd. For now,
+ * read-write is not supported, just a single direction. */
+struct _virFileDirectFd {
+ virCommandPtr cmd; /* Child iohelper process to do the I/O. */
+};
+
+/**
+ * virFileDirectFdFlag:
+ *
+ * Returns 0 if the kernel can avoid file system cache pollution
+ * without any additional flags, O_DIRECT if the original fd must be
+ * opened in direct mode, or -1 if there is no support for bypassing
+ * the file system cache.
+ */
+int
+virFileDirectFdFlag(void)
+{
+ /* XXX For now, Linux posix_fadvise is not powerful enough to
+ * avoid O_DIRECT. */
+ return O_DIRECT ? O_DIRECT : -1;
+}
+
+/**
+ * virFileDirectFdNew:
+ * @fd: pointer to fd to wrap
+ * @name: name of fd, for diagnostics
+ *
+ * Update *FD (created with virFileDirectFdFlag() among the flags to
+ * open()) to ensure that all I/O to that file will bypass the system
+ * cache. This must be called after open() and optional fchown() or
+ * fchmod(), but before any seek or I/O, and only on seekable fd. The
+ * file must be O_RDONLY (to read the entire existing file) or
+ * O_WRONLY (to write to an empty file). In some cases, *FD is
+ * changed to a non-seekable pipe; in this case, the caller must not
+ * do anything further with the original fd.
+ *
+ * On success, the new wrapper object is returned, which must be later
+ * freed with virFileDirectFdFree(). On failure, *FD is unchanged, an
+ * error message is output, and NULL is returned.
+ */
+virFileDirectFdPtr
+virFileDirectFdNew(int *fd, const char *name)
+{
+ virFileDirectFdPtr ret = NULL;
+ bool output = false;
+ int pipefd[2] = { -1, -1 };
+ int mode = -1;
+
+ /* XXX support posix_fadvise rather than spawning a child, if the
+ * kernel support for that is decent enough. */
+
+ if (!O_DIRECT) {
+ virFileError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("O_DIRECT unsupported on this platform"));
+ return NULL;
+ }
+
+ if (VIR_ALLOC(ret) < 0) {
+ virReportOOMError();
+ return NULL;
+ }
+
+#ifdef F_GETFL
+ /* Mingw lacks F_GETFL, but it also lacks O_DIRECT so didn't get
+ * here in the first place. All other platforms reach this
+ * line. */
+ mode = fcntl(*fd, F_GETFL);
+#endif
+
+ if (mode < 0) {
+ virFileError(VIR_ERR_INTERNAL_ERROR, _("invalid fd %d for %s"),
+ *fd, name);
+ goto error;
+ } else if ((mode & O_ACCMODE) == O_WRONLY) {
+ output = true;
+ } else if ((mode & O_ACCMODE) != O_RDONLY) {
+ virFileError(VIR_ERR_INTERNAL_ERROR, _("unexpected mode %x for %s"),
+ mode & O_ACCMODE, name);
+ goto error;
+ }
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0) {
+ virFileError(VIR_ERR_INTERNAL_ERROR,
+ _("unable to create pipe for %s"), name);
+ goto error;
+ }
+
+ ret->cmd = virCommandNewArgList(LIBEXECDIR "/libvirt_iohelper",
+ name, "0", NULL);
+ if (output) {
+ virCommandSetInputFD(ret->cmd, pipefd[0]);
+ virCommandSetOutputFD(ret->cmd, fd);
+ virCommandAddArg(ret->cmd, "1");
+ } else {
+ virCommandSetInputFD(ret->cmd, *fd);
+ virCommandSetOutputFD(ret->cmd, &pipefd[1]);
+ virCommandAddArg(ret->cmd, "0");
+ }
+
+ if (virCommandRunAsync(ret->cmd, NULL) < 0)
+ goto error;
+
+ if (VIR_CLOSE(pipefd[!output]) < 0) {
+ virFileError(VIR_ERR_INTERNAL_ERROR, "%s", _("unable to close
pipe"));
+ goto error;
+ }
+
+ VIR_FORCE_CLOSE(*fd);
+ *fd = pipefd[output];
+ return ret;
+
+error:
+ VIR_FORCE_CLOSE(pipefd[0]);
+ VIR_FORCE_CLOSE(pipefd[1]);
+ virFileDirectFdFree(ret);
+ return NULL;
+}
+
+/**
+ * virFileDirectFdClose:
+ * @dfd: direct fd wrapper, or NULL
+ *
+ * If DFD is valid, then ensure that I/O has completed, which may
+ * include reaping a child process. Return 0 if all data for the
+ * wrapped fd is complete, or -1 on failure with an error emitted.
+ * This function intentionally returns 0 when DFD is NULL, so that
+ * callers can conditionally create a virFileDirectFd wrapper but
+ * unconditionally call the cleanup code. To avoid deadlock, only
+ * call this after closing the fd resulting from virFileDirectFdNew().
+ */
+int
+virFileDirectFdClose(virFileDirectFdPtr dfd)
+{
+ if (!dfd)
+ return 0;
+
+ return virCommandWait(dfd->cmd, NULL);
+}
+
+/**
+ * virFileDirectFdFree:
+ * @dfd: direct fd wrapper, or NULL
+ *
+ * Free all remaining resources associated with DFD. If
+ * virFileDirectFdClose() was not previously called, then this may
+ * discard some previous I/O. To avoid deadlock, only call this after
+ * closing the fd resulting from virFileDirectFdNew().
+ */
+void
+virFileDirectFdFree(virFileDirectFdPtr dfd)
+{
+ if (!dfd)
+ return;
+
+ virCommandFree(dfd->cmd);
+ VIR_FREE(dfd);
+}
diff --git a/src/util/virfile.h b/src/util/virfile.h
index d11f902..0906568 100644
--- a/src/util/virfile.h
+++ b/src/util/virfile.h
@@ -50,4 +50,19 @@ FILE *virFileFdopen(int *fdptr, const char *mode)
ATTRIBUTE_RETURN_CHECK;
# define VIR_FORCE_CLOSE(FD) ignore_value(virFileClose(&(FD), true))
# define VIR_FORCE_FCLOSE(FILE) ignore_value(virFileFclose(&(FILE), true))
+/* Opaque type for managing a wrapper around an O_DIRECT fd. */
+struct _virFileDirectFd;
+
+typedef struct _virFileDirectFd virFileDirectFd;
+typedef virFileDirectFd *virFileDirectFdPtr;
+
+int virFileDirectFdFlag(void);
+
+virFileDirectFdPtr virFileDirectFdNew(int *fd, const char *name)
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+
+int virFileDirectFdClose(virFileDirectFdPtr dfd);
+
+void virFileDirectFdFree(virFileDirectFdPtr dfd);
+
#endif /* __VIR_FILES_H */
--
1.7.4.4