O_DIRECT has stringent requirements: I/O must occur with buffers
whose alignment and size are both multiples of the file system
block size (this used to be 512 bytes, but these days 4k is safer,
and 64k allows for better throughput). Rather than making changes
at every site that wants to use O_DIRECT, it is easier to offload
the work to a helper process that relays the I/O between a pipe
and the actual direct fd, so that the other end of the pipe no
longer has to worry about those constraints.
* src/util/virdirect.h: New file.
* src/util/virdirect.c: Likewise.
* src/Makefile.am (UTIL_SOURCES): Build them.
* src/libvirt_private.syms: Export new symbols.
* cfg.mk (useless_free_options): Add to list.
* po/POTFILES.in: Translate new file.
---
cfg.mk | 1 +
po/POTFILES.in | 1 +
src/Makefile.am | 1 +
src/libvirt_private.syms | 6 ++
src/util/virdirect.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++
src/util/virdirect.h | 37 +++++++++++
6 files changed, 195 insertions(+), 0 deletions(-)
create mode 100644 src/util/virdirect.c
create mode 100644 src/util/virdirect.h
diff --git a/cfg.mk b/cfg.mk
index 2873177..69d2b6a 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -97,6 +97,7 @@ useless_free_options = \
--name=virCommandFree \
--name=virConfFreeList \
--name=virConfFreeValue \
+ --name=virDirectFdFree \
--name=virDomainChrDefFree \
--name=virDomainChrSourceDefFree \
--name=virDomainControllerDefFree \
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 5782cbf..ad031f3 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -122,6 +122,7 @@ src/util/storage_file.c
src/util/sysinfo.c
src/util/util.c
src/util/viraudit.c
+src/util/virdirect.c
src/util/virterror.c
src/util/xml.c
src/vbox/vbox_MSCOMGlue.c
diff --git a/src/Makefile.am b/src/Makefile.am
index d19d1ca..ce041b4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -80,6 +80,7 @@ UTIL_SOURCES = \
util/uuid.c util/uuid.h \
util/util.c util/util.h \
util/viraudit.c util/viraudit.h \
+ util/virdirect.c util/virdirect.h \
util/xml.c util/xml.h \
util/virterror.c util/virterror_internal.h
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index f95d341..c78485d 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1085,6 +1085,12 @@ virAuditOpen;
virAuditSend;
+# virdirect.h
+virDirectFdClose;
+virDirectFdFree;
+virDirectFdNew;
+
+
# virterror_internal.h
virDispatchError;
virErrorMsg;
diff --git a/src/util/virdirect.c b/src/util/virdirect.c
new file mode 100644
index 0000000..a3dca77
--- /dev/null
+++ b/src/util/virdirect.c
@@ -0,0 +1,149 @@
+/*
+ * virdirect.c: management of O_DIRECT fds
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include "internal.h"
+
+#include "virdirect.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#include "command.h"
+#include "configmake.h"
+#include "files.h"
+#include "memory.h"
+#include "virterror_internal.h"
+
+#define VIR_FROM_THIS VIR_FROM_NONE /* fed to virReportErrorHelper below */
+#define virDirectError(code, ...) /* report CODE with the call site */ \
+ virReportErrorHelper(VIR_FROM_THIS, code, __FILE__, \
+ __FUNCTION__, __LINE__, __VA_ARGS__)
+
+/* Opaque wrapper around an O_DIRECT fd whose I/O is relayed by a child
+ * process. Only one direction (read or write) is supported for now. */
+struct _virDirectFd {
+ virCommandPtr cmd; /* Child iohelper; left unset when O_DIRECT is 0. */
+};
+
+/* Replace *FD (open to an O_DIRECT file or device, either read-only
+ * or write-only) with a pipe fd whose I/O a child libvirt_iohelper
+ * relays to the original fd, obeying O_DIRECT restrictions. NAME is
+ * used in error messages and passed to the helper. On success, the
+ * wrapper is returned, the original fd is closed in the parent, and
+ * the caller must close the new *FD. On failure, *FD is unchanged,
+ * an error is reported, and NULL is returned. */
+virDirectFdPtr
+virDirectFdNew(int *fd, const char *name)
+{
+ virDirectFdPtr ret = NULL;
+ bool output = false; /* true when *fd was opened write-only */
+ int pipefd[2] = { -1, -1 }; /* [0] is the read end, [1] the write end */
+ int mode = -1; /* stays -1 (rejected below) if F_GETFL is unavailable */
+
+ if (VIR_ALLOC(ret) < 0) {
+ virReportOOMError();
+ goto error;
+ }
+ if (!O_DIRECT)
+ return ret; /* no O_DIRECT here: wrapper has no child, *fd untouched */
+
+#ifdef F_GETFL
+ /* Mingw lacks F_GETFL, but it also lacks O_DIRECT, in which case
+ * we don't need a child process in the first place. */
+ mode = fcntl(*fd, F_GETFL);
+#endif
+
+ if (mode < 0) {
+ virDirectError(VIR_ERR_INTERNAL_ERROR, _("invalid fd %d for %s"),
+ *fd, name);
+ goto error;
+ } else if ((mode & O_ACCMODE) == O_WRONLY) {
+ output = true;
+ } else if ((mode & O_ACCMODE) != O_RDONLY) { /* O_RDWR is not supported */
+ virDirectError(VIR_ERR_INTERNAL_ERROR, _("unexpected mode %x for %s"),
+ mode & O_ACCMODE, name);
+ goto error;
+ }
+
+ if (pipe2(pipefd, O_CLOEXEC) < 0) {
+ virDirectError(VIR_ERR_INTERNAL_ERROR,
+ _("unable to create pipe for %s"), name);
+ goto error;
+ }
+
+ ret->cmd = virCommandNewArgList(LIBEXECDIR "/libvirt_iohelper",
+ name, "0", NULL);
+ if (output) { /* caller writes to pipe; child copies pipe -> direct fd */
+ virCommandSetInputFD(ret->cmd, pipefd[0]);
+ virCommandSetOutputFD(ret->cmd, fd);
+ virCommandAddArg(ret->cmd, "1");
+ } else { /* child copies direct fd -> pipe; caller reads from pipe */
+ virCommandSetInputFD(ret->cmd, *fd);
+ virCommandSetOutputFD(ret->cmd, &pipefd[1]);
+ virCommandAddArg(ret->cmd, "0");
+ }
+
+ if (virCommandRunAsync(ret->cmd, NULL) < 0)
+ goto error;
+
+ if (VIR_CLOSE(pipefd[!output]) < 0) { /* the pipe end given to the child */
+ virDirectError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("unable to close pipe"));
+ goto error;
+ }
+
+ VIR_FORCE_CLOSE(*fd); /* the parent no longer uses the direct fd */
+ *fd = pipefd[output]; /* hand the remaining pipe end to the caller */
+ return ret;
+
+error:
+ VIR_FORCE_CLOSE(pipefd[0]);
+ VIR_FORCE_CLOSE(pipefd[1]);
+ virDirectFdFree(ret);
+ return NULL;
+}
+
+/* Reap the iohelper child that backs DFD. Returns 0 once the child
+ * has been reaped, or -1 with an error emitted on failure. A NULL
+ * DFD (or a platform without O_DIRECT, where no child was started)
+ * is deliberately treated as success, so that cleanup code may call
+ * this unconditionally whether or not a wrapper was ever created. */
+int
+virDirectFdClose(virDirectFdPtr dfd)
+{
+    if (dfd && O_DIRECT)
+        return virCommandWait(dfd->cmd, NULL);
+
+    return 0;
+}
+
+/* Release DFD together with the command object it owns; passing
+ * NULL is a harmless no-op. If virDirectFdClose was not called
+ * beforehand, freeing the wrapper forcefully terminates the child
+ * process. */
+void
+virDirectFdFree(virDirectFdPtr dfd)
+{
+    if (dfd) {
+        virCommandFree(dfd->cmd);
+        VIR_FREE(dfd);
+    }
+}
diff --git a/src/util/virdirect.h b/src/util/virdirect.h
new file mode 100644
index 0000000..35f5f6b
--- /dev/null
+++ b/src/util/virdirect.h
@@ -0,0 +1,37 @@
+/*
+ * virdirect.h: management of O_DIRECT fds
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __VIR_DIRECT_H__
+# define __VIR_DIRECT_H__
+
+/* Opaque wrapper that routes I/O for an O_DIRECT fd via a helper. */
+struct _virDirectFd;
+
+typedef struct _virDirectFd virDirectFd;
+typedef virDirectFd *virDirectFdPtr;
+
+virDirectFdPtr virDirectFdNew(int *fd, const char *name) /* NULL on error */
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
+
+int virDirectFdClose(virDirectFdPtr dfd); /* 0 if reaped or DFD is NULL */
+
+void virDirectFdFree(virDirectFdPtr dfd); /* accepts NULL */
+
+#endif /* __VIR_DIRECT_H__ */
--
1.7.4.4