On Thu, Jul 14, 2011 at 08:24:33AM -0600, Eric Blake wrote:
Required for a coming patch where iohelper will operate on O_DIRECT
fds. There, the user-space memory must be aligned to file system
boundaries (at least page-aligned works best, and some file systems
prefer 64k). Made tougher by the fact that VIR_ALLOC won't work
on void *, but posix_memalign won't work on char * and isn't
available everywhere.
This patch makes some simplifying assumptions - namely, output
to an O_DIRECT fd will only be attempted on an empty seekable
file (hence, no need to worry about preserving existing data
on a partial block, and ftruncate will work to undo the effects
of having to round up the size of the last block written).
* configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign.
* src/util/iohelper.c (runIO): Use aligned memory, and handle
quirks of O_DIRECT on last write.
---
This one took me the longest time to get right, so a careful
review is appreciated.
configure.ac | 6 +++---
src/util/iohelper.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/configure.ac b/configure.ac
index e9d5be4..9e39f44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long])
dnl Availability of various common functions (non-fatal if missing),
dnl and various less common threadsafe functions
-AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \
- geteuid initgroups posix_fallocate mmap kill \
- getmntent_r getgrnam_r getpwuid_r])
+AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \
+ getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \
+ regexec sched_getaffinity])
dnl Availability of pthread functions (if missing, win32 threading is
dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE.
diff --git a/src/util/iohelper.c b/src/util/iohelper.c
index 1185bde..82f62e1 100644
--- a/src/util/iohelper.c
+++ b/src/util/iohelper.c
@@ -74,17 +74,32 @@ cleanup:
static int
runIO(const char *path, int fd, int oflags, unsigned long long length)
{
- char *buf = NULL;
+ void *base = NULL; /* Location to be freed */
+ char *buf = NULL; /* Aligned location within base */
size_t buflen = 1024*1024;
+ intptr_t alignMask = 64*1024 - 1;
int ret = -1;
int fdin, fdout;
const char *fdinname, *fdoutname;
unsigned long long total = 0;
+ bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
+ bool shortRead = false; /* true if we hit a short read */
+ off_t end = 0;
- if (VIR_ALLOC_N(buf, buflen) < 0) {
+#if HAVE_POSIX_MEMALIGN
+ if (posix_memalign(&base, alignMask + 1, buflen)) {
virReportOOMError();
goto cleanup;
}
+ buf = base;
+#else
+ if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+ base = buf;
+ buf = (char *) (((intptr_t) base + alignMask) & alignMask);
+#endif
switch (oflags & O_ACCMODE) {
case O_RDONLY:
@@ -98,6 +113,13 @@ runIO(const char *path, int fd, int oflags, unsigned long long
length)
fdinname = "stdin";
fdout = fd;
fdoutname = path;
+ /* To make the implementation simpler, we give up on any
+ * attempt to use O_DIRECT in a non-trivial manner. */
+ if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
+ virReportSystemError(end < 0 ? errno : EINVAL, "%s",
+ _("O_DIRECT needs empty seekable file"));
+ goto cleanup;
+ }
break;
case O_RDWR:
@@ -124,12 +146,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long
length)
}
if (got == 0)
break; /* End of file before end of requested data */
+ if (got < buflen || (buflen & alignMask)) {
+ /* O_DIRECT can handle at most one short read, at end of file */
+ if (direct && shortRead) {
+ virReportSystemError(EINVAL, "%s",
+ _("Too many short reads for O_DIRECT"));
+ }
+ shortRead = true;
+ }
total += got;
+ if (fdout == fd && direct && shortRead) {
+ end = total;
+ memset(buf + got, 0, buflen - got);
+ got = (got + alignMask) & ~alignMask;
+ }
if (safewrite(fdout, buf, got) < 0) {
virReportSystemError(errno, _("Unable to write %s"), fdoutname);
goto cleanup;
}
+ if (end && ftruncate(fd, end) < 0) {
+ virReportSystemError(errno, _("Unable to truncate %s"),
fdoutname);
+ goto cleanup;
+ }
}
ret = 0;
@@ -141,7 +180,7 @@ cleanup:
ret = -1;
}
- VIR_FREE(buf);
+ VIR_FREE(base);
return ret;
}
ACK,
I'm wondering if we should move the guts of iohelper out into a function
in src/util which we can unit test. Or perhaps its easy enough to just
unit test the fdstream code and thus iohelper
Daniel
--
|:
http://berrange.com -o-
http://www.flickr.com/photos/dberrange/ :|
|:
http://libvirt.org -o-
http://virt-manager.org :|
|:
http://autobuild.org -o-
http://search.cpan.org/~danberr/ :|
|:
http://entangle-photo.org -o-
http://live.gnome.org/gtk-vnc :|