[libvirt] [PATCH 0/4] qemu: fix restore domain with bypass cache flag

It is not working currently. Error message is (with convinient wraps): error: Failed to restore domain from tst6.sav error: internal error: Child process (LIBVIRT_LOG_OUTPUTS=1:stderr \ /usr/libexec/libvirt_iohelper /vz/dev/libvirt/tst6.sav 0 0) unexpected exit status 1: \ /usr/libexec/libvirt_iohelper: failure with /vz/dev/libvirt/tst6.sav : \ Unable to read /vz/dev/libvirt/tst6.sav: Invalid argument The problem is that saferead function being used by iohelper is not quite suitable for direct reads. It's last read that is supposed to read EOF is not properly aligned thus EINVAL is returned. Nikolay Shirokovskiy (4): iohelper: drop unused operation length limit iohelper: simplify last direct write alignment iohelper: reduce zero-out in align case iohelper: fix reading with O_DIRECT src/util/iohelper.c | 98 +++++++++++++++++++++++++++++++---------------------- src/util/virfile.c | 2 +- 2 files changed, 59 insertions(+), 41 deletions(-) -- 1.8.3.1

--- src/util/iohelper.c | 28 +++++++--------------------- src/util/virfile.c | 2 +- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/util/iohelper.c b/src/util/iohelper.c index d7bf5c7..5fc311b 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -44,7 +44,7 @@ #define VIR_FROM_THIS VIR_FROM_STORAGE static int -runIO(const char *path, int fd, int oflags, unsigned long long length) +runIO(const char *path, int fd, int oflags) { void *base = NULL; /* Location to be freed */ char *buf = NULL; /* Aligned location within base */ @@ -79,7 +79,7 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) fdoutname = "stdout"; /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0 || length)) { + if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0)) { virReportSystemError(end < 0 ? errno : EINVAL, "%s", _("O_DIRECT read needs entire seekable file")); goto cleanup; @@ -110,20 +110,13 @@ runIO(const char *path, int fd, int oflags, unsigned long long length) while (1) { ssize_t got; - if (length && - (length - total) < buflen) - buflen = length - total; - - if (buflen == 0) - break; /* End of requested data from client */ - if ((got = saferead(fdin, buf, buflen)) < 0) { virReportSystemError(errno, _("Unable to read %s"), fdinname); goto cleanup; } if (got == 0) break; /* End of file before end of requested data */ - if (got < buflen || (buflen & alignMask)) { + if (got < buflen) { /* O_DIRECT can handle at most one short read, at end of file */ if (direct && shortRead) { virReportSystemError(EINVAL, "%s", @@ -178,7 +171,7 @@ usage(int status) if (status) { fprintf(stderr, _("%s: try --help for more details"), program_name); } else { - printf(_("Usage: %s FILENAME LENGTH FD\n"), program_name); + printf(_("Usage: %s FILENAME FD\n"), program_name); } exit(status); } @@ -187,7 +180,6 @@ int main(int argc, char **argv) { const char *path; - unsigned long long length; int oflags = -1; int fd = -1; @@ -204,14 +196,8 @@ main(int argc, char **argv) if (argc > 1 && STREQ(argv[1], "--help")) usage(EXIT_SUCCESS); - if (argc == 4) { /* FILENAME LENGTH FD */ - if (virStrToLong_ull(argv[2], NULL, 10, &length) < 0) { - fprintf(stderr, _("%s: malformed file length %s"), - program_name, argv[2]); - exit(EXIT_FAILURE); - } - - if (virStrToLong_i(argv[3], NULL, 10, &fd) < 0) { + if (argc == 3) { /* FILENAME FD */ + if (virStrToLong_i(argv[2], NULL, 10, &fd) < 0) { fprintf(stderr, _("%s: malformed fd %s"), program_name, argv[3]); exit(EXIT_FAILURE); @@ -234,7 +220,7 @@ main(int argc, char **argv) usage(EXIT_FAILURE); } - if (fd < 0 || runIO(path, fd, oflags, length) < 0) + if (fd < 0 || runIO(path, fd, oflags) < 0) goto error; return 0; diff --git a/src/util/virfile.c b/src/util/virfile.c index 2f28e83..e746d87 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -260,7 +260,7 @@ virFileWrapperFdNew(int *fd, const char *name, unsigned int flags) LIBEXECDIR))) goto error; - ret->cmd = virCommandNewArgList(iohelper_path, name, "0", NULL); + ret->cmd = virCommandNewArgList(iohelper_path, name, NULL); VIR_FREE(iohelper_path); -- 1.8.3.1

Make alignment of last direct write more straightforward. Using additionally two flags 'end' and 'shortRead' looks complicated. --- src/util/iohelper.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/util/iohelper.c b/src/util/iohelper.c index 5fc311b..1896fd3 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -55,7 +55,6 @@ runIO(const char *path, int fd, int oflags) const char *fdinname, *fdoutname; unsigned long long total = 0; bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); - bool shortRead = false; /* true if we hit a short read */ off_t end = 0; #if HAVE_POSIX_MEMALIGN @@ -115,30 +114,32 @@ runIO(const char *path, int fd, int oflags) goto cleanup; } if (got == 0) - break; /* End of file before end of requested data */ - if (got < buflen) { - /* O_DIRECT can handle at most one short read, at end of file */ - if (direct && shortRead) { - virReportSystemError(EINVAL, "%s", - _("Too many short reads for O_DIRECT")); - } - shortRead = true; - } + break; total += got; - if (fdout == fd && direct && shortRead) { - end = total; + + /* handle last write size align in direct case */ + if (got < buflen && direct && fdout == fd) { memset(buf + got, 0, buflen - got); got = (got + alignMask) & ~alignMask; + + if (safewrite(fdout, buf, got) < 0) { + virReportSystemError(errno, _("Unable to write %s"), fdoutname); + goto cleanup; + } + + if (ftruncate(fd, total) < 0) { + virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); + goto cleanup; + } + + break; } + if (safewrite(fdout, buf, got) < 0) { virReportSystemError(errno, _("Unable to write %s"), fdoutname); goto cleanup; } - if (end && ftruncate(fd, end) < 0) { - virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); - goto cleanup; - } } /* Ensure all data is written */ -- 1.8.3.1

We only need to zero-out bytes that will be written. May be we even don't need to zero-out at all because of immediate truncate. --- src/util/iohelper.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/util/iohelper.c b/src/util/iohelper.c index 1896fd3..fe15a92 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -120,10 +120,11 @@ runIO(const char *path, int fd, int oflags) /* handle last write size align in direct case */ if (got < buflen && direct && fdout == fd) { - memset(buf + got, 0, buflen - got); - got = (got + alignMask) & ~alignMask; + ssize_t aligned_got = (got + alignMask) & ~alignMask; - if (safewrite(fdout, buf, got) < 0) { + memset(buf + got, 0, aligned_got - got); + + if (safewrite(fdout, buf, aligned_got) < 0) { virReportSystemError(errno, _("Unable to write %s"), fdoutname); goto cleanup; } -- 1.8.3.1

saferead is not suitable for direct reads. If file size is not multiple of align size then we get EINVAL on the read(2) that is supposed to return 0 because read buffer will not be aligned at this point. Let's not read again after partial read and check that we read everything by comparing the number of total bytes read against file size. --- src/util/iohelper.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/util/iohelper.c b/src/util/iohelper.c index fe15a92..32c5a89 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -78,10 +78,22 @@ runIO(const char *path, int fd, int oflags) fdoutname = "stdout"; /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0)) { - virReportSystemError(end < 0 ? errno : EINVAL, "%s", - _("O_DIRECT read needs entire seekable file")); - goto cleanup; + if (direct) { + if ((end = lseek(fd, 0, SEEK_CUR)) != 0) { + virReportSystemError(end < 0 ? errno : EINVAL, "%s", + _("O_DIRECT read needs entire seekable file")); + goto cleanup; + } + + if ((end = lseek(fd, 0, SEEK_END)) < 0) { + virReportSystemError(errno, "%s", _("can not seek file end")); + goto cleanup; + } + + if (lseek(fd, 0, SEEK_SET) < 0) { + virReportSystemError(errno, "%s", _("can not seek file begin")); + goto cleanup; + } } break; case O_WRONLY: @@ -109,7 +121,25 @@ runIO(const char *path, int fd, int oflags) while (1) { ssize_t got; - if ((got = saferead(fdin, buf, buflen)) < 0) { + /* in case of O_DIRECT we cannot read again to check for EOF + * after partial buffer read as it is done in saferead */ + if (direct && fdin == fd && end - total < buflen) { + if (total == end) + break; + + while ((got = read(fd, buf, buflen)) < 0 && errno == EINTR); + + if (got < end - total) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to read last chunk from %s"), + fdinname); + goto cleanup; + } + } else { + got = saferead(fdin, buf, buflen); + } + + if (got < 0) { virReportSystemError(errno, _("Unable to read %s"), fdinname); goto cleanup; } -- 1.8.3.1

On 09/07/2017 09:44 AM, Nikolay Shirokovskiy wrote:
saferead is not suitable for direct reads. If file size is not multiple of align size then we get EINVAL on the read(2) that is supposed to return 0 because read buffer will not be aligned at this point.
Let's not read again after partial read and check that we read everything by comparing the number of total bytes read against file size. --- src/util/iohelper.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-)
diff --git a/src/util/iohelper.c b/src/util/iohelper.c index fe15a92..32c5a89 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -78,10 +78,22 @@ runIO(const char *path, int fd, int oflags) fdoutname = "stdout"; /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0)) { - virReportSystemError(end < 0 ? errno : EINVAL, "%s", - _("O_DIRECT read needs entire seekable file")); - goto cleanup; + if (direct) { + if ((end = lseek(fd, 0, SEEK_CUR)) != 0) { + virReportSystemError(end < 0 ? errno : EINVAL, "%s", + _("O_DIRECT read needs entire seekable file")); + goto cleanup; + } + + if ((end = lseek(fd, 0, SEEK_END)) < 0) { + virReportSystemError(errno, "%s", _("can not seek file end")); + goto cleanup; + } + + if (lseek(fd, 0, SEEK_SET) < 0) { + virReportSystemError(errno, "%s", _("can not seek file begin")); + goto cleanup; + }
This might change the position in the file. I mean, currently there is no way that the @fd is not set at its beginning. However, I'd like to have iohelper generic enough so that if a lseek()-ed fd was passed iohelper reads from there and not reposition itself. Also, @end is rewritten here twice. It's confusing. What we can do here is: cur = lseek(fd, 0, SEEK_CUR); end = lseek(fd, 0, SEEK_END); lseek(fd, cur, SEEK_SET); (with proper error handling obviously)
} break; case O_WRONLY: @@ -109,7 +121,25 @@ runIO(const char *path, int fd, int oflags) while (1) { ssize_t got;
- if ((got = saferead(fdin, buf, buflen)) < 0) { + /* in case of O_DIRECT we cannot read again to check for EOF + * after partial buffer read as it is done in saferead */ + if (direct && fdin == fd && end - total < buflen) { + if (total == end) + break; + + while ((got = read(fd, buf, buflen)) < 0 && errno == EINTR);
In this case, the semicolon should be on a separate line: while () ;
+ + if (got < end - total) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to read last chunk from %s"), + fdinname); + goto cleanup; + } + } else { + got = saferead(fdin, buf, buflen); + } + + if (got < 0) { virReportSystemError(errno, _("Unable to read %s"), fdinname); goto cleanup; }
Otherwise looking good. Also, ACK to patches 1-3. I'll merge them shortly so that you can send v2 just for this one. Michal

On 19.09.2017 12:36, Michal Privoznik wrote:
On 09/07/2017 09:44 AM, Nikolay Shirokovskiy wrote:
saferead is not suitable for direct reads. If file size is not multiple of align size then we get EINVAL on the read(2) that is supposed to return 0 because read buffer will not be aligned at this point.
Let's not read again after partial read and check that we read everything by comparing the number of total bytes read against file size. --- src/util/iohelper.c | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-)
diff --git a/src/util/iohelper.c b/src/util/iohelper.c index fe15a92..32c5a89 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -78,10 +78,22 @@ runIO(const char *path, int fd, int oflags) fdoutname = "stdout"; /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0)) { - virReportSystemError(end < 0 ? errno : EINVAL, "%s", - _("O_DIRECT read needs entire seekable file")); - goto cleanup; + if (direct) { + if ((end = lseek(fd, 0, SEEK_CUR)) != 0) { + virReportSystemError(end < 0 ? errno : EINVAL, "%s", + _("O_DIRECT read needs entire seekable file")); + goto cleanup; + } + + if ((end = lseek(fd, 0, SEEK_END)) < 0) { + virReportSystemError(errno, "%s", _("can not seek file end")); + goto cleanup; + } + + if (lseek(fd, 0, SEEK_SET) < 0) { + virReportSystemError(errno, "%s", _("can not seek file begin")); + goto cleanup; + }
This might change the position in the file. I mean, currently there is no way that the @fd is not set at its beginning. However, I'd like to have iohelper generic enough so that if a lseek()-ed fd was passed iohelper reads from there and not reposition itself.
Looks like this will not be enough anyway. The first check makes sure we are at the beginning. If we some day drop this check then we'll have to write extra code too besides storing original position. Like taking into account alignment issues.
Also, @end is rewritten here twice. It's confusing.
But as we are going to introduce cur variable why not?)
What we can do here is:
cur = lseek(fd, 0, SEEK_CUR); end = lseek(fd, 0, SEEK_END); lseek(fd, cur, SEEK_SET);
(with proper error handling obviously)
} break; case O_WRONLY: @@ -109,7 +121,25 @@ runIO(const char *path, int fd, int oflags) while (1) { ssize_t got;
- if ((got = saferead(fdin, buf, buflen)) < 0) { + /* in case of O_DIRECT we cannot read again to check for EOF + * after partial buffer read as it is done in saferead */ + if (direct && fdin == fd && end - total < buflen) { + if (total == end) + break; + + while ((got = read(fd, buf, buflen)) < 0 && errno == EINTR);
In this case, the semicolon should be on a separate line:
while () ;
+ + if (got < end - total) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unable to read last chunk from %s"), + fdinname); + goto cleanup; + } + } else { + got = saferead(fdin, buf, buflen); + } + + if (got < 0) { virReportSystemError(errno, _("Unable to read %s"), fdinname); goto cleanup; }
Otherwise looking good. Also, ACK to patches 1-3. I'll merge them shortly so that you can send v2 just for this one.
Ok. Nikolay
participants (2)
-
Michal Privoznik
-
Nikolay Shirokovskiy