[libvirt RFC 0/5] introduce virFileDirect API for block-friendly read/write

As a spin-off from the previous multifd series, this series offers a basic API for virfile.c to deal with reading and writing files opened with O_DIRECT. It applies this new API in the virFileDiskCopy code currently used for iohelper, and reworks the saveimage code and on-disk lengths to be O_DIRECT friendly. This series is also available at: https://gitlab.com/hw-claudio/libvirt.git "directio" Gitlab successful pipeline is at: https://gitlab.com/hw-claudio/libvirt/-/pipelines/546635704 Thank you for your feedback, Claudio Claudio Fontana (5): virfile: introduce virFileDirect APIs virfile: use virFileDirect API in runIOCopy qemu: saveimage: rework image read/write to be O_DIRECT friendly qemu: saveimage: assume future formats will also support compression virfile: virFileDiskCopy: prepare for O_DIRECT files without wrapper src/libvirt_private.syms | 8 + src/qemu/qemu_saveimage.c | 269 ++++++++++++++++++++------------ src/qemu/qemu_saveimage.h | 20 ++- src/util/virfile.c | 316 +++++++++++++++++++++++++++++++++----- src/util/virfile.h | 11 ++ 5 files changed, 476 insertions(+), 148 deletions(-) -- 2.26.2

these functions help with allocating buffers, aligning, reading and writing files opened with O_DIRECT. Signed-off-by: Claudio Fontana <cfontana@suse.de> --- src/libvirt_private.syms | 8 ++ src/util/virfile.c | 249 +++++++++++++++++++++++++++++++++++++++ src/util/virfile.h | 11 ++ 3 files changed, 268 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bfedd85326..1b3930dfb4 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2240,7 +2240,15 @@ virFileComparePaths; virFileCopyACLs; virFileDataSync; virFileDeleteTree; +virFileDirectAlign; +virFileDirectBufferNew; +virFileDirectCopyBuf; virFileDirectFdFlag; +virFileDirectRead; +virFileDirectReadCopy; +virFileDirectReadLim; +virFileDirectWrite; +virFileDirectWriteLim; virFileExists; virFileFclose; virFileFdopen; diff --git a/src/util/virfile.c b/src/util/virfile.c index e4522b5f67..03a7cdc9bf 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -192,6 +192,210 @@ virFileDirectFdFlag(void) return O_DIRECT ? O_DIRECT : -1; } +/** + * virFileDirectAlign: align a value, which may include a pointer. + * + * @value: the value that will be incremented to reach alignment + * + * Returns the aligned value. + */ +uintptr_t +virFileDirectAlign(uintptr_t value) +{ + return (value + VIR_FILE_DIRECT_ALIGN_MASK) & ~VIR_FILE_DIRECT_ALIGN_MASK; +} + +/** + * virFileDirectRead: perform a single aligned read. + * @fd: O_DIRECT file descriptor + * @buf: aligned buffer to read into + * @count: the desired read size. Note that if unaligned, + * extra bytes will be read based on alignment. + * + * Note that buf should be able to contain count plus the alignment! + * + * Returns < 0 and errno set on error, or the number of bytes read, + * which may be smaller or even greater than count. + */ +ssize_t +virFileDirectRead(int fd, void *buf, size_t count) +{ + size_t aligned_count = virFileDirectAlign(count); + while (count > 0) { + ssize_t r = read(fd, buf, aligned_count); + if (r < 0 && errno == EINTR) + continue; + return r; + } + return 0; +} + +/** + * virFileDirectReadLim: perform multiple aligned reads up to limit + * @fd: O_DIRECT file descriptor + * @buf: aligned buffer to read into + * @limit: the desired limit to read into buffer. + * Note that if unaligned, extra bytes will be read based on alignment. + * + * Note that buf should be able to contain limit plus the alignment! + * + * Compared with virFileDirectRead, this function reads potentially + * multiple times to fill up to buffer with up to limit bytes plus alignment, + * so on success it does not return a number of bytes smaller than limit. + * + * Returns < 0 and errno set on error, + * and on success the number of bytes read, which may be greater than limit + * due to alignment. + */ +ssize_t +virFileDirectReadLim(int fd, void *buf, size_t lim) +{ + ssize_t nread = 0; + + while (lim > 0) { + ssize_t r = virFileDirectRead(fd, buf, lim); + if (r < 0) + return r; + if (r == 0) + return nread; + buf = (char *)buf + r; + nread += r; + if (lim < r) + break; + lim -= r; + } + return nread; +} + +/** + * virFileDirectCopyBuf: copy a buffer contents to destination in memory + * @buf: aligned buffer to copy from + * @count: amount of data to copy + * @dst: the destination in memory + * @dst_len: the maximum the destination can hold. + * + * copies up to count bytes from buf into dst, but not more than dst_len. + * increments buf pointer and dst pointer, as well as decrementing the + * maximum the destination can hold (dst_len). + * + * Returns the amount copied. + */ +size_t +virFileDirectCopyBuf(void **buf, size_t count, void **dst, size_t *dst_len) +{ + size_t to_copy; + + to_copy = count > *dst_len ? *dst_len : count; + memcpy(*dst, *buf, to_copy); + *dst_len -= to_copy; + *(char **)dst += to_copy; + *(char **)buf += to_copy; + return to_copy; +} + +/** + * virFileDirectReadCopy: read an fd and copy contents to memory + * @fd: O_DIRECT file descriptor + * @buf: aligned buffer to read the fd into, and then copy from + * @buflen: size of the buffer + * @dst: the destination in memory + * @dst_len: the maximum the destination can hold. + * + * reads data from the fd file descriptor into the buffer, + * and then copy to the destination, filling it up to dst_len. + * + * Returns < 0 and errno set on error, + * or the number of bytes read, which may be past the requested dst_len, + * or may be smaller if the fd does not contain enough data. + * + * The buf pointer is updated to point to eventual exccess data in the buffer. + */ +ssize_t +virFileDirectReadCopy(int fd, void **buf, size_t buflen, void *dst, size_t dst_len) +{ + ssize_t nread = 0; + void *d = dst; + char *s = *buf; + + while (dst_len > 0) { + ssize_t rv; + *buf = s; + rv = virFileDirectReadLim(fd, s, dst_len < buflen ? dst_len : buflen); + if (rv < 0) + return rv; + if (rv == 0) + return nread; /* not enough data to fulfill request */ + + nread += rv; /* note, we might read past the requested len */ + virFileDirectCopyBuf(buf, rv, &d, &dst_len); + } + return nread; +} + +/** + * virFileDirectWrite: perform a single aligned write. + * @fd: O_DIRECT file descriptor to write to + * @buf: aligned buffer to write from + * @count: the desired write size. Note that if unaligned, + * extra 0 bytes will be written based on alignment. + * + * Returns < 0 and errno set on error, or the number of bytes written, + * which may be smaller or even greater than count. + */ +ssize_t +virFileDirectWrite(int fd, void *buf, size_t count) +{ + size_t aligned_count = virFileDirectAlign(count); + if (aligned_count > count) { + memset((char *)buf + count, 0, aligned_count - count); + } + while (count > 0) { + ssize_t r = write(fd, buf, aligned_count); /* sc_avoid_write */ + if (r < 0 && errno == EINTR) + continue; + return r; + } + return 0; +} + +/** + * virFileDirectWriteLim: perform multiple aligned writes up to limit + * @fd: O_DIRECT file descriptor + * @buf: aligned buffer to write from + * @limit: the desired limit for the total write size + * Note that if unaligned, extra bytes will be written based on alignment. + * + * Note that buf should be able to contain limit plus the alignment! + * + * Compared with virFileDirectWrite, this function writes potentially + * multiple times to drain the buffer up to the limit bytes plus alignment, + * so on success it does not return a number of bytes smaller than limit. + * + * Returns < 0 and errno set on error, + * and on success the number of bytes written, which may be greater than limit + * due to alignment. + */ + +ssize_t +virFileDirectWriteLim(int fd, void *buf, size_t lim) +{ + ssize_t nwritten = 0; + + while (lim > 0) { + ssize_t r = virFileDirectWrite(fd, buf, lim); + if (r < 0) + return r; + if (r == 0) + return nwritten; + buf = (char *)buf + r; + nwritten += r; + if (lim < r) + break; + lim -= r; + } + return nwritten; +} + /* Opaque type for managing a wrapper around a fd. For now, * read-write is not supported, just a single direction. */ struct _virFileWrapperFd { @@ -202,6 +406,41 @@ struct _virFileWrapperFd { #ifndef WIN32 +/** + * virFileDirectBufferNew: allocate a buffer and return the first + * block-aligned address in it. + * + * @alloc_base: pointer to the to-be-allocated memory buffer. + * @buflen: desired length, which should be greater than alignment. + * + * Allocate a memory area large enough to accommodate an aligned + * buffer of size buflen. + * + * On success, *alloc_base is set to the newly allocated memory, + * and the aligned buffer within it is returned. + * + * On failure, *alloc_base is set to NULL and the function + * returns NULL. + */ +void * +virFileDirectBufferNew(void **alloc_base, size_t buflen) +{ + void *buf; + buflen = virFileDirectAlign(buflen); + +# if WITH_POSIX_MEMALIGN + if (posix_memalign(alloc_base, VIR_FILE_DIRECT_ALIGN_MASK + 1, buflen)) { + *alloc_base = NULL; + return NULL; + } + buf = *alloc_base; +# else + *alloc_base = g_malloc(buflen + VIR_FILE_DIRECT_ALIGN_MASK); + buf = virFileDirectAlign((uintptr_t)*alloc_base); +# endif + return buf; +} + # ifdef __linux__ /** @@ -372,6 +611,16 @@ virFileWrapperFdNew(int *fd, const char *name, unsigned int flags) return NULL; } #else /* WIN32 */ + +void * +virFileDirectBufferNew(void **alloc_base G_GNUC_UNUSED, + size_t buflen G_GNUC_UNUSED) +{ + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("virFileDirectBufferNew unsupported on this platform")); + return NULL; +} + virFileWrapperFd * virFileWrapperFdNew(int *fd G_GNUC_UNUSED, const char *name G_GNUC_UNUSED, diff --git a/src/util/virfile.h b/src/util/virfile.h index 8e378efe30..9af3dc5528 100644 --- a/src/util/virfile.h +++ b/src/util/virfile.h @@ -104,6 +104,17 @@ typedef struct _virFileWrapperFd virFileWrapperFd; int virFileDirectFdFlag(void); +#define VIR_FILE_DIRECT_ALIGN_MASK ((64 * 1024) - 1) + +void *virFileDirectBufferNew(void **alloc_base, size_t buflen); +uintptr_t virFileDirectAlign(uintptr_t value); +ssize_t virFileDirectRead(int fd, void *buf, size_t count); +ssize_t virFileDirectWrite(int fd, void *buf, size_t count); +ssize_t virFileDirectReadLim(int fd, void *buf, size_t lim); +ssize_t virFileDirectWriteLim(int fd, void *buf, size_t lim); +size_t virFileDirectCopyBuf(void **buf, size_t count, void **dst, size_t *dst_len); +ssize_t virFileDirectReadCopy(int fd, void **buf, size_t buflen, void *dst, size_t dst_len); + typedef enum { VIR_FILE_WRAPPER_BYPASS_CACHE = (1 << 0), VIR_FILE_WRAPPER_NON_BLOCKING = (1 << 1), -- 2.26.2

Signed-off-by: Claudio Fontana <cfontana@suse.de> --- src/util/virfile.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index 03a7cdc9bf..c529598595 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -4851,20 +4851,14 @@ static off_t runIOCopy(const struct runIOParams p) { g_autofree void *base = NULL; /* Location to be freed */ - char *buf = NULL; /* Aligned location within base */ - size_t buflen = 1024*1024; - intptr_t alignMask = 64*1024 - 1; off_t total = 0; + size_t buflen = 1024*1024; + char *buf = virFileDirectBufferNew(&base, buflen); -# if WITH_POSIX_MEMALIGN - if (posix_memalign(&base, alignMask + 1, buflen)) - abort(); - buf = base; -# else - buf = g_new0(char, buflen + alignMask); - base = buf; - buf = (char *) (((intptr_t) base + alignMask) & ~alignMask); -# endif + if (!buf) { + virReportSystemError(errno, _("Failed to allocate aligned memory in function %s"), __FUNCTION__); + return -5; + } while (1) { ssize_t got; @@ -4876,9 +4870,7 @@ runIOCopy(const struct runIOParams p) * In other cases using saferead reduces number of syscalls. */ if (!p.isWrite && p.isDirect) { - if ((got = read(p.fdin, buf, buflen)) < 0 && - errno == EINTR) - continue; + got = virFileDirectRead(p.fdin, buf, buflen); } else { got = saferead(p.fdin, buf, buflen); } @@ -4894,11 +4886,7 @@ runIOCopy(const struct runIOParams p) /* handle last write size align in direct case */ if (got < buflen && p.isDirect && p.isWrite) { - ssize_t aligned_got = (got + alignMask) & ~alignMask; - - memset(buf + got, 0, aligned_got - got); - - if (safewrite(p.fdout, buf, aligned_got) < 0) { + if (virFileDirectWriteLim(p.fdout, buf, got) < 0) { virReportSystemError(errno, _("Unable to write %s"), p.fdoutname); return -3; } -- 2.26.2

maintain a compatible image format (still QEMU_SAVE_VERSION 2), but change the on-disk representation to be more O_DIRECT friendly: 1) ensure that the header struct fields are packed, so we can be sure no padding will ever ruin the day in the future 2) finish the libvirt header (header + xml + cookie) with zero padding, in order to ensure that the QEMU VM (QEVM Magic) is aligned. Adapt the read and write of the libvirt header accordingly. The old images should be still loadable after this change, and the new images should be still loadable in old libvirt as well. Signed-off-by: Claudio Fontana <cfontana@suse.de> --- src/qemu/qemu_saveimage.c | 267 ++++++++++++++++++++++++-------------- src/qemu/qemu_saveimage.h | 20 +-- 2 files changed, 182 insertions(+), 105 deletions(-) diff --git a/src/qemu/qemu_saveimage.c b/src/qemu/qemu_saveimage.c index 4fd4c5cfcd..7262c598f8 100644 --- a/src/qemu/qemu_saveimage.c +++ b/src/qemu/qemu_saveimage.c @@ -139,12 +139,12 @@ virQEMUSaveDataWrite(virQEMUSaveData *data, int fd, const char *path) { + g_autofree void *base = NULL; + char *buf, *cur; virQEMUSaveHeader *header = &data->header; size_t len; size_t xml_len; size_t cookie_len = 0; - size_t zerosLen = 0; - g_autofree char *zeros = NULL; xml_len = strlen(data->xml) + 1; if (data->cookie) @@ -165,42 +165,190 @@ virQEMUSaveDataWrite(virQEMUSaveData *data, return -1; } } + /* align data_len */ + len = sizeof(*header) + header->data_len; + header->data_len += virFileDirectAlign(len) - len; - zerosLen = header->data_len - len; - zeros = g_new0(char, zerosLen); + buf = virFileDirectBufferNew(&base, sizeof(*header) + header->data_len); + cur = buf; if (data->cookie) header->cookieOffset = xml_len; - if (safewrite(fd, header, sizeof(*header)) != sizeof(*header)) { - virReportSystemError(errno, - _("failed to write header to domain save file '%s'"), - path); - return -1; + memcpy(cur, header, sizeof(*header)); + cur += sizeof(*header); + memcpy(cur, data->xml, xml_len); + cur += xml_len; + if (data->cookie) { + memcpy(cur, data->cookie, cookie_len); + cur += cookie_len; } - if (safewrite(fd, data->xml, xml_len) != xml_len) { + if (virFileDirectWriteLim(fd, buf, sizeof(*header) + header->data_len) < 0) { virReportSystemError(errno, - _("failed to write domain xml to '%s'"), + _("failed to write libvirt header of domain save file '%s'"), path); return -1; } - if (data->cookie && - safewrite(fd, data->cookie, cookie_len) != cookie_len) { + return 0; +} + +/* virQEMUSaveDataRead: + * + * Reads libvirt's header (including domain XML) from a saved image. + * + * Returns -1 on generic failure, -3 on a corrupted image, or 0 on success. + */ +int +virQEMUSaveDataRead(virQEMUSaveData *data, + int fd, + const char *path) +{ + g_autofree void *base = NULL; + virQEMUSaveHeader *header = &data->header; + size_t xml_len; + size_t cookie_len; + ssize_t rv; + void *dst; + int oflags = fcntl(fd, F_GETFL); + bool isDirect = O_DIRECT && (oflags & O_DIRECT); + size_t buflen = 1024 * 1024; + char *buf = NULL; + void *src = NULL; + + header = &data->header; + if (isDirect) { + buf = virFileDirectBufferNew(&base, buflen); + src = buf; + rv = virFileDirectReadCopy(fd, &src, buflen, header, sizeof(*header)); + } else { + rv = saferead(fd, header, sizeof(*header)); + } + if (rv < 0) { virReportSystemError(errno, - _("failed to write cookie to '%s'"), + _("failed to read libvirt header of domain save file '%s'"), path); return -1; } + if (rv < sizeof(*header)) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain save file '%s' libvirt header appears truncated"), + path); + return -3; + } + rv -= sizeof(*header); - if (safewrite(fd, zeros, zerosLen) != zerosLen) { - virReportSystemError(errno, - _("failed to write padding to '%s'"), - path); + if (memcmp(header->magic, QEMU_SAVE_MAGIC, sizeof(header->magic)) != 0) { + if (memcmp(header->magic, QEMU_SAVE_PARTIAL, sizeof(header->magic)) == 0) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain save file '%s' seems incomplete"), + path); + return -3; + } + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("image magic is incorrect")); + return -1; + } + if (header->version > QEMU_SAVE_VERSION) { + /* convert endianness and try again */ + qemuSaveImageBswapHeader(header); + } + if (header->version > QEMU_SAVE_VERSION) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("image version is not supported (%d > %d)"), + header->version, QEMU_SAVE_VERSION); return -1; } + if (header->cookieOffset) + xml_len = header->cookieOffset; + else + xml_len = header->data_len; + if (xml_len <= 0) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("invalid xml length: %lu"), (unsigned long)xml_len); + return -1; + } + if (header->data_len < xml_len) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("invalid cookieOffset: %u"), header->cookieOffset); + return -1; + } + cookie_len = header->data_len - xml_len; + data->xml = g_new0(char, xml_len); + dst = data->xml; + if (rv > 0) { + if (isDirect) { + rv -= virFileDirectCopyBuf(&src, rv, &dst, &xml_len); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode")); + return -1; + } + } + if (xml_len > 0) { + if (isDirect) { + rv = virFileDirectReadCopy(fd, &src, buflen, dst, xml_len); + } else { + rv = saferead(fd, dst, xml_len); + } + if (rv < 0) { + virReportSystemError(errno, + _("failed to read libvirt xml in domain save file '%s'"), + path); + return -1; + } + if (rv < xml_len) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain save file '%s' xml seems incomplete"), + path); + return -3; + } + rv -= xml_len; + } + if (cookie_len > 0) { + data->cookie = g_new0(char, cookie_len); + dst = data->cookie; + if (rv > 0) { + if (isDirect) { + rv -= virFileDirectCopyBuf(&src, rv, &dst, &cookie_len); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode")); + return -1; + } + } + if (cookie_len > 0) { + if (isDirect) { + rv = virFileDirectReadCopy(fd, &src, buflen, dst, cookie_len); + } else { + rv = saferead(fd, dst, cookie_len); + } + if (rv < 0) { + virReportSystemError(errno, + _("failed to read libvirt cookie in domain save file '%s'"), + path); + return -1; + } + if (rv < cookie_len) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain save file '%s' cookie seems incomplete"), + path); + return -3; + } + rv -= cookie_len; + } + } + /* + * we should now be aligned and ready to read the QEVM. + */ + if (rv > 0) { + if (isDirect) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected unaligned data in direct mode")); + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("unexpected leftover data in non-direct mode")); + return -1; + } + } return 0; } @@ -444,11 +592,8 @@ qemuSaveImageOpen(virQEMUDriver *driver, VIR_AUTOCLOSE fd = -1; int ret = -1; g_autoptr(virQEMUSaveData) data = NULL; - virQEMUSaveHeader *header; g_autoptr(virDomainDef) def = NULL; int oflags = open_write ? O_RDWR : O_RDONLY; - size_t xml_len; - size_t cookie_len; if (bypass_cache) { int directFlag = virFileDirectFdFlag(); @@ -469,89 +614,17 @@ qemuSaveImageOpen(virQEMUDriver *driver, return -1; data = g_new0(virQEMUSaveData, 1); - - header = &data->header; - if (saferead(fd, header, sizeof(*header)) != sizeof(*header)) { - if (unlink_corrupt) { + ret = virQEMUSaveDataRead(data, fd, path); + if (ret < 0) { + if (unlink_corrupt && ret == -3) { if (unlink(path) < 0) { virReportSystemError(errno, _("cannot remove corrupt file: %s"), path); return -1; - } else { - return -3; } } - - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("failed to read qemu header")); - return -1; - } - - if (memcmp(header->magic, QEMU_SAVE_MAGIC, sizeof(header->magic)) != 0) { - if (memcmp(header->magic, QEMU_SAVE_PARTIAL, sizeof(header->magic)) == 0) { - if (unlink_corrupt) { - if (unlink(path) < 0) { - virReportSystemError(errno, - _("cannot remove corrupt file: %s"), - path); - return -1; - } else { - return -3; - } - } - - virReportError(VIR_ERR_OPERATION_FAILED, "%s", - _("save image is incomplete")); - return -1; - } - - virReportError(VIR_ERR_OPERATION_FAILED, "%s", - _("image magic is incorrect")); - return -1; - } - - if (header->version > QEMU_SAVE_VERSION) { - /* convert endianness and try again */ - qemuSaveImageBswapHeader(header); - } - - if (header->version > QEMU_SAVE_VERSION) { - virReportError(VIR_ERR_OPERATION_FAILED, - _("image version is not supported (%d > %d)"), - header->version, QEMU_SAVE_VERSION); - return -1; - } - - if (header->data_len <= 0) { - virReportError(VIR_ERR_OPERATION_FAILED, - _("invalid header data length: %d"), header->data_len); - return -1; - } - - if (header->cookieOffset) - xml_len = header->cookieOffset; - else - xml_len = header->data_len; - - cookie_len = header->data_len - xml_len; - - data->xml = g_new0(char, xml_len); - - if (saferead(fd, data->xml, xml_len) != xml_len) { - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("failed to read domain XML")); - return -1; - } - - if (cookie_len > 0) { - data->cookie = g_new0(char, cookie_len); - - if (saferead(fd, data->cookie, cookie_len) != cookie_len) { - virReportError(VIR_ERR_OPERATION_FAILED, "%s", - _("failed to read cookie")); - return -1; - } + return ret; } /* Create a domain from this XML */ diff --git a/src/qemu/qemu_saveimage.h b/src/qemu/qemu_saveimage.h index 391cd55ed0..f12374b3d7 100644 --- a/src/qemu/qemu_saveimage.h +++ b/src/qemu/qemu_saveimage.h @@ -36,14 +36,14 @@ G_STATIC_ASSERT(sizeof(QEMU_SAVE_MAGIC) == sizeof(QEMU_SAVE_PARTIAL)); typedef struct _virQEMUSaveHeader virQEMUSaveHeader; struct _virQEMUSaveHeader { - char magic[sizeof(QEMU_SAVE_MAGIC)-1]; - uint32_t version; - uint32_t data_len; - uint32_t was_running; - uint32_t compressed; - uint32_t cookieOffset; - uint32_t unused[14]; -}; + char magic[sizeof(QEMU_SAVE_MAGIC)-1]; /* 16 bytes */ + uint32_t version; /* 4 bytes */ + uint32_t data_len; /* 4 bytes */ + uint32_t was_running; /* 4 bytes */ + uint32_t compressed; /* 4 bytes */ + uint32_t cookieOffset; /* 4 bytes */ + uint32_t unused[14]; /* 56 bytes */ +} ATTRIBUTE_PACKED; /* = 92 bytes */ typedef struct _virQEMUSaveData virQEMUSaveData; @@ -103,6 +103,10 @@ int virQEMUSaveDataWrite(virQEMUSaveData *data, int fd, const char *path); +int +virQEMUSaveDataRead(virQEMUSaveData *data, + int fd, + const char *path); virQEMUSaveData * virQEMUSaveDataNew(char *domXML, -- 2.26.2

change the version check to assume even future versions of the format will continue to support compression. Signed-off-by: Claudio Fontana <cfontana@suse.de> --- src/qemu/qemu_saveimage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qemu/qemu_saveimage.c b/src/qemu/qemu_saveimage.c index 7262c598f8..73ddd6a1da 100644 --- a/src/qemu/qemu_saveimage.c +++ b/src/qemu/qemu_saveimage.c @@ -674,7 +674,7 @@ qemuSaveImageStartVM(virConnectPtr conn, virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0) goto cleanup; - if ((header->version == 2) && + if ((header->version >= 2) && (header->compressed != QEMU_SAVE_FORMAT_RAW)) { if (!(cmd = qemuSaveImageGetCompressionCommand(header->compressed))) goto cleanup; -- 2.26.2

we will allow to use already open fds that are not empty for both read and write, as long as they are properly aligned. Adapt the truncation to take into account the initial offset. Signed-off-by: Claudio Fontana <cfontana@suse.de> --- src/util/virfile.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index c529598595..b4eacfe608 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -4890,12 +4890,6 @@ runIOCopy(const struct runIOParams p) virReportSystemError(errno, _("Unable to write %s"), p.fdoutname); return -3; } - - if (!p.isBlockDev && ftruncate(p.fdout, total) < 0) { - virReportSystemError(errno, _("Unable to truncate %s"), p.fdoutname); - return -4; - } - break; } @@ -4930,6 +4924,7 @@ off_t virFileDiskCopy(int disk_fd, const char *disk_path, int remote_fd, const char *remote_path) { int ret = -1; + off_t off = 0; off_t total = 0; struct stat sb; struct runIOParams p; @@ -4973,23 +4968,17 @@ virFileDiskCopy(int disk_fd, const char *disk_path, int remote_fd, const char *r (oflags & O_ACCMODE)); goto cleanup; } - /* To make the implementation simpler, we give up on any - * attempt to use O_DIRECT in a non-trivial manner. */ if (!p.isBlockDev && p.isDirect) { - off_t off; - if (p.isWrite) { - /* - * note: for write we do not only check that disk_fd is seekable, - * we also want to know that the file is empty, so we need SEEK_END. - */ - if ((off = lseek(disk_fd, 0, SEEK_END)) != 0) { - virReportSystemError(off < 0 ? errno : EINVAL, "%s", - _("O_DIRECT write needs empty seekable file")); - goto cleanup; - } - } else if ((off = lseek(disk_fd, 0, SEEK_CUR)) != 0) { - virReportSystemError(off < 0 ? errno : EINVAL, "%s", - _("O_DIRECT read needs entire seekable file")); + off = lseek(disk_fd, 0, SEEK_CUR); + + /* Detect wrong uses of O_DIRECT. */ + if (off < 0) { + virReportSystemError(errno, "%s", _("O_DIRECT needs a seekable file")); + goto cleanup; + } + if (virFileDirectAlign(off) != off) { + /* we could write some zeroes, but maybe it is safer to just fail */ + virReportSystemError(EINVAL, "%s", _("O_DIRECT attempted on an open fd that is not aligned")); goto cleanup; } } @@ -4997,6 +4986,12 @@ virFileDiskCopy(int disk_fd, const char *disk_path, int remote_fd, const char *r if (total < 0) goto cleanup; + if (!p.isBlockDev && p.isDirect && p.isWrite) { + if (ftruncate(p.fdout, off + total) < 0) { + virReportSystemError(errno, _("Unable to truncate %s"), p.fdoutname); + goto cleanup; + } + } /* Ensure all data is written */ if (virFileDataSync(p.fdout) < 0) { if (errno != EINVAL && errno != EROFS) { -- 2.26.2
participants (1)
-
Claudio Fontana