[PATCH 0/2] Support VM core dump to block device

When the destination of a VM core dump is a block device (e.g. NBD) avoid file operations that are unnecessary. Simon Rowe (2): iohelper: skip lseek() and ftruncate() on block devices qemu: never unlink() the core dump output file src/qemu/qemu_driver.c | 2 -- src/util/iohelper.c | 10 ++++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) -- 2.22.3

Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/util/iohelper.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/util/iohelper.c b/src/util/iohelper.c index b8810d16d3..e6eb178fde 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -28,6 +28,8 @@ #include <unistd.h> #include <fcntl.h> #include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> #include "virthread.h" #include "virfile.h" @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags) unsigned long long total = 0; bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); off_t end = 0; + struct stat sb; + bool isBlockDev = false; #if WITH_POSIX_MEMALIGN if (posix_memalign(&base, alignMask + 1, buflen)) @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags) fdinname = "stdin"; fdout = fd; fdoutname = path; + if (fstat(fd, &sb) == 0) + isBlockDev = S_ISBLK(sb.st_mode); /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { + if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) { virReportSystemError(end < 0 ? errno : EINVAL, "%s", _("O_DIRECT write needs empty seekable file")); goto cleanup; @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags) goto cleanup; } - if (ftruncate(fd, total) < 0) { + if (!isBlockDev && ftruncate(fd, total) < 0) { virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); goto cleanup; } -- 2.22.3

On 8/20/21 10:39 AM, Simon Rowe wrote:
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/util/iohelper.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/util/iohelper.c b/src/util/iohelper.c index b8810d16d3..e6eb178fde 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -28,6 +28,8 @@ #include <unistd.h> #include <fcntl.h> #include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h>
#include "virthread.h" #include "virfile.h" @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags) unsigned long long total = 0; bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); off_t end = 0; + struct stat sb; + bool isBlockDev = false;
#if WITH_POSIX_MEMALIGN if (posix_memalign(&base, alignMask + 1, buflen)) @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags) fdinname = "stdin"; fdout = fd; fdoutname = path; + if (fstat(fd, &sb) == 0) + isBlockDev = S_ISBLK(sb.st_mode); /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { + if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) { virReportSystemError(end < 0 ? errno : EINVAL, "%s", _("O_DIRECT write needs empty seekable file")); goto cleanup; @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags) goto cleanup; }
- if (ftruncate(fd, total) < 0) { + if (!isBlockDev && ftruncate(fd, total) < 0) { virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); goto cleanup; }
IIUC, O_DIRECT is no good for block devices. I wonder whether the caller (doCoreDump()) should learn whether the path is a block device and don't set O_DIRECT flag instead of changing iohelper. Michal

I can change to doing this if it preferred, Simon ________________________________ From: Michal Prívozník <mprivozn@redhat.com> Sent: 20 August 2021 16:09 To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices On 8/20/21 10:39 AM, Simon Rowe wrote:
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/util/iohelper.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/util/iohelper.c b/src/util/iohelper.c index b8810d16d3..e6eb178fde 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -28,6 +28,8 @@ #include <unistd.h> #include <fcntl.h> #include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h>
#include "virthread.h" #include "virfile.h" @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags) unsigned long long total = 0; bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); off_t end = 0; + struct stat sb; + bool isBlockDev = false;
#if WITH_POSIX_MEMALIGN if (posix_memalign(&base, alignMask + 1, buflen)) @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags) fdinname = "stdin"; fdout = fd; fdoutname = path; + if (fstat(fd, &sb) == 0) + isBlockDev = S_ISBLK(sb.st_mode); /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { + if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) { virReportSystemError(end < 0 ? errno : EINVAL, "%s", _("O_DIRECT write needs empty seekable file")); goto cleanup; @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags) goto cleanup; }
- if (ftruncate(fd, total) < 0) { + if (!isBlockDev && ftruncate(fd, total) < 0) { virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); goto cleanup; }
IIUC, O_DIRECT is no good for block devices. I wonder whether the caller (doCoreDump()) should learn whether the path is a block device and don't set O_DIRECT flag instead of changing iohelper. Michal

O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive. Simon ________________________________ From: Michal Prívozník <mprivozn@redhat.com> Sent: 20 August 2021 16:09 To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices On 8/20/21 10:39 AM, Simon Rowe wrote:
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/util/iohelper.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/util/iohelper.c b/src/util/iohelper.c index b8810d16d3..e6eb178fde 100644 --- a/src/util/iohelper.c +++ b/src/util/iohelper.c @@ -28,6 +28,8 @@ #include <unistd.h> #include <fcntl.h> #include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h>
#include "virthread.h" #include "virfile.h" @@ -56,6 +58,8 @@ runIO(const char *path, int fd, int oflags) unsigned long long total = 0; bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0); off_t end = 0; + struct stat sb; + bool isBlockDev = false;
#if WITH_POSIX_MEMALIGN if (posix_memalign(&base, alignMask + 1, buflen)) @@ -86,9 +90,11 @@ runIO(const char *path, int fd, int oflags) fdinname = "stdin"; fdout = fd; fdoutname = path; + if (fstat(fd, &sb) == 0) + isBlockDev = S_ISBLK(sb.st_mode); /* To make the implementation simpler, we give up on any * attempt to use O_DIRECT in a non-trivial manner. */ - if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) { + if (!isBlockDev && direct && (end = lseek(fd, 0, SEEK_END)) != 0) { virReportSystemError(end < 0 ? errno : EINVAL, "%s", _("O_DIRECT write needs empty seekable file")); goto cleanup; @@ -140,7 +146,7 @@ runIO(const char *path, int fd, int oflags) goto cleanup; }
- if (ftruncate(fd, total) < 0) { + if (!isBlockDev && ftruncate(fd, total) < 0) { virReportSystemError(errno, _("Unable to truncate %s"), fdoutname); goto cleanup; }
IIUC, O_DIRECT is no good for block devices. I wonder whether the caller (doCoreDump()) should learn whether the path is a block device and don't set O_DIRECT flag instead of changing iohelper. Michal

On 8/23/21 4:38 PM, Simon Rowe wrote:
O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.
Fair enough. I thought that block devices are always opened without poisoning host cache. If they aren't then you patch makes sense. But, what is your kernel version? Hopefully it's not too old. Is it a special block device or just some regular disk? Looking into the kernel code, it does implement O_DIRECT for block devices: kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c fs/block_dev.c-1680-} fs/block_dev.c-1681- fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = { fs/block_dev.c-1683- .set_page_dirty = __set_page_dirty_buffers, fs/block_dev.c-1684- .readpage = blkdev_readpage, fs/block_dev.c-1685- .readahead = blkdev_readahead, fs/block_dev.c-1686- .writepage = blkdev_writepage, fs/block_dev.c-1687- .write_begin = blkdev_write_begin, fs/block_dev.c-1688- .write_end = blkdev_write_end, fs/block_dev.c-1689- .writepages = blkdev_writepages, fs/block_dev.c:1690: .direct_IO = blkdev_direct_IO, fs/block_dev.c-1691- .migratepage = buffer_migrate_page_norefs, fs/block_dev.c-1692- .is_dirty_writeback = buffer_check_dirty_writeback, fs/block_dev.c-1693-}; fs/block_dev.c-1694- fs/block_dev.c-1695-#define BLKDEV_FALLOC_FL_SUPPORTED \ fs/block_dev.c-1696- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ fs/block_dev.c-1697- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) fs/block_dev.c-1698- fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start, fs/block_dev.c-1700- loff_t len) So it seems like your patch was correct after all. Would you like to resend this in v2 (I understand that patch 2/2 needs a rework) or do you want me to push this one right away? Michal

I'm using 5.4.109. I'll re-submit with changes to the second patch shortly, thanks Simon ________________________________ From: Michal Prívozník <mprivozn@redhat.com> Sent: 23 August 2021 16:04 To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices On 8/23/21 4:38 PM, Simon Rowe wrote:
O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.
Fair enough. I thought that block devices are always opened without poisoning host cache. If they aren't then you patch makes sense. But, what is your kernel version? Hopefully it's not too old. Is it a special block device or just some regular disk? Looking into the kernel code, it does implement O_DIRECT for block devices: kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c fs/block_dev.c-1680-} fs/block_dev.c-1681- fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = { fs/block_dev.c-1683- .set_page_dirty = __set_page_dirty_buffers, fs/block_dev.c-1684- .readpage = blkdev_readpage, fs/block_dev.c-1685- .readahead = blkdev_readahead, fs/block_dev.c-1686- .writepage = blkdev_writepage, fs/block_dev.c-1687- .write_begin = blkdev_write_begin, fs/block_dev.c-1688- .write_end = blkdev_write_end, fs/block_dev.c-1689- .writepages = blkdev_writepages, fs/block_dev.c:1690: .direct_IO = blkdev_direct_IO, fs/block_dev.c-1691- .migratepage = buffer_migrate_page_norefs, fs/block_dev.c-1692- .is_dirty_writeback = buffer_check_dirty_writeback, fs/block_dev.c-1693-}; fs/block_dev.c-1694- fs/block_dev.c-1695-#define BLKDEV_FALLOC_FL_SUPPORTED \ fs/block_dev.c-1696- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ fs/block_dev.c-1697- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) fs/block_dev.c-1698- fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start, fs/block_dev.c-1700- loff_t len) So it seems like your patch was correct after all. Would you like to resend this in v2 (I understand that patch 2/2 needs a rework) or do you want me to push this one right away? Michal

Missed to answer part of your question. I'm using an NBD device backed by a userspace process using nbdkit, Simon ________________________________ From: Simon Rowe <simon.rowe@nutanix.com> Sent: 23 August 2021 16:15 To: Michal Prívozník <mprivozn@redhat.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices I'm using 5.4.109. I'll re-submit with changes to the second patch shortly, thanks Simon ________________________________ From: Michal Prívozník <mprivozn@redhat.com> Sent: 23 August 2021 16:04 To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 1/2] iohelper: skip lseek() and ftruncate() on block devices On 8/23/21 4:38 PM, Simon Rowe wrote:
O_DIRECT is under control of the caller (though the --bypass-cache option). I am running in a constrained memory environment and when I don't use O_DIRECT the dump stalls at 5% load av hits 41 and the system becomes unresponsive.
Fair enough. I thought that block devices are always opened without poisoning host cache. If they aren't then you patch makes sense. But, what is your kernel version? Hopefully it's not too old. Is it a special block device or just some regular disk? Looking into the kernel code, it does implement O_DIRECT for block devices: kernel.git $ git grep -npC10 "\.direct_IO" -- fs/block_dev.c fs/block_dev.c-1680-} fs/block_dev.c-1681- fs/block_dev.c=1682=static const struct address_space_operations def_blk_aops = { fs/block_dev.c-1683- .set_page_dirty = __set_page_dirty_buffers, fs/block_dev.c-1684- .readpage = blkdev_readpage, fs/block_dev.c-1685- .readahead = blkdev_readahead, fs/block_dev.c-1686- .writepage = blkdev_writepage, fs/block_dev.c-1687- .write_begin = blkdev_write_begin, fs/block_dev.c-1688- .write_end = blkdev_write_end, fs/block_dev.c-1689- .writepages = blkdev_writepages, fs/block_dev.c:1690: .direct_IO = blkdev_direct_IO, fs/block_dev.c-1691- .migratepage = buffer_migrate_page_norefs, fs/block_dev.c-1692- .is_dirty_writeback = buffer_check_dirty_writeback, fs/block_dev.c-1693-}; fs/block_dev.c-1694- fs/block_dev.c-1695-#define BLKDEV_FALLOC_FL_SUPPORTED \ fs/block_dev.c-1696- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ fs/block_dev.c-1697- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) fs/block_dev.c-1698- fs/block_dev.c-1699-static long blkdev_fallocate(struct file *file, int mode, loff_t start, fs/block_dev.c-1700- loff_t len) So it seems like your patch was correct after all. Would you like to resend this in v2 (I understand that patch 2/2 needs a rework) or do you want me to push this one right away? Michal

The comment above virQEMUFileOpenAs() implies any result should be left intact. Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/qemu/qemu_driver.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f31e13889e..b1ac1cb73b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3282,8 +3282,6 @@ doCoreDump(virQEMUDriver *driver, if (qemuDomainFileWrapperFDClose(vm, wrapperFd) < 0) ret = -1; virFileWrapperFdFree(wrapperFd); - if (ret != 0) - unlink(path); return ret; } -- 2.22.3

On 8/20/21 10:39 AM, Simon Rowe wrote:
The comment above virQEMUFileOpenAs() implies any result should be left intact.
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/qemu/qemu_driver.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f31e13889e..b1ac1cb73b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3282,8 +3282,6 @@ doCoreDump(virQEMUDriver *driver, if (qemuDomainFileWrapperFDClose(vm, wrapperFd) < 0) ret = -1; virFileWrapperFdFree(wrapperFd); - if (ret != 0) - unlink(path); return ret; }
So the @path is opened using virQEMUFileOpenAs() which as the last argument has @needUnlink which tells caller whether the file was created by this call (true) or was already existing (false). But for some reason doCoreDump() passes NULL and then unlinks the file unconditionally. I think the proper fix would be to introduce a boolean, pass it instead of NULL and then have: if (ret != 0 && needUnlink) unlink(path); Michal

I have an alternative patch which does this, I'll submit that instead. Simon ________________________________ From: Michal Prívozník <mprivozn@redhat.com> Sent: 20 August 2021 16:09 To: Simon Rowe <simon.rowe@nutanix.com>; libvir-list@redhat.com <libvir-list@redhat.com> Subject: Re: [PATCH 2/2] qemu: never unlink() the core dump output file On 8/20/21 10:39 AM, Simon Rowe wrote:
The comment above virQEMUFileOpenAs() implies any result should be left intact.
Signed-off-by: Simon Rowe <simon.rowe@nutanix.com> --- src/qemu/qemu_driver.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index f31e13889e..b1ac1cb73b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3282,8 +3282,6 @@ doCoreDump(virQEMUDriver *driver, if (qemuDomainFileWrapperFDClose(vm, wrapperFd) < 0) ret = -1; virFileWrapperFdFree(wrapperFd); - if (ret != 0) - unlink(path); return ret; }
So the @path is opened using virQEMUFileOpenAs() which as the last argument has @needUnlink which tells caller whether the file was created by this call (true) or was already existing (false). But for some reason doCoreDump() passes NULL and then unlinks the file unconditionally. I think the proper fix would be to introduce a boolean, pass it instead of NULL and then have: if (ret != 0 && needUnlink) unlink(path); Michal
participants (2)
-
Michal Prívozník
-
Simon Rowe