I'm running into an issue where, if I/O is hampered (by load, for example),
reading a largish state file (created by 'virsh save') is not allowed to
complete.
qemudStartVMDaemon in src/qemu_driver.c has a loop that waits 10 seconds
for the VM to be brought up. An strace against libvirt when doing a
'virsh restore' against a largish state file shows the VM being sent a
kill when it's still happily reading from the file.
inotify is already used in the xen and uml drivers, so I thought it would be
a suitable mechanism for holding off the timeout loop while the state file is
still being read.
Is this the right way to solve this problem?
I've inlined some (flimsy) modifications below for comment.
Regards
Matt McCowan
--- libvirt.orig/src/qemu_driver.c 2009-03-20 11:17:57.000000000 +0900
+++ libvirt-0.6.1.1/src/qemu_driver.c 2009-03-22 18:36:06.000000000
+0900
@@ -43,6 +43,7 @@
#include <stdio.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
+#include <sys/inotify.h>
#if HAVE_NUMACTL
#define NUMA_VERSION1_COMPATIBILITY 1
@@ -1310,6 +1309,11 @@
pid_t child;
int pos = -1;
char ebuf[1024];
+ char rpathname[PATH_MAX];
+ int fd = 0;
+ int wd = 0;
+ int got = 0;
+ char buf[1024];
struct gemudHookData hookData;
hookData.conn = conn;
@@ -1430,6 +1434,19 @@
qemudSecurityHook, &hookData);
/* wait for qemu process to to show up */
+
+ /* Times out if using a restore file and that file
+ is big. Use inotify */
+ if (stdin_fd > 0) {
+ fd = inotify_init();
+
+ if (snprintf(rpathname, sizeof(rpathname), "/proc/self/fd/%d",
+ stdin_fd) >= (int)sizeof(rpathname)) {
+ return -1;
+ }
+ wd = inotify_add_watch(fd, rpathname, IN_ACCESS);
+ }
+
if (ret == 0) {
int retries = 100;
int childstat;
@@ -1442,7 +1459,15 @@
if ((ret = virFileReadPid(driver->stateDir,
vm->def->name, &vm->pid)) == 0)
break;
usleep(100*1000);
- retries--;
+
+ /* Is state file being accessed? */
+ got = read(fd, buf, sizeof(buf));
+ if (got < 1)
+ retries--;
+ }
+ if (stdin_fd > 0) {
+ inotify_rm_watch(fd, wd);
+ close(fd);
}
if (ret) {
qemudReportError(conn, NULL, NULL,
VIR_ERR_INTERNAL_ERROR,