[libvirt] [PATCH 0/2] try harder to get dest qemu errors on migation

Hi, all. In case migration fails due to destination qemu exits unexpectedly user recevies the qemu log in the error message. Unfortunately log is truncated and the most interesting part is missed (below is the example of such a log [1]). Actually for the most cases the first patch will be enough to fix the issue. Originally I thought the problem is qemu logging and reading the log are not in sync (which is true) so I tried to fix it as well in the second patch. [1] log example: CPU Reset (CPU 0) EAX=00000000 EBX=00000000 ECX=00000000 EDX=00000000 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=00000000 EFL=00000000 [-------] CPL=0 II=0 A20=0 SMM=0 HLT=0 ES =0000 00000000 00000000 00000000 CS =0000 00000000 00000000 00000000 SS =0000 00000000 00000000 00000000 DS =0000 00000000 00000000 00000000 FS =0000 00000000 00000000 00000000 GS =0000 00000000 00000000 00000000 LDT=0000 00000000 00000000 00000000 TR =0000 00000000 00000000 00000000 GDT= 00000000 00000000 IDT= 00000000 00000000 CR0=00000000 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=0000000000000000 DR7=0000000000000000 CCS=00000000 CCD=00000000 CCO=DYNAMIC EFER=0000000000000000 FCW=0000 FSW=0000 [ST=0] FTW=ff MXCSR=00000000 FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000 CPU Reset (CPU 1) EAX=00000000 EBX=00000000 ECX=00000000 EDX=000206a1 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 CCS=00000000 CCD=00000000 CCO=DYNAMIC EFER=0000000000000000 FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80 FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 XMM06=00000000000000000000000000000000 XMM07=000 qemu: terminating on signal 15 from pid 168133 Nikolay Shirokovskiy (2): log: remove 1k limit for error messages log: drain log of exiting qemu process carefully src/logging/log_handler.c | 39 +++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.c | 4 ++++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_process.c | 2 ++ src/util/virerror.c | 9 ++++----- 5 files changed, 48 insertions(+), 7 deletions(-) -- 1.8.3.1

This limit is the issue for example when we want to return in the error message the log of the abnormally exited qemu process of the destination side in the process of migration. virVasprintf is already used in the process of logging in the callers down the stack. --- src/util/virerror.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/util/virerror.c b/src/util/virerror.c index 2958308..ca74c14 100644 --- a/src/util/virerror.c +++ b/src/util/virerror.c @@ -1427,22 +1427,21 @@ void virReportErrorHelper(int domcode, { int save_errno = errno; va_list args; - char errorMessage[1024]; + char *errorMessage = NULL; const char *virerr; if (fmt) { va_start(args, fmt); - vsnprintf(errorMessage, sizeof(errorMessage)-1, fmt, args); + ignore_value(virVasprintfQuiet(&errorMessage, fmt, args)); va_end(args); - } else { - errorMessage[0] = '\0'; } - virerr = virErrorMsg(errorcode, (errorMessage[0] ? errorMessage : NULL)); + virerr = virErrorMsg(errorcode, errorMessage); virRaiseErrorFull(filename, funcname, linenr, domcode, errorcode, VIR_ERR_ERROR, virerr, errorMessage, NULL, -1, -1, virerr, errorMessage); + VIR_FREE(errorMessage); errno = save_errno; } -- 1.8.3.1

Read API call of logger daemon is used to get extra information from terminated qemu process. For this purpose we need to wait until qemu process finishes its writing to log. We need to: 1. Read until EOF. 2. Don't stop reading on hangup. virtlogd will not receive EOF as both qemu and libvirtd keep logging pipe descriptor. Thus let's close it after qemu process startup is finished as it will not be used anymore. All following logging on behalf of qemu by libvirt is done either thru daemon API or by receiving new writing descriptor. Beware, qemuDomainLogContextFree is unref actually. --- src/logging/log_handler.c | 39 +++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.c | 4 ++++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_process.c | 2 ++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/logging/log_handler.c b/src/logging/log_handler.c index cd0ba6e..d0479c9 100644 --- a/src/logging/log_handler.c +++ b/src/logging/log_handler.c @@ -54,10 +54,12 @@ struct _virLogHandlerLogFile { char *driver; unsigned char domuuid[VIR_UUID_BUFLEN]; char *domname; + char *path; }; struct _virLogHandler { virObjectLockable parent; + virCond condition; bool privileged; size_t max_size; @@ -102,6 +104,7 @@ virLogHandlerLogFileFree(virLogHandlerLogFilePtr file) VIR_FREE(file->driver); VIR_FREE(file->domname); + VIR_FREE(file->path); VIR_FREE(file); } @@ -137,6 +140,21 @@ virLogHandlerGetLogFileFromWatch(virLogHandlerPtr handler, } +static virLogHandlerLogFilePtr +virLogHandlerGetLogFileFromPath(virLogHandlerPtr handler, + const char* path) +{ + size_t i; + + for (i = 0; i < handler->nfiles; i++) { + if (STREQ(handler->files[i]->path, path)) + return handler->files[i]; + } + + return NULL; +} + + static void virLogHandlerDomainLogFileEvent(int watch, int fd, @@ -167,11 +185,14 @@ virLogHandlerDomainLogFileEvent(int watch, goto error; } + if (len == 0) + goto error; + if (virRotatingFileWriterAppend(logfile->file, buf, len) != len) goto error; if (events & VIR_EVENT_HANDLE_HANGUP) - goto error; + goto reread; virObjectUnlock(handler); return; @@ -179,6 +200,7 @@ virLogHandlerDomainLogFileEvent(int watch, error: handler->inhibitor(false, handler->opaque); virLogHandlerLogFileClose(handler, logfile); + virCondBroadcast(&handler->condition); virObjectUnlock(handler); } @@ -198,6 +220,9 @@ virLogHandlerNew(bool privileged, if (!(handler = virObjectLockableNew(virLogHandlerClass))) goto error; + if (virCondInit(&handler->condition) < 0) + goto error; + handler->privileged = privileged; handler->max_size = max_size; handler->max_backups = max_backups; @@ -357,6 +382,7 @@ virLogHandlerDispose(void *obj) virLogHandlerLogFileFree(handler->files[i]); } VIR_FREE(handler->files); + virCondDestroy(&handler->condition); } @@ -401,7 +427,8 @@ virLogHandlerDomainOpenLogFile(virLogHandlerPtr handler, pipefd[0] = -1; memcpy(file->domuuid, domuuid, VIR_UUID_BUFLEN); if (VIR_STRDUP(file->driver, driver) < 0 || - VIR_STRDUP(file->domname, domname) < 0) + VIR_STRDUP(file->domname, domname) < 0 || + VIR_STRDUP(file->path, path) < 0) goto error; if ((file->file = virRotatingFileWriterNew(path, @@ -496,6 +523,14 @@ virLogHandlerDomainReadLogFile(virLogHandlerPtr handler, virObjectLock(handler); + while (virLogHandlerGetLogFileFromPath(handler, path)) { + if (virCondWait(&handler->condition, &handler->parent.lock) < 0) { + virReportSystemError(errno, "%s", + _("failed to wait for EOF")); + goto error; + } + } + if (!(file = virRotatingFileReaderNew(path, handler->max_backups))) goto error; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index fd79390..26a0255 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -3974,6 +3974,10 @@ virLogManagerPtr qemuDomainLogContextGetManager(qemuDomainLogContextPtr ctxt) return ctxt->manager; } +void qemuDomainLogContextHalfClose(qemuDomainLogContextPtr ctxt) +{ + VIR_FORCE_CLOSE(ctxt->writefd); +} void qemuDomainLogContextFree(qemuDomainLogContextPtr ctxt) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 13c0372..71fe6ad 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -499,6 +499,7 @@ ssize_t qemuDomainLogContextRead(qemuDomainLogContextPtr ctxt, int qemuDomainLogContextGetWriteFD(qemuDomainLogContextPtr ctxt); void qemuDomainLogContextMarkPosition(qemuDomainLogContextPtr ctxt); void qemuDomainLogContextRef(qemuDomainLogContextPtr ctxt); +void qemuDomainLogContextHalfClose(qemuDomainLogContextPtr ctxt); void qemuDomainLogContextFree(qemuDomainLogContextPtr ctxt); virLogManagerPtr qemuDomainLogContextGetManager(qemuDomainLogContextPtr ctxt); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 7596579..737b2e0 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -5493,6 +5493,8 @@ qemuProcessLaunch(virConnectPtr conn, cleanup: qemuDomainSecretDestroy(vm); virCommandFree(cmd); + if (logCtxt) + qemuDomainLogContextHalfClose(logCtxt); qemuDomainLogContextFree(logCtxt); virObjectUnref(cfg); virObjectUnref(caps); -- 1.8.3.1

On Mon, Sep 12, 2016 at 02:12:10PM +0300, Nikolay Shirokovskiy wrote:
Read API call of logger daemon is used to get extra information from terminated qemu process. For this purpose we need to wait until qemu process finishes its writing to log. We need to:
1. Read until EOF. 2. Don't stop reading on hangup.
virtlogd will not receive EOF as both qemu and libvirtd keep logging pipe descriptor. Thus let's close it after qemu process startup is finished as it will not be used anymore. All following logging on behalf of qemu by libvirt is done either thru daemon API or by receiving new writing descriptor.
Beware, qemuDomainLogContextFree is unref actually. --- src/logging/log_handler.c | 39 +++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.c | 4 ++++ src/qemu/qemu_domain.h | 1 + src/qemu/qemu_process.c | 2 ++ 4 files changed, 44 insertions(+), 2 deletions(-)
Any virtlogd changes should really be in separate commits from any QEMU changes. virtlogd is intended to be a reusable component of libvirt for multiple use cases, so any changes to it need to described & justifiable independently of any QEMU changes.
+static virLogHandlerLogFilePtr +virLogHandlerGetLogFileFromPath(virLogHandlerPtr handler, + const char* path) +{ + size_t i; + + for (i = 0; i < handler->nfiles; i++) { + if (STREQ(handler->files[i]->path, path)) + return handler->files[i]; + } + + return NULL; +}
@@ -167,11 +185,14 @@ virLogHandlerDomainLogFileEvent(int watch, goto error; }
+ if (len == 0) + goto error; + if (virRotatingFileWriterAppend(logfile->file, buf, len) != len) goto error;
if (events & VIR_EVENT_HANDLE_HANGUP) - goto error; + goto reread;
This patch hunk seems to be indendant of the other changes in this file. It is a straightfoward bugfix to ensure we full read all data before handling the hangup. So it should be a separate commit too.
virObjectUnlock(handler); return; @@ -179,6 +200,7 @@ virLogHandlerDomainLogFileEvent(int watch, error: handler->inhibitor(false, handler->opaque); virLogHandlerLogFileClose(handler, logfile); + virCondBroadcast(&handler->condition); virObjectUnlock(handler); }
@@ -198,6 +220,9 @@ virLogHandlerNew(bool privileged, if (!(handler = virObjectLockableNew(virLogHandlerClass))) goto error;
+ if (virCondInit(&handler->condition) < 0) + goto error; + handler->privileged = privileged; handler->max_size = max_size; handler->max_backups = max_backups; @@ -357,6 +382,7 @@ virLogHandlerDispose(void *obj) virLogHandlerLogFileFree(handler->files[i]); } VIR_FREE(handler->files); + virCondDestroy(&handler->condition); }
@@ -401,7 +427,8 @@ virLogHandlerDomainOpenLogFile(virLogHandlerPtr handler, pipefd[0] = -1; memcpy(file->domuuid, domuuid, VIR_UUID_BUFLEN); if (VIR_STRDUP(file->driver, driver) < 0 || - VIR_STRDUP(file->domname, domname) < 0) + VIR_STRDUP(file->domname, domname) < 0 || + VIR_STRDUP(file->path, path) < 0) goto error;
if ((file->file = virRotatingFileWriterNew(path, @@ -496,6 +523,14 @@ virLogHandlerDomainReadLogFile(virLogHandlerPtr handler,
virObjectLock(handler);
+ while (virLogHandlerGetLogFileFromPath(handler, path)) { + if (virCondWait(&handler->condition, &handler->parent.lock) < 0) { + virReportSystemError(errno, "%s", + _("failed to wait for EOF")); + goto error; + } + }
Ewww no - this code is not only intended for use from codepath where QEMU is shutdown. It absolutely should be able to read from a log where QEMU is still running. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (2)
-
Daniel P. Berrange
-
Nikolay Shirokovskiy