On 09/01/2014 11:05 AM, Jiri Denemark wrote:
Total time of a migration and total downtime transfered from a source
to
a destination host do not count with the transfer time to the
destination host and with the time elapsed before guest CPUs are
resumed. Thus, source libvirtd remembers when migration started and when
guest CPUs were paused. Both timestamps are transferred to destination
libvirtd which uses them to compute total migration time and total
downtime. This, obviously, requires clock to be synchronized between the
s/This, obviously,/Obviously this/
"requires clock" reads funny... "requires the time" seems closer
two hosts. The reported times are useless otherwise but they would
be
equally useless if we didn't do this recomputation so don't lose
anything by doing it.
Say nothing of inter-timezone migrations right?
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
src/libvirt.c | 5 ++++-
src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++
src/qemu/qemu_domain.h | 2 ++
src/qemu/qemu_migration.c | 15 ++++++++++++++-
src/qemu/qemu_process.c | 9 ++++++++-
tools/virsh.pod | 5 ++++-
6 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/src/libvirt.c b/src/libvirt.c
index 6fa0a6b..61d0543 100644
--- a/src/libvirt.c
+++ b/src/libvirt.c
@@ -17581,7 +17581,10 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr
info)
* return statistics about a recently completed job. Specifically, this
* flag may be used to query statistics of a completed incoming migration.
* Statistics of a completed job are automatically destroyed once read or
- * when libvirtd is restarted.
+ * when libvirtd is restarted. Note that time information returned for
+ * completed migrations may be completely irrelevant unless both source and
+ * destination hosts have synchronized time (i.e., NTP daemon is running on
+ * both of them).
*
* Returns 0 in case of success and -1 in case of failure.
*/
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 18a3761..cec7828 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo)
if (virTimeMillisNow(&now) < 0)
return -1;
+ if (now < jobInfo->started) {
+ VIR_WARN("Async job starts in the future");
+ jobInfo->started = 0;
+ return 0;
+ }
+
jobInfo->timeElapsed = now - jobInfo->started;
return 0;
}
int
+qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo)
+{
+ unsigned long long now;
+
Can jobInfo == NULL? - It's the qemuMigrationWaitForCompletion() path
timing concern from patch 1.
+ if (!jobInfo->stopped)
+ return 0;
+
+ if (virTimeMillisNow(&now) < 0)
+ return -1;
+
+ if (now < jobInfo->stopped) {
+ VIR_WARN("Guest's CPUs stopped in the future");
+ jobInfo->stopped = 0;
+ return 0;
+ }
+
+ jobInfo->status.downtime = now - jobInfo->stopped;
+ jobInfo->status.downtime_set = true;
+ return 0;
+}
+
+int
qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo,
virDomainJobInfoPtr info)
{
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 365238b..435a22b 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr;
struct _qemuDomainJobInfo {
virDomainJobType type;
unsigned long long started; /* When the async job started */
+ unsigned long long stopped; /* When the domain's CPUs were stopped */
/* Computed values */
unsigned long long timeElapsed;
unsigned long long timeRemaining;
@@ -390,6 +391,7 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv,
bool reportError);
int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo);
+int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo);
Does this also need some sort of ATTRIBUTE_NONNULL(1)?
ACK in general
John
int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo,
virDomainJobInfoPtr info);
int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index f1b3d50..43b42ac 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -623,6 +623,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf,
virBufferAddLit(buf, "<statistics>\n");
virBufferAdjustIndent(buf, 2);
+ virBufferAsprintf(buf, "<started>%llu</started>\n",
jobInfo->started);
+ virBufferAsprintf(buf, "<stopped>%llu</stopped>\n",
jobInfo->stopped);
+
virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n",
VIR_DOMAIN_JOB_TIME_ELAPSED,
jobInfo->timeElapsed);
@@ -891,6 +894,9 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt)
status = &jobInfo->status;
jobInfo->type = VIR_DOMAIN_JOB_COMPLETED;
+ virXPathULongLong("string(./started[1])", ctxt,
&jobInfo->started);
+ virXPathULongLong("string(./stopped[1])", ctxt,
&jobInfo->stopped);
+
virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED
"[1])",
ctxt, &jobInfo->timeElapsed);
virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING
"[1])",
@@ -2015,6 +2021,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
}
if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) {
+ qemuDomainJobInfoUpdateDowntime(jobInfo);
VIR_FREE(priv->job.completed);
if (VIR_ALLOC(priv->job.completed) == 0)
*priv->job.completed = *jobInfo;
@@ -3597,8 +3604,10 @@ qemuMigrationRun(virQEMUDriverPtr driver,
VIR_FORCE_CLOSE(fd);
}
- if (priv->job.completed)
+ if (priv->job.completed) {
qemuDomainJobInfoUpdateTime(priv->job.completed);
+ qemuDomainJobInfoUpdateDowntime(priv->job.completed);
+ }
cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK |
QEMU_MIGRATION_COOKIE_STATS;
@@ -4811,6 +4820,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver,
}
goto endjob;
}
+ if (priv->job.completed) {
+ qemuDomainJobInfoUpdateTime(priv->job.completed);
+ qemuDomainJobInfoUpdateDowntime(priv->job.completed);
+ }
}
dom = virGetDomain(dconn, vm->def->name, vm->def->uuid);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index f68dfbe..c4c1ce5 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -754,6 +754,9 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
VIR_DEBUG("Transitioned guest %s to paused state",
vm->def->name);
+ if (priv->job.current)
+ ignore_value(virTimeMillisNow(&priv->job.current->stopped));
+
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN);
event = virDomainEventLifecycleNewFromObj(vm,
VIR_DOMAIN_EVENT_SUSPENDED,
@@ -2888,7 +2891,8 @@ qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
}
-int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
+int qemuProcessStopCPUs(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
virDomainPausedReason reason,
qemuDomainAsyncJob asyncJob)
{
@@ -2906,6 +2910,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr
vm,
if (ret < 0)
goto cleanup;
+ if (priv->job.current)
+ ignore_value(virTimeMillisNow(&priv->job.current->stopped));
+
virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState)
< 0)
VIR_WARN("Unable to release lease on %s", vm->def->name);
diff --git a/tools/virsh.pod b/tools/virsh.pod
index 3c71db9..13187ce 100644
--- a/tools/virsh.pod
+++ b/tools/virsh.pod
@@ -1115,7 +1115,10 @@ Abort the currently running domain job.
=item B<domjobinfo> I<domain> [I<--completed>]
Returns information about jobs running on a domain. I<--completed> tells
-virsh to return information about a recently finished job.
+virsh to return information about a recently finished job. Note that time
+information returned for completed migrations may be completely irrelevant
+unless both source and destination hosts have synchronized time (i.e., NTP
+daemon is running on both of them).
=item B<domname> I<domain-id-or-uuid>