[libvirt] [PATCH v2 0/8] Add support for fetching statistics of completed jobs

Using virDomainGetJobStats, we can monitor running jobs but sometimes it may be useful to get statistics about a job that already finished, for example, to get the final amount of data transferred during migration or to get an idea about total downtime. This is what the following patches are about. Version 2: - changed according to John's review (see individual patches for details) Jiri Denemark (8): Refactor job statistics qemu: Avoid incrementing jobs_queued if virTimeMillisNow fails Add support for fetching statistics of completed jobs qemu: Silence coverity on optional migration stats virsh: Add support for completed job stats qemu: Transfer migration statistics to destination qemu: Recompute downtime and total time when migration completes qemu: Transfer recomputed stats back to source include/libvirt/libvirt.h.in | 11 ++ src/libvirt.c | 11 +- src/qemu/qemu_domain.c | 189 ++++++++++++++++++++++++++- src/qemu/qemu_domain.h | 32 ++++- src/qemu/qemu_driver.c | 130 ++++-------------- src/qemu/qemu_migration.c | 304 ++++++++++++++++++++++++++++++++++++------- src/qemu/qemu_monitor_json.c | 10 +- src/qemu/qemu_process.c | 9 +- tools/virsh-domain.c | 27 +++- tools/virsh.pod | 10 +- 10 files changed, 557 insertions(+), 176 deletions(-) -- 2.1.0

Job statistics data were tracked in several structures and variables. Let's make a new qemuDomainJobInfo structure which can be used as a single source of statistics data as a preparation for storing data about completed a job. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - added ATTRIBUTE_NONNULL to all new internal APIs src/qemu/qemu_domain.c | 157 ++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.h | 28 ++++++++- src/qemu/qemu_driver.c | 119 ++++------------------------------- src/qemu/qemu_migration.c | 72 ++++++++------------- 4 files changed, 217 insertions(+), 159 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 306ff10..7b54306 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -163,11 +163,9 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncOwner = 0; job->phase = 0; job->mask = QEMU_JOB_DEFAULT_MASK; - job->start = 0; job->dump_memory_only = false; job->asyncAbort = false; - memset(&job->status, 0, sizeof(job->status)); - memset(&job->info, 0, sizeof(job->info)); + VIR_FREE(job->current); } void @@ -200,6 +198,7 @@ qemuDomainObjTransferJob(virDomainObjPtr obj) static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { + VIR_FREE(priv->job.current); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } @@ -211,6 +210,150 @@ qemuDomainTrackJob(qemuDomainJob job) } +int +qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->started) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + jobInfo->timeElapsed = now - jobInfo->started; + return 0; +} + +int +qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info) +{ + info->timeElapsed = jobInfo->timeElapsed; + info->timeRemaining = jobInfo->timeRemaining; + + info->memTotal = jobInfo->status.ram_total; + info->memRemaining = jobInfo->status.ram_remaining; + info->memProcessed = 
jobInfo->status.ram_transferred; + + info->fileTotal = jobInfo->status.disk_total; + info->fileRemaining = jobInfo->status.disk_remaining; + info->fileProcessed = jobInfo->status.disk_transferred; + + info->dataTotal = info->memTotal + info->fileTotal; + info->dataRemaining = info->memRemaining + info->fileRemaining; + info->dataProcessed = info->memProcessed + info->fileProcessed; + + return 0; +} + +int +qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + virTypedParameterPtr par = NULL; + int maxpar = 0; + int npar = 0; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed) < 0) + goto error; + + if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining) < 0) + goto error; + + if (status->downtime_set && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_TOTAL, + status->ram_total + + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_PROCESSED, + status->ram_transferred + + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_REMAINING, + status->ram_remaining + + status->disk_remaining) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_TOTAL, + status->ram_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining) < 0) + goto error; + + if (status->ram_duplicate_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + 
VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes) < 0) + goto error; + } + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining) < 0) + goto error; + + if (status->xbzrle_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow) < 0) + goto error; + } + + *type = jobInfo->type; + *params = par; + *nparams = npar; + return 0; + + error: + virTypedParamsFree(par, npar); + return -1; +} + + static void * qemuDomainObjPrivateAlloc(void) { @@ -1071,7 +1214,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, unsigned long long then; bool nested = job == QEMU_JOB_ASYNC_NESTED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - int ret; + int ret = -1; VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", job == QEMU_JOB_ASYNC ? 
"async job" : "job", @@ -1127,9 +1270,11 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainAsyncJobTypeToString(asyncJob), obj, obj->def->name); qemuDomainObjResetAsyncJob(priv); + if (VIR_ALLOC(priv->job.current) < 0) + goto cleanup; priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); - priv->job.start = now; + priv->job.current->started = now; } if (qemuDomainTrackJob(job)) @@ -1163,6 +1308,8 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, virReportSystemError(errno, "%s", _("cannot acquire job mutex")); } + + cleanup: priv->jobs_queued--; virObjectUnref(obj); virObjectUnref(cfg); diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index f353d90..99c7d6a 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -100,6 +100,18 @@ typedef enum { } qemuDomainAsyncJob; VIR_ENUM_DECL(qemuDomainAsyncJob) +typedef struct _qemuDomainJobInfo qemuDomainJobInfo; +typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; +struct _qemuDomainJobInfo { + virDomainJobType type; + unsigned long long started; /* When the async job started */ + /* Computed values */ + unsigned long long timeElapsed; + unsigned long long timeRemaining; + /* Raw values from QEMU */ + qemuMonitorMigrationStatus status; +}; + struct qemuDomainJobObj { virCond cond; /* Use to coordinate jobs */ qemuDomainJob active; /* Currently running job */ @@ -110,10 +122,8 @@ struct qemuDomainJobObj { unsigned long long asyncOwner; /* Thread which set current async job */ int phase; /* Job phase (mainly for migrations) */ unsigned long long mask; /* Jobs allowed during async job */ - unsigned long long start; /* When the async job started */ bool dump_memory_only; /* use dump-guest-memory to do dump */ - qemuMonitorMigrationStatus status; /* Raw async job progress data */ - virDomainJobInfo info; /* Processed async job progress data */ + qemuDomainJobInfoPtr current; /* async job progress data */ bool asyncAbort; /* abort of async job requested */ }; @@ 
-378,4 +388,16 @@ bool qemuDomainDefCheckABIStability(virQEMUDriverPtr driver, bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError); +int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) + ATTRIBUTE_NONNULL(1); +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) + ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4); + #endif /* __QEMU_DOMAIN_H__ */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d724eeb..a5533c5 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2655,15 +2655,13 @@ qemuDomainGetControlInfo(virDomainPtr dom, if (priv->monError) { info->state = VIR_DOMAIN_CONTROL_ERROR; } else if (priv->job.active) { - if (!priv->monStart) { + if (virTimeMillisNow(&info->stateTime) < 0) + goto cleanup; + if (priv->job.current) { info->state = VIR_DOMAIN_CONTROL_JOB; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; - info->stateTime -= priv->job.start; + info->stateTime -= priv->job.current->started; } else { info->state = VIR_DOMAIN_CONTROL_OCCUPIED; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; info->stateTime -= priv->monStart; } } else { @@ -3110,8 +3108,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; } - memset(&priv->job.info, 0, sizeof(priv->job.info)); - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; /* Pause */ if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { @@ -3460,6 +3457,7 @@ static int qemuDumpToFd(virQEMUDriverPtr driver, virDomainObjPtr vm, fd) < 0) return -1; + VIR_FREE(priv->job.current); priv->job.dump_memory_only = true; if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) @@ -11616,17 +11614,15 @@ static int 
qemuDomainGetJobInfo(virDomainPtr dom, goto cleanup; if (virDomainObjIsActive(vm)) { - if (priv->job.asyncJob && !priv->job.dump_memory_only) { - memcpy(info, &priv->job.info, sizeof(*info)); - + if (priv->job.current) { /* Refresh elapsed time again just to ensure it * is fully updated. This is primarily for benefit * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&info->timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToInfo(priv->job.current, info) < 0) goto cleanup; - info->timeElapsed -= priv->job.start; } else { memset(info, 0, sizeof(*info)); info->type = VIR_DOMAIN_JOB_NONE; @@ -11655,9 +11651,6 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; - virTypedParameterPtr par = NULL; - int maxpar = 0; - int npar = 0; int ret = -1; virCheckFlags(0, -1); @@ -11676,7 +11669,7 @@ qemuDomainGetJobStats(virDomainPtr dom, goto cleanup; } - if (!priv->job.asyncJob || priv->job.dump_memory_only) { + if (!priv->job.current) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0; @@ -11689,102 +11682,16 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToParams(priv->job.current, + type, params, nparams) < 0) goto cleanup; - priv->job.info.timeElapsed -= priv->job.start; - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_ELAPSED, - priv->job.info.timeElapsed) < 0) - goto cleanup; - - if (priv->job.info.type == VIR_DOMAIN_JOB_BOUNDED && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_REMAINING, - priv->job.info.timeRemaining) < 0) - goto cleanup; - - if (priv->job.status.downtime_set && - virTypedParamsAddULLong(&par, &npar, &maxpar, - 
VIR_DOMAIN_JOB_DOWNTIME, - priv->job.status.downtime) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_TOTAL, - priv->job.info.dataTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_PROCESSED, - priv->job.info.dataProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_REMAINING, - priv->job.info.dataRemaining) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_TOTAL, - priv->job.info.memTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_PROCESSED, - priv->job.info.memProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_REMAINING, - priv->job.info.memRemaining) < 0) - goto cleanup; - - if (priv->job.status.ram_duplicate_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_CONSTANT, - priv->job.status.ram_duplicate) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL, - priv->job.status.ram_normal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, - priv->job.status.ram_normal_bytes) < 0) - goto cleanup; - } - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_TOTAL, - priv->job.info.fileTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_PROCESSED, - priv->job.info.fileProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_REMAINING, - priv->job.info.fileRemaining) < 0) - goto cleanup; - - if (priv->job.status.xbzrle_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE, - priv->job.status.xbzrle_cache_size) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_BYTES, - priv->job.status.xbzrle_bytes) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - 
VIR_DOMAIN_JOB_COMPRESSION_PAGES, - priv->job.status.xbzrle_pages) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, - priv->job.status.xbzrle_cache_miss) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, - priv->job.status.xbzrle_overflow) < 0) - goto cleanup; - } - - *type = priv->job.info.type; - *params = par; - *nparams = npar; ret = 0; cleanup: if (vm) virObjectUnlock(vm); - if (ret < 0) - virTypedParamsFree(par, npar); return ret; } diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 64f0d8d..61d2d68 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1723,8 +1723,9 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, qemuDomainAsyncJob asyncJob) { qemuDomainObjPrivatePtr priv = vm->privateData; - int ret; qemuMonitorMigrationStatus status; + qemuDomainJobInfoPtr jobInfo; + int ret; memset(&status, 0, sizeof(status)); @@ -1738,62 +1739,40 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, qemuDomainObjExitMonitor(driver, vm); - priv->job.status = status; - - if (ret < 0 || virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (ret < 0 || + qemuDomainJobInfoUpdateTime(priv->job.current) < 0) return -1; - priv->job.info.timeElapsed -= priv->job.start; - ret = -1; - switch (priv->job.status.status) { - case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: - priv->job.info.type = VIR_DOMAIN_JOB_NONE; - virReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("is not active")); - break; - + jobInfo = priv->job.current; + switch (status.status) { + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + /* fall through */ case QEMU_MONITOR_MIGRATION_STATUS_SETUP: - ret = 0; - break; - case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: - priv->job.info.fileTotal = priv->job.status.disk_total; - priv->job.info.fileRemaining = priv->job.status.disk_remaining; - priv->job.info.fileProcessed = 
priv->job.status.disk_transferred; - - priv->job.info.memTotal = priv->job.status.ram_total; - priv->job.info.memRemaining = priv->job.status.ram_remaining; - priv->job.info.memProcessed = priv->job.status.ram_transferred; - - priv->job.info.dataTotal = - priv->job.status.ram_total + priv->job.status.disk_total; - priv->job.info.dataRemaining = - priv->job.status.ram_remaining + priv->job.status.disk_remaining; - priv->job.info.dataProcessed = - priv->job.status.ram_transferred + - priv->job.status.disk_transferred; - ret = 0; break; - case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: - priv->job.info.type = VIR_DOMAIN_JOB_COMPLETED; - ret = 0; + case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: + jobInfo->type = VIR_DOMAIN_JOB_NONE; + virReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("is not active")); break; case QEMU_MONITOR_MIGRATION_STATUS_ERROR: - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("unexpectedly failed")); break; case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: - priv->job.info.type = VIR_DOMAIN_JOB_CANCELLED; + jobInfo->type = VIR_DOMAIN_JOB_CANCELLED; virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), job, _("canceled by client")); break; } + jobInfo->status = status; return ret; } @@ -1803,11 +1782,14 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, * QEMU reports failed migration. 
*/ static int -qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, +qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, + virDomainObjPtr vm, qemuDomainAsyncJob asyncJob, - virConnectPtr dconn, bool abort_on_error) + virConnectPtr dconn, + bool abort_on_error) { qemuDomainObjPrivatePtr priv = vm->privateData; + qemuDomainJobInfoPtr jobInfo = priv->job.current; const char *job; int pauseReason; @@ -1825,9 +1807,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, job = _("job"); } - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + jobInfo->type = VIR_DOMAIN_JOB_UNBOUNDED; - while (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + while (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* Poll every 50ms for progress & to allow cancellation */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; @@ -1856,13 +1838,13 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, virObjectLock(vm); } - if (priv->job.info.type == VIR_DOMAIN_JOB_COMPLETED) { + if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { return 0; - } else if (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's * update the job type and notify the caller to send migrate_cancel. */ - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; return -2; } else { return -1; @@ -4914,7 +4896,7 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, JOB_MASK(QEMU_JOB_MIGRATION_OP))); } - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; return 0; } -- 2.1.0

Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - new (unrelated) patch src/qemu/qemu_domain.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 7b54306..8c94e27 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1222,13 +1222,12 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainAsyncJobTypeToString(priv->job.asyncJob), obj, obj->def->name); - priv->jobs_queued++; - if (virTimeMillisNow(&now) < 0) { virObjectUnref(cfg); return -1; } + priv->jobs_queued++; then = now + QEMU_JOB_WAIT_TIME; virObjectRef(obj); -- 2.1.0

virDomainGetJobStats gains new VIR_DOMAIN_JOB_STATS_COMPLETED flag that can be used to fetch statistics of a completed job rather than a currently running job. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - no change include/libvirt/libvirt.h.in | 11 +++++++++++ src/libvirt.c | 8 +++++++- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 25 +++++++++++++++++++------ src/qemu/qemu_migration.c | 6 ++++++ src/qemu/qemu_monitor_json.c | 3 ++- 7 files changed, 47 insertions(+), 8 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index aced31c..94b942c 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -4320,6 +4320,17 @@ struct _virDomainJobInfo { unsigned long long fileRemaining; }; +/** + * virDomainGetJobStatsFlags: + * + * Flags OR'ed together to provide specific behavior when querying domain + * job statistics. + */ +typedef enum { + VIR_DOMAIN_JOB_STATS_COMPLETED = 1 << 0, /* return stats of a recently + * completed job */ +} virDomainGetJobStatsFlags; + int virDomainGetJobInfo(virDomainPtr dom, virDomainJobInfoPtr info); int virDomainGetJobStats(virDomainPtr domain, diff --git a/src/libvirt.c b/src/libvirt.c index 4806535..00b4d6f 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17567,7 +17567,7 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * @type: where to store the job type (one of virDomainJobType) * @params: where to store job statistics * @nparams: number of items in @params - * @flags: extra flags; not used yet, so callers should always pass 0 + * @flags: bitwise-OR of virDomainGetJobStatsFlags * * Extract information about progress of a background job on a domain. * Will return an error if the domain is not active. 
The function returns @@ -17577,6 +17577,12 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * may receive fields that they do not understand in case they talk to a * newer server. * + * When @flags contains VIR_DOMAIN_JOB_STATS_COMPLETED, the function will + * return statistics about a recently completed job. Specifically, this + * flag may be used to query statistics of a completed incoming migration. + * Statistics of a completed job are automatically destroyed once read or + * when libvirtd is restarted. + * * Returns 0 in case of success and -1 in case of failure. */ int diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 8c94e27..78d6e8c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -199,6 +199,7 @@ static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { VIR_FREE(priv->job.current); + VIR_FREE(priv->job.completed); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 99c7d6a..2e16cde 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -124,6 +124,7 @@ struct qemuDomainJobObj { unsigned long long mask; /* Jobs allowed during async job */ bool dump_memory_only; /* use dump-guest-memory to do dump */ qemuDomainJobInfoPtr current; /* async job progress data */ + qemuDomainJobInfoPtr completed; /* statistics data of a recently completed job */ bool asyncAbort; /* abort of async job requested */ }; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index a5533c5..fd82af1 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -11651,9 +11651,10 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; + qemuDomainJobInfoPtr jobInfo; int ret = -1; - virCheckFlags(0, -1); + virCheckFlags(VIR_DOMAIN_JOB_STATS_COMPLETED, -1); if (!(vm = qemuDomObjFromDomain(dom))) goto cleanup; @@ -11663,13 +11664,19 @@ qemuDomainGetJobStats(virDomainPtr dom, if 
(virDomainGetJobStatsEnsureACL(dom->conn, vm->def) < 0) goto cleanup; - if (!virDomainObjIsActive(vm)) { + if (!(flags & VIR_DOMAIN_JOB_STATS_COMPLETED) && + !virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); goto cleanup; } - if (!priv->job.current) { + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + jobInfo = priv->job.completed; + else + jobInfo = priv->job.current; + + if (!jobInfo) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0; @@ -11682,11 +11689,17 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || - qemuDomainJobInfoToParams(priv->job.current, - type, params, nparams) < 0) + if ((jobInfo->type == VIR_DOMAIN_JOB_BOUNDED || + jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) && + qemuDomainJobInfoUpdateTime(jobInfo) < 0) goto cleanup; + if (qemuDomainJobInfoToParams(jobInfo, type, params, nparams) < 0) + goto cleanup; + + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + VIR_FREE(priv->job.completed); + ret = 0; cleanup: diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 61d2d68..e7c1249 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1839,6 +1839,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + VIR_FREE(priv->job.completed); + if (VIR_ALLOC(priv->job.completed) == 0) + *priv->job.completed = *jobInfo; return 0; } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's @@ -3418,6 +3421,9 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); } + if (priv->job.completed) + qemuDomainJobInfoUpdateTime(priv->job.completed); + cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; diff --git 
a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 4373ba2..0aa34b8 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2500,7 +2500,8 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, if (rc == 0) status->downtime_set = true; - if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE) { + if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE || + status->status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) { virJSONValuePtr ram = virJSONValueObjectGet(ret, "ram"); if (!ram) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", -- 2.1.0

Hi, FYI as tracking down that failure cost me some time and hopefully this summary will help others to avoid this situation: Am 09.09.2014 um 11:54 schrieb Jiri Denemark:
virDomainGetJobStats gains new VIR_DOMAIN_JOB_STATS_COMPLETED flag that can be used to fetch statistics of a completed job rather than a currently running job.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> ... --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2500,7 +2500,8 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, ... - if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE) { + if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE || + status->status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) { virJSONValuePtr ram = virJSONValueObjectGet(ret, "ram"); if (!ram) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("migration was active, but RAM 'transferred' " "data was missing"));
This change from libvirt v1.2.9-rc1~203 breaks migration with qemu-kvm-1.1.2, as that ancient implementation does *not* export transfer statistics for completed jobs:
qemuMonitorJSONCommandWithFd:286 : Send command '{"execute":"query-migrate","id":"libvirt-41"}' for write with FD -1 qemuMonitorJSONIOProcessLine:179 : Line [{"return": {"status": "active", "ram": {"total": 2164654080, "remaining": 22474752, "transferred": 175117413}}, "id": "libvirt-41"}] ... qemuMonitorJSONCommandWithFd:286 : Send command '{"execute":"query-migrate","id":"libvirt-42"}' for write with FD -1 qemuMonitorJSONIOProcessLine:179 : Line [{"return": {"status": "completed"}, "id": "libvirt-42"}]
As you can see, there is no "ram" section and the migration is aborted with the message:
internal error: migration was active, but no RAM info was set
qemu-kvm/qmp-commands.hx even states this: - "ram": only present if "status" is "active", it is a json-object with the following RAM information (in bytes):
but the example some lines below is wrong: 2. Migration is done and has succeeded
-> { "execute": "query-migrate" } <- { "return": { "status": "completed", "ram":{
That example has been updated by v1.2.0-rc0~29^2~2, but forgot to update the specification above. The attached hack for libvirt makes migration work again for me by making "ram" optional in case of "completed". Philipp

Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - new (unrelated) patch src/qemu/qemu_monitor_json.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 0aa34b8..60e6b00 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2534,9 +2534,10 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, if (virJSONValueObjectGetNumberUlong(ram, "duplicate", &status->ram_duplicate) == 0) status->ram_duplicate_set = true; - virJSONValueObjectGetNumberUlong(ram, "normal", &status->ram_normal); - virJSONValueObjectGetNumberUlong(ram, "normal-bytes", - &status->ram_normal_bytes); + ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal", + &status->ram_normal)); + ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal-bytes", + &status->ram_normal_bytes)); virJSONValuePtr disk = virJSONValueObjectGet(ret, "disk"); if (disk) { -- 2.1.0

New --completed flag for virsh domjobinfo command. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - enhanced virsh man page tools/virsh-domain.c | 27 ++++++++++++++++++++++++--- tools/virsh.pod | 7 +++++-- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index ed2e3ea..70960b4 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -5115,6 +5115,10 @@ static const vshCmdOptDef opts_resume[] = { .flags = VSH_OFLAG_REQ, .help = N_("domain name, id or uuid") }, + {.name = "completed", + .type = VSH_OT_BOOL, + .help = N_("return statistics of a recently completed job") + }, {.name = NULL} }; @@ -5405,14 +5409,18 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) virTypedParameterPtr params = NULL; int nparams = 0; unsigned long long value; + unsigned int flags = 0; int rc; if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return false; + if (vshCommandOptBool(cmd, "completed")) + flags |= VIR_DOMAIN_JOB_STATS_COMPLETED; + memset(&info, 0, sizeof(info)); - rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, 0); + rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, flags); if (rc == 0) { if (virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_TIME_ELAPSED, @@ -5449,6 +5457,11 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &info.fileRemaining) < 0) goto save_error; } else if (last_error->code == VIR_ERR_NO_SUPPORT) { + if (flags) { + vshError(ctl, "%s", _("Optional flags are not supported by the " + "daemon")); + goto cleanup; + } vshDebug(ctl, VSH_ERR_DEBUG, "detailed statistics not supported\n"); vshResetLibvirtError(); rc = virDomainGetJobInfo(dom, &info); @@ -5459,7 +5472,9 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%-17s %-12s\n", _("Job type:"), vshDomainJobToString(info.type)); if (info.type != VIR_DOMAIN_JOB_BOUNDED && - info.type != VIR_DOMAIN_JOB_UNBOUNDED) { + info.type != VIR_DOMAIN_JOB_UNBOUNDED && + (!(flags & 
VIR_DOMAIN_JOB_STATS_COMPLETED) || + info.type != VIR_DOMAIN_JOB_COMPLETED)) { ret = true; goto cleanup; } @@ -5524,7 +5539,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &value)) < 0) { goto save_error; } else if (rc) { - vshPrint(ctl, "%-17s %-12llu ms\n", _("Expected downtime:"), value); + if (info.type == VIR_DOMAIN_JOB_COMPLETED) { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Total downtime:"), value); + } else { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Expected downtime:"), value); + } } if ((rc = virTypedParamsGetULLong(params, nparams, diff --git a/tools/virsh.pod b/tools/virsh.pod index 4401d55..4411027 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1131,9 +1131,12 @@ Convert a domain name (or UUID) to a domain id Abort the currently running domain job. -=item B<domjobinfo> I<domain> +=item B<domjobinfo> I<domain> [I<--completed>] -Returns information about jobs running on a domain. +Returns information about jobs running on a domain. I<--completed> tells +virsh to return information about a recently finished job. Statistics of +a completed job are automatically destroyed once read or when libvirtd +is restarted. =item B<domname> I<domain-id-or-uuid> -- 2.1.0

On Tue, Sep 09, 2014 at 11:54:09 +0200, Jiri Denemark wrote:
New --completed flag for virsh domjobinfo command.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> ---
Notes: Version 2: - enhanced virsh man page
tools/virsh-domain.c | 27 ++++++++++++++++++++++++--- tools/virsh.pod | 7 +++++-- 2 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index ed2e3ea..70960b4 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -5115,6 +5115,10 @@ static const vshCmdOptDef opts_resume[] = { .flags = VSH_OFLAG_REQ, .help = N_("domain name, id or uuid") }, + {.name = "completed", + .type = VSH_OT_BOOL, + .help = N_("return statistics of a recently completed job") + }, {.name = NULL} };
Oops, the *** git rebase did this once again to me... It moved the --completed option from the domjobinfo command to the resume command. I'll fix this soon. Jirka

When migrating a transient domain or with VIR_MIGRATE_UNDEFINE_SOURCE flag, the domain may disappear from source host. And so will migration statistics associated with the domain. We need to transfer the statistics at the end of a migration so that they can be queried at the destination host. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - no change src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index e7c1249..0a00295 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -80,6 +80,7 @@ enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT, QEMU_MIGRATION_COOKIE_FLAG_NETWORK, QEMU_MIGRATION_COOKIE_FLAG_NBD, + QEMU_MIGRATION_COOKIE_FLAG_STATS, QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -91,7 +92,8 @@ VIR_ENUM_IMPL(qemuMigrationCookieFlag, "lockstate", "persistent", "network", - "nbd"); + "nbd", + "statistics"); enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), @@ -99,6 +101,7 @@ enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_PERSISTENT = (1 << QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT), QEMU_MIGRATION_COOKIE_NETWORK = (1 << QEMU_MIGRATION_COOKIE_FLAG_NETWORK), QEMU_MIGRATION_COOKIE_NBD = (1 << QEMU_MIGRATION_COOKIE_FLAG_NBD), + QEMU_MIGRATION_COOKIE_STATS = (1 << QEMU_MIGRATION_COOKIE_FLAG_STATS), }; typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -169,6 +172,9 @@ struct _qemuMigrationCookie { /* If (flags & QEMU_MIGRATION_COOKIE_NBD) */ qemuMigrationCookieNBDPtr nbd; + + /* If (flags & QEMU_MIGRATION_COOKIE_STATS) */ + qemuDomainJobInfoPtr jobInfo; }; static void qemuMigrationCookieGraphicsFree(qemuMigrationCookieGraphicsPtr grap) @@ -533,6 +539,25 @@ qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, } +static int 
+qemuMigrationCookieAddStatistics(qemuMigrationCookiePtr mig, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (!priv->job.completed) + return 0; + + if (!mig->jobInfo && VIR_ALLOC(mig->jobInfo) < 0) + return -1; + + *mig->jobInfo = *priv->job.completed; + mig->flags |= QEMU_MIGRATION_COOKIE_STATS; + + return 0; +} + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -589,6 +614,81 @@ qemuMigrationCookieNetworkXMLFormat(virBufferPtr buf, } +static void +qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, + qemuDomainJobInfoPtr jobInfo) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + + virBufferAddLit(buf, "<statistics>\n"); + virBufferAdjustIndent(buf, 2); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining); + if (status->downtime_set) + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_TOTAL, + status->ram_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining); + + if (status->ram_duplicate_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes); + } + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + 
VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining); + + if (status->xbzrle_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow); + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</statistics>\n"); +} + + static int qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferPtr buf, @@ -650,6 +750,9 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferAddLit(buf, "/>\n"); } + if (mig->flags & QEMU_MIGRATION_COOKIE_STATS && mig->jobInfo) + qemuMigrationCookieStatisticsXMLFormat(buf, mig->jobInfo); + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</qemu-migration>\n"); return 0; @@ -772,6 +875,70 @@ qemuMigrationCookieNetworkXMLParse(xmlXPathContextPtr ctxt) } +static qemuDomainJobInfoPtr +qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) +{ + qemuDomainJobInfoPtr jobInfo = NULL; + qemuMonitorMigrationStatus *status; + xmlNodePtr save_ctxt = ctxt->node; + + if (!(ctxt->node = virXPathNode("./statistics", ctxt))) + goto cleanup; + + if (VIR_ALLOC(jobInfo) < 0) + goto cleanup; + + status = &jobInfo->status; + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", + ctxt, &jobInfo->timeElapsed); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", + ctxt, &jobInfo->timeRemaining); + if 
(virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", + ctxt, &status->downtime) == 0) + status->downtime_set = true; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_TOTAL "[1])", + ctxt, &status->ram_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_PROCESSED "[1])", + ctxt, &status->ram_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_REMAINING "[1])", + ctxt, &status->ram_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_CONSTANT "[1])", + ctxt, &status->ram_duplicate) == 0) + status->ram_duplicate_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL "[1])", + ctxt, &status->ram_normal); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES "[1])", + ctxt, &status->ram_normal_bytes); + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_TOTAL "[1])", + ctxt, &status->disk_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_PROCESSED "[1])", + ctxt, &status->disk_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_REMAINING "[1])", + ctxt, &status->disk_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE "[1])", + ctxt, &status->xbzrle_cache_size) == 0) + status->xbzrle_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_BYTES "[1])", + ctxt, &status->xbzrle_bytes); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_PAGES "[1])", + ctxt, &status->xbzrle_pages); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES "[1])", + ctxt, &status->xbzrle_cache_miss); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW "[1])", + ctxt, &status->xbzrle_overflow); + + cleanup: + ctxt->node = save_ctxt; + return jobInfo; +} + + static int qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, virQEMUDriverPtr driver, @@ -947,6 +1114,11 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, VIR_FREE(port); } + if (flags & QEMU_MIGRATION_COOKIE_STATS && + 
virXPathBoolean("boolean(./statistics)", ctxt) && + (!(mig->jobInfo = qemuMigrationCookieStatisticsXMLParse(ctxt)))) + goto error; + virObjectUnref(caps); return 0; @@ -1017,6 +1189,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddNBD(mig, driver, dom) < 0) return -1; + if (flags & QEMU_MIGRATION_COOKIE_STATS && + qemuMigrationCookieAddStatistics(mig, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(driver, mig))) return -1; @@ -3424,7 +3600,8 @@ qemuMigrationRun(virQEMUDriverPtr driver, if (priv->job.completed) qemuDomainJobInfoUpdateTime(priv->job.completed); - cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; + cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; if (ret == 0 && @@ -4508,8 +4685,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, : QEMU_MIGRATION_PHASE_FINISH2); qemuDomainCleanupRemove(vm, qemuMigrationPrepareCleanup); + VIR_FREE(priv->job.completed); - cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK; + cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT; @@ -4527,6 +4706,11 @@ qemuMigrationFinish(virQEMUDriverPtr driver, goto endjob; } + if (mig->jobInfo) { + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; + } + if (!(flags & VIR_MIGRATE_OFFLINE)) { if (qemuMigrationVPAssociatePortProfiles(vm->def) < 0) { qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, -- 2.1.0

Total time of a migration and total downtime transfered from a source to a destination host do not count with the transfer time to the destination host and with the time elapsed before guest CPUs are resumed. Thus, source libvirtd remembers when migration started and when guest CPUs were paused. Both timestamps are transferred to destination libvirtd which uses them to compute total migration time and total downtime. Obviously, this requires the time to be synchronized between the two hosts. The reported times are useless otherwise but they would be equally useless if we didn't do this recomputation so don't lose anything by doing it. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - added ATTRIBUTE_NONNULL to the new internal API - fixed commit message src/libvirt.c | 5 ++++- src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 3 +++ src/qemu/qemu_migration.c | 15 ++++++++++++++- src/qemu/qemu_process.c | 9 ++++++++- tools/virsh.pod | 5 ++++- 6 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/libvirt.c b/src/libvirt.c index 00b4d6f..941c518 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17581,7 +17581,10 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * return statistics about a recently completed job. Specifically, this * flag may be used to query statistics of a completed incoming migration. * Statistics of a completed job are automatically destroyed once read or - * when libvirtd is restarted. + * when libvirtd is restarted. Note that time information returned for + * completed migrations may be completely irrelevant unless both source and + * destination hosts have synchronized time (i.e., NTP daemon is running on + * both of them). * * Returns 0 in case of success and -1 in case of failure. 
*/ diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 78d6e8c..c0306d7 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) if (virTimeMillisNow(&now) < 0) return -1; + if (now < jobInfo->started) { + VIR_WARN("Async job starts in the future"); + jobInfo->started = 0; + return 0; + } + jobInfo->timeElapsed = now - jobInfo->started; return 0; } int +qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->stopped) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + if (now < jobInfo->stopped) { + VIR_WARN("Guest's CPUs stopped in the future"); + jobInfo->stopped = 0; + return 0; + } + + jobInfo->status.downtime = now - jobInfo->stopped; + jobInfo->status.downtime_set = true; + return 0; +} + +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 2e16cde..479b51f 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; struct _qemuDomainJobInfo { virDomainJobType type; unsigned long long started; /* When the async job started */ + unsigned long long stopped; /* When the domain's CPUs were stopped */ /* Computed values */ unsigned long long timeElapsed; unsigned long long timeRemaining; @@ -391,6 +392,8 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) ATTRIBUTE_NONNULL(1); +int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) + ATTRIBUTE_NONNULL(1); int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 0a00295..801e545 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ 
-623,6 +623,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, virBufferAddLit(buf, "<statistics>\n"); virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started); + virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_TIME_ELAPSED, jobInfo->timeElapsed); @@ -891,6 +894,9 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) status = &jobInfo->status; jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); + virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", ctxt, &jobInfo->timeElapsed); virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", @@ -2015,6 +2021,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + qemuDomainJobInfoUpdateDowntime(jobInfo); VIR_FREE(priv->job.completed); if (VIR_ALLOC(priv->job.completed) == 0) *priv->job.completed = *jobInfo; @@ -3597,8 +3604,10 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); } - if (priv->job.completed) + if (priv->job.completed) { qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + } cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | QEMU_MIGRATION_COOKIE_STATS; @@ -4811,6 +4820,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, } goto endjob; } + if (priv->job.completed) { + qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + } } dom = virGetDomain(dconn, vm->def->name, vm->def->uuid); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index b1d8a32..c70290a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -755,6 +755,9 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("Transitioned 
guest %s to paused state", vm->def->name); + if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN); event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, @@ -2889,7 +2892,8 @@ qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, } -int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, +int qemuProcessStopCPUs(virQEMUDriverPtr driver, + virDomainObjPtr vm, virDomainPausedReason reason, qemuDomainAsyncJob asyncJob) { @@ -2907,6 +2911,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, if (ret < 0) goto cleanup; + if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) VIR_WARN("Unable to release lease on %s", vm->def->name); diff --git a/tools/virsh.pod b/tools/virsh.pod index 4411027..60ee515 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1136,7 +1136,10 @@ Abort the currently running domain job. Returns information about jobs running on a domain. I<--completed> tells virsh to return information about a recently finished job. Statistics of a completed job are automatically destroyed once read or when libvirtd -is restarted. +is restarted. Note that time information returned for completed +migrations may be completely irrelevant unless both source and +destination hosts have synchronized time (i.e., NTP daemon is running +on both of them). =item B<domname> I<domain-id-or-uuid> -- 2.1.0

After the previous commit, migration statistics on the source and destination hosts are not equal because the destination updated time statistics. Let's send the result back so that the same data can be queried on both sides of the migration. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- Notes: Version 2: - fixed commit message src/qemu/qemu_migration.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 801e545..e4b664b 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3019,9 +3019,27 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver, ? QEMU_MIGRATION_PHASE_CONFIRM3 : QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED); - if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_STATS))) goto cleanup; + /* Update total times with the values sent by the destination daemon */ + if (mig->jobInfo) { + qemuDomainObjPrivatePtr priv = vm->privateData; + if (priv->job.completed) { + qemuDomainJobInfoPtr jobInfo = priv->job.completed; + if (mig->jobInfo->status.downtime_set) { + jobInfo->status.downtime = mig->jobInfo->status.downtime; + jobInfo->status.downtime_set = true; + } + if (mig->jobInfo->timeElapsed) + jobInfo->timeElapsed = mig->jobInfo->timeElapsed; + } else { + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; + } + } + if (flags & VIR_MIGRATE_OFFLINE) goto done; @@ -4860,7 +4878,8 @@ qemuMigrationFinish(virQEMUDriverPtr driver, VIR_DOMAIN_EVENT_STOPPED_FAILED); } - if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, 0) < 0) + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, + QEMU_MIGRATION_COOKIE_STATS) < 0) VIR_WARN("Unable to encode migration cookie"); endjob: -- 2.1.0

On 09/09/2014 05:54 AM, Jiri Denemark wrote:
Using virDomainGetJobStats, we can monitor running jobs but sometimes it may be useful to get statistics about a job that already finished, for example, to get the final amount of data transferred during migration or to get an idea about total downtime. This is what the following patches are about.
Version 2: - changed according to John's review (see individual patches for details)
Jiri Denemark (8): Refactor job statistics qemu: Avoid incrementing jobs_queued if virTimeMillisNow fails Add support for fetching statistics of completed jobs qemu: Silence coverity on optional migration stats virsh: Add support for completed job stats qemu: Transfer migration statistics to destination qemu: Recompute downtime and total time when migration completes qemu: Transfer recomputed stats back to source
include/libvirt/libvirt.h.in | 11 ++ src/libvirt.c | 11 +- src/qemu/qemu_domain.c | 189 ++++++++++++++++++++++++++- src/qemu/qemu_domain.h | 32 ++++- src/qemu/qemu_driver.c | 130 ++++-------------- src/qemu/qemu_migration.c | 304 ++++++++++++++++++++++++++++++++++++------- src/qemu/qemu_monitor_json.c | 10 +- src/qemu/qemu_process.c | 9 +- tools/virsh-domain.c | 27 +++- tools/virsh.pod | 10 +- 10 files changed, 557 insertions(+), 176 deletions(-)
One nit: Patch 4 - add an ignore_value() around the "total-time" fetch... ACK - series. Ran through 7.5.0 Coverity FWIW: My NMI from 4/6 of v1 probably had more to do with trying to swap between different windows and forgetting where I was... I think what happened is I read ahead and saw that the values were calculated and started deleting a thought and yes, got interrupted (not difficult to do at times). I was comparing those changes against qemuDomainJobInfoToParams for completeness...

On Tue, Sep 09, 2014 at 19:36:52 -0400, John Ferlan wrote:
On 09/09/2014 05:54 AM, Jiri Denemark wrote:
Using virDomainGetJobStats, we can monitor running jobs but sometimes it may be useful to get statistics about a job that already finished, for example, to get the final amount of data transferred during migration or to get an idea about total downtime. This is what the following patches are about.
Version 2: - changed according to John's review (see individual patches for details)
Jiri Denemark (8): Refactor job statistics qemu: Avoid incrementing jobs_queued if virTimeMillisNow fails Add support for fetching statistics of completed jobs qemu: Silence coverity on optional migration stats virsh: Add support for completed job stats qemu: Transfer migration statistics to destination qemu: Recompute downtime and total time when migration completes qemu: Transfer recomputed stats back to source
include/libvirt/libvirt.h.in | 11 ++ src/libvirt.c | 11 +- src/qemu/qemu_domain.c | 189 ++++++++++++++++++++++++++- src/qemu/qemu_domain.h | 32 ++++- src/qemu/qemu_driver.c | 130 ++++-------------- src/qemu/qemu_migration.c | 304 ++++++++++++++++++++++++++++++++++++------- src/qemu/qemu_monitor_json.c | 10 +- src/qemu/qemu_process.c | 9 +- tools/virsh-domain.c | 27 +++- tools/virsh.pod | 10 +- 10 files changed, 557 insertions(+), 176 deletions(-)
One nit: Patch 4 - add an ignore_value() around the "total-time" fetch...
Oh right, I missed that one.
ACK - series.
Thanks, I pushed this series. Jirka
participants (3)
-
Jiri Denemark
-
John Ferlan
-
Philipp Hahn