[libvirt] [PATCH 0/6] Add support for fetching statistics of completed jobs

Using virDomainGetJobStats, we can monitor running jobs, but sometimes it may be useful to get statistics about a job that has already finished, for example, to get the final amount of data transferred during migration or to get an idea about the total downtime. This is what the following patches are about.

Jiri Denemark (6):
  Refactor job statistics
  Add support for fetching statistics of completed jobs
  virsh: Add support for completed job stats
  qemu: Transfer migration statistics to destination
  qemu: Recompute downtime and total time when migration completes
  qemu: Transfer recomputed stats back to source

 include/libvirt/libvirt.h.in |  11 ++
 src/libvirt.c                |  11 +-
 src/qemu/qemu_domain.c       | 186 +++++++++++++++++++++++++-
 src/qemu/qemu_domain.h       |  27 +++-
 src/qemu/qemu_driver.c       | 130 ++++--------------
 src/qemu/qemu_migration.c    | 304 ++++++++++++++++++++++++++++++++++++-------
 src/qemu/qemu_monitor_json.c |   3 +-
 src/qemu/qemu_process.c      |   9 +-
 tools/virsh-domain.c         |  27 +++-
 tools/virsh.pod              |   8 +-
 10 files changed, 545 insertions(+), 171 deletions(-)

--
2.1.0
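To illustrate the intended end result, here is a minimal client-side sketch of querying the statistics of an already finished migration. It assumes the VIR_DOMAIN_JOB_STATS_COMPLETED flag introduced later in this series; the connection URI and domain name are placeholders.

/* Minimal sketch of a client consuming completed-job statistics.
 * Assumes the VIR_DOMAIN_JOB_STATS_COMPLETED flag added by this series;
 * "example-domain" and the connection URI are placeholders. */
#include <stdio.h>
#include <libvirt/libvirt.h>

int main(void)
{
    virConnectPtr conn = virConnectOpen("qemu:///system");
    virDomainPtr dom = conn ? virDomainLookupByName(conn, "example-domain") : NULL;
    virTypedParameterPtr params = NULL;
    int nparams = 0;
    int type = VIR_DOMAIN_JOB_NONE;
    unsigned long long downtime = 0, transferred = 0;

    if (!dom)
        return 1;

    if (virDomainGetJobStats(dom, &type, &params, &nparams,
                             VIR_DOMAIN_JOB_STATS_COMPLETED) < 0)
        return 1;

    /* Each field is optional; anything the job did not report stays 0. */
    virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_DOWNTIME, &downtime);
    virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_DATA_PROCESSED,
                            &transferred);
    printf("completed job: %llu bytes transferred, %llu ms downtime\n",
           transferred, downtime);

    virTypedParamsFree(params, nparams);
    virDomainFree(dom);
    virConnectClose(conn);
    return 0;
}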

Job statistics data were tracked in several structures and variables. Let's make a new qemuDomainJobInfo structure which can be used as a single source of statistics data as a preparation for storing data about completed a job. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 157 ++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.h | 24 ++++++- src/qemu/qemu_driver.c | 119 ++++------------------------------- src/qemu/qemu_migration.c | 72 ++++++++------------- 4 files changed, 213 insertions(+), 159 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index e9506e0..2c709e9 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -163,11 +163,9 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncOwner = 0; job->phase = 0; job->mask = DEFAULT_JOB_MASK; - job->start = 0; job->dump_memory_only = false; job->asyncAbort = false; - memset(&job->status, 0, sizeof(job->status)); - memset(&job->info, 0, sizeof(job->info)); + VIR_FREE(job->current); } void @@ -200,6 +198,7 @@ qemuDomainObjTransferJob(virDomainObjPtr obj) static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { + VIR_FREE(priv->job.current); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } @@ -211,6 +210,150 @@ qemuDomainTrackJob(qemuDomainJob job) } +int +qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->started) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + jobInfo->timeElapsed = now - jobInfo->started; + return 0; +} + +int +qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info) +{ + info->timeElapsed = jobInfo->timeElapsed; + info->timeRemaining = jobInfo->timeRemaining; + + info->memTotal = jobInfo->status.ram_total; + info->memRemaining = jobInfo->status.ram_remaining; + info->memProcessed = jobInfo->status.ram_transferred; + + info->fileTotal = jobInfo->status.disk_total; + info->fileRemaining = jobInfo->status.disk_remaining; + info->fileProcessed = jobInfo->status.disk_transferred; + + info->dataTotal = info->memTotal + info->fileTotal; + info->dataRemaining = info->memRemaining + info->fileRemaining; + info->dataProcessed = info->memProcessed + info->fileProcessed; + + return 0; +} + +int +qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + virTypedParameterPtr par = NULL; + int maxpar = 0; + int npar = 0; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed) < 0) + goto error; + + if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining) < 0) + goto error; + + if (status->downtime_set && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_TOTAL, + status->ram_total + + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_PROCESSED, + status->ram_transferred + + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_REMAINING, + status->ram_remaining + + status->disk_remaining) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_TOTAL, + 
status->ram_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining) < 0) + goto error; + + if (status->ram_duplicate_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes) < 0) + goto error; + } + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining) < 0) + goto error; + + if (status->xbzrle_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow) < 0) + goto error; + } + + *type = jobInfo->type; + *params = par; + *nparams = npar; + return 0; + + error: + virTypedParamsFree(par, npar); + return -1; +} + + static void * qemuDomainObjPrivateAlloc(void) { @@ -1071,7 +1214,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, unsigned long long then; bool nested = job == QEMU_JOB_ASYNC_NESTED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - int ret; + int ret = -1; VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", job == QEMU_JOB_ASYNC ? 
"async job" : "job", @@ -1127,9 +1270,11 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainAsyncJobTypeToString(asyncJob), obj, obj->def->name); qemuDomainObjResetAsyncJob(priv); + if (VIR_ALLOC(priv->job.current) < 0) + goto cleanup; priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); - priv->job.start = now; + priv->job.current->started = now; } if (qemuDomainTrackJob(job)) @@ -1163,6 +1308,8 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, virReportSystemError(errno, "%s", _("cannot acquire job mutex")); } + + cleanup: priv->jobs_queued--; virObjectUnref(obj); virObjectUnref(cfg); diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 8736889..119590e 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -100,6 +100,18 @@ typedef enum { } qemuDomainAsyncJob; VIR_ENUM_DECL(qemuDomainAsyncJob) +typedef struct _qemuDomainJobInfo qemuDomainJobInfo; +typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; +struct _qemuDomainJobInfo { + virDomainJobType type; + unsigned long long started; /* When the async job started */ + /* Computed values */ + unsigned long long timeElapsed; + unsigned long long timeRemaining; + /* Raw values from QEMU */ + qemuMonitorMigrationStatus status; +}; + struct qemuDomainJobObj { virCond cond; /* Use to coordinate jobs */ qemuDomainJob active; /* Currently running job */ @@ -110,10 +122,8 @@ struct qemuDomainJobObj { unsigned long long asyncOwner; /* Thread which set current async job */ int phase; /* Job phase (mainly for migrations) */ unsigned long long mask; /* Jobs allowed during async job */ - unsigned long long start; /* When the async job started */ bool dump_memory_only; /* use dump-guest-memory to do dump */ - qemuMonitorMigrationStatus status; /* Raw async job progress data */ - virDomainJobInfo info; /* Processed async job progress data */ + qemuDomainJobInfoPtr current; /* async job progress data */ bool asyncAbort; /* abort of async job requested */ }; @@ -378,4 +388,12 @@ bool qemuDomainDefCheckABIStability(virQEMUDriverPtr driver, bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError); +int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info); +int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams); + #endif /* __QEMU_DOMAIN_H__ */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 239a300..265315d 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2655,15 +2655,13 @@ qemuDomainGetControlInfo(virDomainPtr dom, if (priv->monError) { info->state = VIR_DOMAIN_CONTROL_ERROR; } else if (priv->job.active) { - if (!priv->monStart) { + if (virTimeMillisNow(&info->stateTime) < 0) + goto cleanup; + if (priv->job.current) { info->state = VIR_DOMAIN_CONTROL_JOB; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; - info->stateTime -= priv->job.start; + info->stateTime -= priv->job.current->started; } else { info->state = VIR_DOMAIN_CONTROL_OCCUPIED; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; info->stateTime -= priv->monStart; } } else { @@ -3110,8 +3108,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; } - memset(&priv->job.info, 0, sizeof(priv->job.info)); - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; /* Pause */ if (virDomainObjGetState(vm, NULL) == 
VIR_DOMAIN_RUNNING) { @@ -3460,6 +3457,7 @@ static int qemuDumpToFd(virQEMUDriverPtr driver, virDomainObjPtr vm, fd) < 0) return -1; + VIR_FREE(priv->job.current); priv->job.dump_memory_only = true; if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) @@ -11616,17 +11614,15 @@ static int qemuDomainGetJobInfo(virDomainPtr dom, goto cleanup; if (virDomainObjIsActive(vm)) { - if (priv->job.asyncJob && !priv->job.dump_memory_only) { - memcpy(info, &priv->job.info, sizeof(*info)); - + if (priv->job.current) { /* Refresh elapsed time again just to ensure it * is fully updated. This is primarily for benefit * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&info->timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToInfo(priv->job.current, info) < 0) goto cleanup; - info->timeElapsed -= priv->job.start; } else { memset(info, 0, sizeof(*info)); info->type = VIR_DOMAIN_JOB_NONE; @@ -11655,9 +11651,6 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; - virTypedParameterPtr par = NULL; - int maxpar = 0; - int npar = 0; int ret = -1; virCheckFlags(0, -1); @@ -11676,7 +11669,7 @@ qemuDomainGetJobStats(virDomainPtr dom, goto cleanup; } - if (!priv->job.asyncJob || priv->job.dump_memory_only) { + if (!priv->job.current) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0; @@ -11689,102 +11682,16 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToParams(priv->job.current, + type, params, nparams) < 0) goto cleanup; - priv->job.info.timeElapsed -= priv->job.start; - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_ELAPSED, - priv->job.info.timeElapsed) < 0) - goto cleanup; - - if (priv->job.info.type == VIR_DOMAIN_JOB_BOUNDED && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_REMAINING, - priv->job.info.timeRemaining) < 0) - goto cleanup; - - if (priv->job.status.downtime_set && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DOWNTIME, - priv->job.status.downtime) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_TOTAL, - priv->job.info.dataTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_PROCESSED, - priv->job.info.dataProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_REMAINING, - priv->job.info.dataRemaining) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_TOTAL, - priv->job.info.memTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_PROCESSED, - priv->job.info.memProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_REMAINING, - priv->job.info.memRemaining) < 0) - goto cleanup; - - if (priv->job.status.ram_duplicate_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_CONSTANT, - priv->job.status.ram_duplicate) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL, - priv->job.status.ram_normal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, - priv->job.status.ram_normal_bytes) < 0) - goto cleanup; - } - - 
if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_TOTAL, - priv->job.info.fileTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_PROCESSED, - priv->job.info.fileProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_REMAINING, - priv->job.info.fileRemaining) < 0) - goto cleanup; - - if (priv->job.status.xbzrle_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE, - priv->job.status.xbzrle_cache_size) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_BYTES, - priv->job.status.xbzrle_bytes) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_PAGES, - priv->job.status.xbzrle_pages) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, - priv->job.status.xbzrle_cache_miss) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, - priv->job.status.xbzrle_overflow) < 0) - goto cleanup; - } - - *type = priv->job.info.type; - *params = par; - *nparams = npar; ret = 0; cleanup: if (vm) virObjectUnlock(vm); - if (ret < 0) - virTypedParamsFree(par, npar); return ret; } diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9cfb77e..64adab4 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1723,8 +1723,9 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, qemuDomainAsyncJob asyncJob) { qemuDomainObjPrivatePtr priv = vm->privateData; - int ret; qemuMonitorMigrationStatus status; + qemuDomainJobInfoPtr jobInfo; + int ret; memset(&status, 0, sizeof(status)); @@ -1738,62 +1739,40 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, qemuDomainObjExitMonitor(driver, vm); - priv->job.status = status; - - if (ret < 0 || virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (ret < 0 || + qemuDomainJobInfoUpdateTime(priv->job.current) < 0) return -1; - priv->job.info.timeElapsed -= priv->job.start; - ret = -1; - switch (priv->job.status.status) { - case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: - priv->job.info.type = VIR_DOMAIN_JOB_NONE; - virReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("is not active")); - break; - + jobInfo = priv->job.current; + switch (status.status) { + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + /* fall through */ case QEMU_MONITOR_MIGRATION_STATUS_SETUP: - ret = 0; - break; - case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: - priv->job.info.fileTotal = priv->job.status.disk_total; - priv->job.info.fileRemaining = priv->job.status.disk_remaining; - priv->job.info.fileProcessed = priv->job.status.disk_transferred; - - priv->job.info.memTotal = priv->job.status.ram_total; - priv->job.info.memRemaining = priv->job.status.ram_remaining; - priv->job.info.memProcessed = priv->job.status.ram_transferred; - - priv->job.info.dataTotal = - priv->job.status.ram_total + priv->job.status.disk_total; - priv->job.info.dataRemaining = - priv->job.status.ram_remaining + priv->job.status.disk_remaining; - priv->job.info.dataProcessed = - priv->job.status.ram_transferred + - priv->job.status.disk_transferred; - ret = 0; break; - case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: - priv->job.info.type = VIR_DOMAIN_JOB_COMPLETED; - ret = 0; + case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: + jobInfo->type = VIR_DOMAIN_JOB_NONE; + virReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("is not active")); break; case 
QEMU_MONITOR_MIGRATION_STATUS_ERROR: - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("unexpectedly failed")); break; case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: - priv->job.info.type = VIR_DOMAIN_JOB_CANCELLED; + jobInfo->type = VIR_DOMAIN_JOB_CANCELLED; virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), job, _("canceled by client")); break; } + jobInfo->status = status; return ret; } @@ -1803,11 +1782,14 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, * QEMU reports failed migration. */ static int -qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, +qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, + virDomainObjPtr vm, qemuDomainAsyncJob asyncJob, - virConnectPtr dconn, bool abort_on_error) + virConnectPtr dconn, + bool abort_on_error) { qemuDomainObjPrivatePtr priv = vm->privateData; + qemuDomainJobInfoPtr jobInfo = priv->job.current; const char *job; int pauseReason; @@ -1825,9 +1807,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, job = _("job"); } - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + jobInfo->type = VIR_DOMAIN_JOB_UNBOUNDED; - while (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + while (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* Poll every 50ms for progress & to allow cancellation */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; @@ -1856,13 +1838,13 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, virObjectLock(vm); } - if (priv->job.info.type == VIR_DOMAIN_JOB_COMPLETED) { + if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { return 0; - } else if (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's * update the job type and notify the caller to send migrate_cancel. */ - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; return -2; } else { return -1; @@ -4912,7 +4894,7 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, JOB_MASK(QEMU_JOB_MIGRATION_OP)); } - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; return 0; } -- 2.1.0

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
Job statistics data were tracked in several structures and variables. Let's make a new qemuDomainJobInfo structure which can be used as a single source of statistics data, in preparation for storing data about a completed job.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 157 ++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.h | 24 ++++++- src/qemu/qemu_driver.c | 119 ++++------------------------------- src/qemu/qemu_migration.c | 72 ++++++++------------- 4 files changed, 213 insertions(+), 159 deletions(-)
Curious why the decision to use "Ptr" instead of an inline struct for current & completed? When I saw an alloc in the middle of a structure and then a few free's along the way, I began to wonder and attempted to research the various paths to make sure all accesses knew/checked that the impending deref would be OK - I marked those I found... I think one or two may need a double check since I assume you know the overall code paths better. I see there's a path from qemuProcessReconnect() - that's the libvirtd restart path, right? So when the VIR_ALLOC() is done for current/completed - whose address space is that? Wouldn't that be the address space of the "old" libvirtd? Maybe I'm overthinking it on a Friday :-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index e9506e0..2c709e9 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -163,11 +163,9 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncOwner = 0; job->phase = 0; job->mask = DEFAULT_JOB_MASK; - job->start = 0; job->dump_memory_only = false; job->asyncAbort = false; - memset(&job->status, 0, sizeof(job->status)); - memset(&job->info, 0, sizeof(job->info)); + VIR_FREE(job->current);
This was one of those free paths where there were multiple callers, and I wasn't 100% sure whether other accesses needed to be checked w/r/t this free occurring before an unchecked access in some other path...
}
void @@ -200,6 +198,7 @@ qemuDomainObjTransferJob(virDomainObjPtr obj) static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { + VIR_FREE(priv->job.current); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } @@ -211,6 +210,150 @@ qemuDomainTrackJob(qemuDomainJob job) }
+int +qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->started) + return 0;
any chance jobInfo == NULL? I think the only path where the caller doesn't check for current (or completed) is qemuMigrationUpdateJobStatus
+ + if (virTimeMillisNow(&now) < 0) + return -1; + + jobInfo->timeElapsed = now - jobInfo->started; + return 0; +} + +int +qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info) +{
Called with job.current checked
+ info->timeElapsed = jobInfo->timeElapsed; + info->timeRemaining = jobInfo->timeRemaining; + + info->memTotal = jobInfo->status.ram_total; + info->memRemaining = jobInfo->status.ram_remaining; + info->memProcessed = jobInfo->status.ram_transferred; + + info->fileTotal = jobInfo->status.disk_total; + info->fileRemaining = jobInfo->status.disk_remaining; + info->fileProcessed = jobInfo->status.disk_transferred; + + info->dataTotal = info->memTotal + info->fileTotal; + info->dataRemaining = info->memRemaining + info->fileRemaining; + info->dataProcessed = info->memProcessed + info->fileProcessed; + + return 0; +} + +int +qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams) +{
Called with job.current (and .completed) checked...
+ qemuMonitorMigrationStatus *status = &jobInfo->status; + virTypedParameterPtr par = NULL; + int maxpar = 0; + int npar = 0; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed) < 0) + goto error; + + if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining) < 0) + goto error; + + if (status->downtime_set && + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_TOTAL, + status->ram_total + + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_PROCESSED, + status->ram_transferred + + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DATA_REMAINING, + status->ram_remaining + + status->disk_remaining) < 0) + goto error; + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_TOTAL, + status->ram_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining) < 0) + goto error; + + if (status->ram_duplicate_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes) < 0)
I have a note in patch 2 about ram_normal and ram_normal_bytes vis-a-vis qemu_monitor_json.c not checking status on their fetch and my latest coverity run complaining about it.
+ goto error; + } + + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining) < 0) + goto error; + + if (status->xbzrle_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow) < 0) + goto error; + } + + *type = jobInfo->type; + *params = par; + *nparams = npar; + return 0; + + error: + virTypedParamsFree(par, npar); + return -1; +} + + static void * qemuDomainObjPrivateAlloc(void) { @@ -1071,7 +1214,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, unsigned long long then; bool nested = job == QEMU_JOB_ASYNC_NESTED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - int ret; + int ret = -1;
VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", job == QEMU_JOB_ASYNC ? "async job" : "job",
Just a note: after this there's a priv->jobs_queued++, which can be decremented at cleanup; however, if virTimeMillisNow() fails, then there's a return -1 *and* a virObjectUnref(obj) before the virObjectRef(obj)...
@@ -1127,9 +1270,11 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainAsyncJobTypeToString(asyncJob), obj, obj->def->name); qemuDomainObjResetAsyncJob(priv); + if (VIR_ALLOC(priv->job.current) < 0) + goto cleanup;
If this fails - what are the chances some other path will check/access current without checking for it being NULL? Perhaps low, and there are going to be other problems anyway in a low-memory situation...
priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); - priv->job.start = now; + priv->job.current->started = now; }
if (qemuDomainTrackJob(job)) @@ -1163,6 +1308,8 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, virReportSystemError(errno, "%s", _("cannot acquire job mutex")); } + + cleanup: priv->jobs_queued--; virObjectUnref(obj); virObjectUnref(cfg); diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 8736889..119590e 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -100,6 +100,18 @@ typedef enum { } qemuDomainAsyncJob; VIR_ENUM_DECL(qemuDomainAsyncJob)
+typedef struct _qemuDomainJobInfo qemuDomainJobInfo; +typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; +struct _qemuDomainJobInfo { + virDomainJobType type; + unsigned long long started; /* When the async job started */ + /* Computed values */ + unsigned long long timeElapsed; + unsigned long long timeRemaining; + /* Raw values from QEMU */ + qemuMonitorMigrationStatus status; +}; + struct qemuDomainJobObj { virCond cond; /* Use to coordinate jobs */ qemuDomainJob active; /* Currently running job */ @@ -110,10 +122,8 @@ struct qemuDomainJobObj { unsigned long long asyncOwner; /* Thread which set current async job */ int phase; /* Job phase (mainly for migrations) */ unsigned long long mask; /* Jobs allowed during async job */ - unsigned long long start; /* When the async job started */ bool dump_memory_only; /* use dump-guest-memory to do dump */ - qemuMonitorMigrationStatus status; /* Raw async job progress data */ - virDomainJobInfo info; /* Processed async job progress data */ + qemuDomainJobInfoPtr current; /* async job progress data */ bool asyncAbort; /* abort of async job requested */ };
@@ -378,4 +388,12 @@ bool qemuDomainDefCheckABIStability(virQEMUDriverPtr driver, bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError);
+int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info); +int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams); +
Should any of these new defs need to have/use the ATTRIBUTE_NONNULL() for params?
#endif /* __QEMU_DOMAIN_H__ */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 239a300..265315d 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2655,15 +2655,13 @@ qemuDomainGetControlInfo(virDomainPtr dom, if (priv->monError) { info->state = VIR_DOMAIN_CONTROL_ERROR; } else if (priv->job.active) { - if (!priv->monStart) { + if (virTimeMillisNow(&info->stateTime) < 0) + goto cleanup; + if (priv->job.current) { info->state = VIR_DOMAIN_CONTROL_JOB; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; - info->stateTime -= priv->job.start; + info->stateTime -= priv->job.current->started; } else { info->state = VIR_DOMAIN_CONTROL_OCCUPIED; - if (virTimeMillisNow(&info->stateTime) < 0) - goto cleanup; info->stateTime -= priv->monStart; } } else { @@ -3110,8 +3108,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; }
- memset(&priv->job.info, 0, sizeof(priv->job.info)); - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED;
Assumes current-> exists, but occurs after BeginAsyncJob() is successful, so it seems safe.
/* Pause */ if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { @@ -3460,6 +3457,7 @@ static int qemuDumpToFd(virQEMUDriverPtr driver, virDomainObjPtr vm, fd) < 0) return -1;
+ VIR_FREE(priv->job.current); priv->job.dump_memory_only = true;
if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) @@ -11616,17 +11614,15 @@ static int qemuDomainGetJobInfo(virDomainPtr dom, goto cleanup;
if (virDomainObjIsActive(vm)) { - if (priv->job.asyncJob && !priv->job.dump_memory_only) { - memcpy(info, &priv->job.info, sizeof(*info)); - + if (priv->job.current) {
Here there is a check for job.current before usage
/* Refresh elapsed time again just to ensure it * is fully updated. This is primarily for benefit * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&info->timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToInfo(priv->job.current, info) < 0) goto cleanup; - info->timeElapsed -= priv->job.start; } else { memset(info, 0, sizeof(*info)); info->type = VIR_DOMAIN_JOB_NONE; @@ -11655,9 +11651,6 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; - virTypedParameterPtr par = NULL; - int maxpar = 0; - int npar = 0; int ret = -1;
virCheckFlags(0, -1); @@ -11676,7 +11669,7 @@ qemuDomainGetJobStats(virDomainPtr dom, goto cleanup; }
- if (!priv->job.asyncJob || priv->job.dump_memory_only) { + if (!priv->job.current) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0;
Here there is a check for job.current before usage below
@@ -11689,102 +11682,16 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || + qemuDomainJobInfoToParams(priv->job.current, + type, params, nparams) < 0) goto cleanup; - priv->job.info.timeElapsed -= priv->job.start;
- if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_ELAPSED, - priv->job.info.timeElapsed) < 0) - goto cleanup; - - if (priv->job.info.type == VIR_DOMAIN_JOB_BOUNDED && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_REMAINING, - priv->job.info.timeRemaining) < 0) - goto cleanup; - - if (priv->job.status.downtime_set && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DOWNTIME, - priv->job.status.downtime) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_TOTAL, - priv->job.info.dataTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_PROCESSED, - priv->job.info.dataProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DATA_REMAINING, - priv->job.info.dataRemaining) < 0) - goto cleanup; - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_TOTAL, - priv->job.info.memTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_PROCESSED, - priv->job.info.memProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_REMAINING, - priv->job.info.memRemaining) < 0) - goto cleanup; - - if (priv->job.status.ram_duplicate_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_CONSTANT, - priv->job.status.ram_duplicate) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL, - priv->job.status.ram_normal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, - priv->job.status.ram_normal_bytes) < 0) - goto cleanup; - } - - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_TOTAL, - priv->job.info.fileTotal) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_PROCESSED, - priv->job.info.fileProcessed) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_DISK_REMAINING, - priv->job.info.fileRemaining) < 0) - goto cleanup; - - if (priv->job.status.xbzrle_set) { - if (virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE, - priv->job.status.xbzrle_cache_size) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_BYTES, - priv->job.status.xbzrle_bytes) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_PAGES, - priv->job.status.xbzrle_pages) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, - priv->job.status.xbzrle_cache_miss) < 0 || - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, - priv->job.status.xbzrle_overflow) < 0) - goto cleanup; - } - - *type = priv->job.info.type; - *params = par; - *nparams = npar; ret = 0;
cleanup: if (vm) virObjectUnlock(vm); - if (ret < 0) - virTypedParamsFree(par, npar); return ret; }
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9cfb77e..64adab4 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1723,8 +1723,9 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, qemuDomainAsyncJob asyncJob) { qemuDomainObjPrivatePtr priv = vm->privateData; - int ret; qemuMonitorMigrationStatus status; + qemuDomainJobInfoPtr jobInfo; + int ret;
memset(&status, 0, sizeof(status));
@@ -1738,62 +1739,40 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver,
qemuDomainObjExitMonitor(driver, vm);
- priv->job.status = status; - - if (ret < 0 || virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (ret < 0 || + qemuDomainJobInfoUpdateTime(priv->job.current) < 0)
Here (and below) it's assumed job.current is valid.
return -1;
- priv->job.info.timeElapsed -= priv->job.start; - ret = -1; - switch (priv->job.status.status) { - case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: - priv->job.info.type = VIR_DOMAIN_JOB_NONE; - virReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("is not active")); - break; - + jobInfo = priv->job.current; + switch (status.status) { + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + /* fall through */ case QEMU_MONITOR_MIGRATION_STATUS_SETUP: - ret = 0; - break; - case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: - priv->job.info.fileTotal = priv->job.status.disk_total; - priv->job.info.fileRemaining = priv->job.status.disk_remaining; - priv->job.info.fileProcessed = priv->job.status.disk_transferred; - - priv->job.info.memTotal = priv->job.status.ram_total; - priv->job.info.memRemaining = priv->job.status.ram_remaining; - priv->job.info.memProcessed = priv->job.status.ram_transferred; - - priv->job.info.dataTotal = - priv->job.status.ram_total + priv->job.status.disk_total; - priv->job.info.dataRemaining = - priv->job.status.ram_remaining + priv->job.status.disk_remaining; - priv->job.info.dataProcessed = - priv->job.status.ram_transferred + - priv->job.status.disk_transferred; - ret = 0; break;
- case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: - priv->job.info.type = VIR_DOMAIN_JOB_COMPLETED; - ret = 0; + case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: + jobInfo->type = VIR_DOMAIN_JOB_NONE; + virReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("is not active")); break;
case QEMU_MONITOR_MIGRATION_STATUS_ERROR: - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("unexpectedly failed")); break;
case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: - priv->job.info.type = VIR_DOMAIN_JOB_CANCELLED; + jobInfo->type = VIR_DOMAIN_JOB_CANCELLED; virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), job, _("canceled by client")); break; } + jobInfo->status = status;
return ret; } @@ -1803,11 +1782,14 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, * QEMU reports failed migration. */ static int -qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, +qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, + virDomainObjPtr vm, qemuDomainAsyncJob asyncJob, - virConnectPtr dconn, bool abort_on_error) + virConnectPtr dconn, + bool abort_on_error) { qemuDomainObjPrivatePtr priv = vm->privateData; + qemuDomainJobInfoPtr jobInfo = priv->job.current;
Hopefully nothing has cleared or could clear this before this point...
const char *job; int pauseReason;
@@ -1825,9 +1807,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, job = _("job"); }
- priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + jobInfo->type = VIR_DOMAIN_JOB_UNBOUNDED;
- while (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + while (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* Poll every 50ms for progress & to allow cancellation */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
@@ -1856,13 +1838,13 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, virObjectLock(vm); }
- if (priv->job.info.type == VIR_DOMAIN_JOB_COMPLETED) { + if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { return 0; - } else if (priv->job.info.type == VIR_DOMAIN_JOB_UNBOUNDED) { + } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's * update the job type and notify the caller to send migrate_cancel. */ - priv->job.info.type = VIR_DOMAIN_JOB_FAILED; + jobInfo->type = VIR_DOMAIN_JOB_FAILED; return -2; } else { return -1; @@ -4912,7 +4894,7 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, JOB_MASK(QEMU_JOB_MIGRATION_OP)); }
- priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED;
This assumption of current-> should be OK since a failure from qemuDomainObjBeginAsyncJob() wouldn't go through here.
return 0; }
In general - I'm AOK with the code - my main concern has to do with alloc'd memory... John

On Fri, Sep 05, 2014 at 14:41:21 -0400, John Ferlan wrote:
On 09/01/2014 11:05 AM, Jiri Denemark wrote:
Job statistics data were tracked in several structures and variables. Let's make a new qemuDomainJobInfo structure which can be used as a single source of statistics data, in preparation for storing data about a completed job.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 157 ++++++++++++++++++++++++++++++++++++++++++++-- src/qemu/qemu_domain.h | 24 ++++++- src/qemu/qemu_driver.c | 119 ++++------------------------------- src/qemu/qemu_migration.c | 72 ++++++++------------- 4 files changed, 213 insertions(+), 159 deletions(-)
Curious why the decision to use "Ptr" instead of inline struct for current & completed?
It's just easier to check whether completed is filled in or not, and current has the same type for consistency.
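To illustrate the idiom being described, here is a rough, self-contained sketch; the type and field names below are stand-ins, not the real qemu_domain.h structures.

#include <stdbool.h>
#include <stddef.h>

/* Stand-in for qemuDomainJobInfo; the real struct carries the raw and
 * computed statistics. */
typedef struct sketchJobInfo {
    unsigned long long timeElapsed;
} sketchJobInfo;

/* With pointers, NULL itself encodes "no data yet"; an embedded struct
 * would need a separate flag to express the same thing. */
struct sketchJob {
    sketchJobInfo *current;    /* non-NULL iff an async job is running */
    sketchJobInfo *completed;  /* non-NULL iff a finished job left stats */
};

static bool sketchJobHasCompletedStats(const struct sketchJob *job)
{
    return job->completed != NULL;
}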
When I saw an alloc in the middle of a structure and then a few free's along the way, I began to wonder and attempted to research the various paths to make sure all accesses knew/checked that the impending deref would be OK - I marked those I found... I think one or two may need a double check since I assume you know the overall code paths better.
Basically, current is allocated iff there is an async job running so any code that knows an async job exists may access current without having to worry about it too much.
I see there's a path from qemuProcessReconnect() - that's the libvirtd restart path, right? So when the VIR_ALLOC() is done for current/completed - whose address space is that? Wouldn't that be the address space of the "old" libvirtd?
I'm not quite sure what you are talking about here :-) However, qemuProcessReconnect calls qemuProcessRecoverJob and that one does not care about any statistics.
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index e9506e0..2c709e9 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -163,11 +163,9 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncOwner = 0; job->phase = 0; job->mask = DEFAULT_JOB_MASK; - job->start = 0; job->dump_memory_only = false; job->asyncAbort = false; - memset(&job->status, 0, sizeof(job->status)); - memset(&job->info, 0, sizeof(job->info)); + VIR_FREE(job->current);
This was one of those free paths where there were multiple callers, and I wasn't 100% sure whether other accesses needed to be checked w/r/t this free occurring before an unchecked access in some other path...
Once an async job goes away so does current info about it.
}
void ... @@ -211,6 +210,150 @@ qemuDomainTrackJob(qemuDomainJob job) }
+int +qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->started) + return 0;
any chance jobInfo == NULL? I think the only path where the caller doesn't check for current (or completed) is qemuMigrationUpdateJobStatus
Shouldn't be. And this is definitely a good candidate for ATTRIBUTE_NONNULL. qemuMigrationUpdateJobStatus is always called within an async job so current is guaranteed to be non-NULL. ...
+ if (status->ram_duplicate_set) { + if (virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal) < 0 || + virTypedParamsAddULLong(&par, &npar, &maxpar, + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes) < 0)
I have a note in patch 2 about ram_normal and ram_normal_bytes vis-a-vis qemu_monitor_json.c not checking status on their fetch and my latest coverity run complaining about it.
Not sure what you're talking about, but I'll look at it in patch 2 :-) ...
@@ -1071,7 +1214,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, unsigned long long then; bool nested = job == QEMU_JOB_ASYNC_NESTED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - int ret; + int ret = -1;
VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", job == QEMU_JOB_ASYNC ? "async job" : "job",
Just a note after this there's a
priv->jobs_queued++;
which can be decremented at cleanup; however, if virTimeMillisNow() fails, then there's a return -1 *and* a virObjectUnref(obj) before the virObjectRef(obj)...
This is unrelated to this patch, but you are right jobs_queued++ should be done after virTimeMillisNow(). The rest is correct because it's virObjectUnref(cfg) that is called before return -1 (note obj vs. cfg).
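Something along these lines is what the reordering would look like; this is only a sketch of the idea, not the actual follow-up patch, and it reuses the variables already present in qemuDomainObjBeginJobInternal (priv, cfg, then, QEMU_JOB_WAIT_TIME).

    unsigned long long now;

    if (virTimeMillisNow(&now) < 0) {
        virObjectUnref(cfg);
        return -1;
    }

    priv->jobs_queued++;   /* bump the counter only after the call that can fail */
    then = now + QEMU_JOB_WAIT_TIME;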
@@ -1127,9 +1270,11 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainAsyncJobTypeToString(asyncJob), obj, obj->def->name); qemuDomainObjResetAsyncJob(priv); + if (VIR_ALLOC(priv->job.current) < 0) + goto cleanup;
If this fails - what are the chances some other path will check/access current without checking for it being NULL? Perhaps low, and there are going to be other problems anyway in a low-memory situation...
If this fails then the caller will not get an async job and thus it shouldn't do anything and especially it shouldn't touch job.current.
priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); - priv->job.start = now; + priv->job.current->started = now; }
if (qemuDomainTrackJob(job))
...
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 8736889..119590e 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h ... @@ -378,4 +388,12 @@ bool qemuDomainDefCheckABIStability(virQEMUDriverPtr driver, bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError);
+int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, + virDomainJobInfoPtr info); +int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, + int *type, + virTypedParameterPtr *params, + int *nparams); +
Should any of these new defs need to have/use the ATTRIBUTE_NONNULL() for params?
Yeah, all of them I guess.
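For reference, annotating the new prototypes could look roughly like this (a sketch only, using libvirt's ATTRIBUTE_NONNULL macro from internal.h):

int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo)
    ATTRIBUTE_NONNULL(1);
int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo,
                            virDomainJobInfoPtr info)
    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo,
                              int *type,
                              virTypedParameterPtr *params,
                              int *nparams)
    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
    ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4);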
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 239a300..265315d 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c ... @@ -3110,8 +3108,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; }
- memset(&priv->job.info, 0, sizeof(priv->job.info)); - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED;
Assumes current-> exists, but occurs after BeginAsyncJob() is successful, so it seems safe.
Exactly.
/* Pause */ if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
...
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 9cfb77e..64adab4 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c ... @@ -1738,62 +1739,40 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver,
qemuDomainObjExitMonitor(driver, vm);
- priv->job.status = status; - - if (ret < 0 || virTimeMillisNow(&priv->job.info.timeElapsed) < 0) + if (ret < 0 || + qemuDomainJobInfoUpdateTime(priv->job.current) < 0)
Here (and below) it's assumed job.current is valid.
qemuMigrationUpdateJobStatus is only called with an async job started. ...
@@ -1803,11 +1782,14 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, * QEMU reports failed migration. */ static int -qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, +qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, + virDomainObjPtr vm, qemuDomainAsyncJob asyncJob, - virConnectPtr dconn, bool abort_on_error) + virConnectPtr dconn, + bool abort_on_error) { qemuDomainObjPrivatePtr priv = vm->privateData; + qemuDomainJobInfoPtr jobInfo = priv->job.current;
Hopefully nothing has cleared or could clear this before this point...
qemuMigrationWaitForCompletion is also called with async job set. ... Jirka

virDomainGetJobStats gains new VIR_DOMAIN_JOB_STATS_COMPLETED flag that can be used to fetch statistics of a completed job rather than a currently running job. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/libvirt.h.in | 11 +++++++++++ src/libvirt.c | 8 +++++++- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 25 +++++++++++++++++++------ src/qemu/qemu_migration.c | 6 ++++++ src/qemu/qemu_monitor_json.c | 3 ++- 7 files changed, 47 insertions(+), 8 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 9358314..9670e06 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -4306,6 +4306,17 @@ struct _virDomainJobInfo { unsigned long long fileRemaining; }; +/** + * virDomainGetJobStatsFlags: + * + * Flags OR'ed together to provide specific behavior when querying domain + * job statistics. + */ +typedef enum { + VIR_DOMAIN_JOB_STATS_COMPLETED = 1 << 0, /* return stats of a recently + * completed job */ +} virDomainGetJobStatsFlags; + int virDomainGetJobInfo(virDomainPtr dom, virDomainJobInfoPtr info); int virDomainGetJobStats(virDomainPtr domain, diff --git a/src/libvirt.c b/src/libvirt.c index 5d8f01c..6fa0a6b 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17567,7 +17567,7 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * @type: where to store the job type (one of virDomainJobType) * @params: where to store job statistics * @nparams: number of items in @params - * @flags: extra flags; not used yet, so callers should always pass 0 + * @flags: bitwise-OR of virDomainGetJobStatsFlags * * Extract information about progress of a background job on a domain. * Will return an error if the domain is not active. The function returns @@ -17577,6 +17577,12 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * may receive fields that they do not understand in case they talk to a * newer server. * + * When @flags contains VIR_DOMAIN_JOB_STATS_COMPLETED, the function will + * return statistics about a recently completed job. Specifically, this + * flag may be used to query statistics of a completed incoming migration. + * Statistics of a completed job are automatically destroyed once read or + * when libvirtd is restarted. + * * Returns 0 in case of success and -1 in case of failure. 
*/ int diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 2c709e9..18a3761 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -199,6 +199,7 @@ static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { VIR_FREE(priv->job.current); + VIR_FREE(priv->job.completed); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 119590e..365238b 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -124,6 +124,7 @@ struct qemuDomainJobObj { unsigned long long mask; /* Jobs allowed during async job */ bool dump_memory_only; /* use dump-guest-memory to do dump */ qemuDomainJobInfoPtr current; /* async job progress data */ + qemuDomainJobInfoPtr completed; /* statistics data of a recently completed job */ bool asyncAbort; /* abort of async job requested */ }; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 265315d..cd6932a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -11651,9 +11651,10 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; + qemuDomainJobInfoPtr jobInfo; int ret = -1; - virCheckFlags(0, -1); + virCheckFlags(VIR_DOMAIN_JOB_STATS_COMPLETED, -1); if (!(vm = qemuDomObjFromDomain(dom))) goto cleanup; @@ -11663,13 +11664,19 @@ qemuDomainGetJobStats(virDomainPtr dom, if (virDomainGetJobStatsEnsureACL(dom->conn, vm->def) < 0) goto cleanup; - if (!virDomainObjIsActive(vm)) { + if (!(flags & VIR_DOMAIN_JOB_STATS_COMPLETED) && + !virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); goto cleanup; } - if (!priv->job.current) { + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + jobInfo = priv->job.completed; + else + jobInfo = priv->job.current; + + if (!jobInfo) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0; @@ -11682,11 +11689,17 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || - qemuDomainJobInfoToParams(priv->job.current, - type, params, nparams) < 0) + if ((jobInfo->type == VIR_DOMAIN_JOB_BOUNDED || + jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) && + qemuDomainJobInfoUpdateTime(jobInfo) < 0) goto cleanup; + if (qemuDomainJobInfoToParams(jobInfo, type, params, nparams) < 0) + goto cleanup; + + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + VIR_FREE(priv->job.completed); + ret = 0; cleanup: diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 64adab4..208a21f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1839,6 +1839,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + VIR_FREE(priv->job.completed); + if (VIR_ALLOC(priv->job.completed) == 0) + *priv->job.completed = *jobInfo; return 0; } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's @@ -3418,6 +3421,9 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); } + if (priv->job.completed) + qemuDomainJobInfoUpdateTime(priv->job.completed); + cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 62e7d5d..263fa8b 100644 --- a/src/qemu/qemu_monitor_json.c +++ 
b/src/qemu/qemu_monitor_json.c @@ -2478,7 +2478,8 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, if (rc == 0) status->downtime_set = true; - if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE) { + if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE || + status->status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) { virJSONValuePtr ram = virJSONValueObjectGet(ret, "ram"); if (!ram) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", -- 2.1.0
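As an illustration of the "destroyed once read" semantics documented in the libvirt.c hunk above, a second query with the flag should simply report that no job data is available. This is a small helper sketch assuming the flag from this patch; error reporting is trimmed.

/* Sketch: completed-job statistics are a one-shot snapshot.  The first
 * call with VIR_DOMAIN_JOB_STATS_COMPLETED consumes them; a later call
 * reports VIR_DOMAIN_JOB_NONE until another job finishes. */
#include <stdio.h>
#include <libvirt/libvirt.h>

static void dump_completed_stats(virDomainPtr dom)
{
    virTypedParameterPtr params = NULL;
    int nparams = 0;
    int type = VIR_DOMAIN_JOB_NONE;

    if (virDomainGetJobStats(dom, &type, &params, &nparams,
                             VIR_DOMAIN_JOB_STATS_COMPLETED) < 0)
        return;

    if (type == VIR_DOMAIN_JOB_NONE)
        printf("no completed job statistics available\n");
    else
        printf("completed job reported %d statistics fields\n", nparams);

    virTypedParamsFree(params, nparams);
}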

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
virDomainGetJobStats gains a new VIR_DOMAIN_JOB_STATS_COMPLETED flag that can be used to fetch statistics of a completed job rather than a currently running job.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/libvirt.h.in | 11 +++++++++++ src/libvirt.c | 8 +++++++- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 25 +++++++++++++++++++------ src/qemu/qemu_migration.c | 6 ++++++ src/qemu/qemu_monitor_json.c | 3 ++- 7 files changed, 47 insertions(+), 8 deletions(-)
This seems AOK - see my note at the end about qemu_monitor_json.c though.
diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 9358314..9670e06 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -4306,6 +4306,17 @@ struct _virDomainJobInfo { unsigned long long fileRemaining; };
+/** + * virDomainGetJobStatsFlags: + * + * Flags OR'ed together to provide specific behavior when querying domain + * job statistics. + */ +typedef enum { + VIR_DOMAIN_JOB_STATS_COMPLETED = 1 << 0, /* return stats of a recently + * completed job */ +} virDomainGetJobStatsFlags; + int virDomainGetJobInfo(virDomainPtr dom, virDomainJobInfoPtr info); int virDomainGetJobStats(virDomainPtr domain, diff --git a/src/libvirt.c b/src/libvirt.c index 5d8f01c..6fa0a6b 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17567,7 +17567,7 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * @type: where to store the job type (one of virDomainJobType) * @params: where to store job statistics * @nparams: number of items in @params - * @flags: extra flags; not used yet, so callers should always pass 0 + * @flags: bitwise-OR of virDomainGetJobStatsFlags * * Extract information about progress of a background job on a domain. * Will return an error if the domain is not active. The function returns @@ -17577,6 +17577,12 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * may receive fields that they do not understand in case they talk to a * newer server. * + * When @flags contains VIR_DOMAIN_JOB_STATS_COMPLETED, the function will + * return statistics about a recently completed job. Specifically, this + * flag may be used to query statistics of a completed incoming migration. + * Statistics of a completed job are automatically destroyed once read or + * when libvirtd is restarted. + *
^^^^^^^^^ Yeah - my question from patch 1 with respect to checking access for job.current especially... The job.completed seems fine.
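To make the intended usage concrete, here is a minimal client-side sketch of fetching completed-job statistics with the new flag (the helper name is made up, the caller is assumed to already hold a virDomainPtr, and error handling is omitted):

    #include <stdio.h>
    #include <libvirt/libvirt.h>

    /* Print the final downtime of the most recently completed job. */
    static void
    printCompletedDowntime(virDomainPtr dom)
    {
        virTypedParameterPtr params = NULL;
        int nparams = 0;
        int type;
        unsigned long long downtime;

        /* VIR_DOMAIN_JOB_STATS_COMPLETED asks for the finished job's stats
         * instead of a currently running job */
        if (virDomainGetJobStats(dom, &type, &params, &nparams,
                                 VIR_DOMAIN_JOB_STATS_COMPLETED) < 0)
            return;

        if (type == VIR_DOMAIN_JOB_COMPLETED &&
            virTypedParamsGetULLong(params, nparams,
                                    VIR_DOMAIN_JOB_DOWNTIME, &downtime) > 0)
            printf("total downtime: %llu ms\n", downtime);

        virTypedParamsFree(params, nparams);
    }

Per the documentation text above, a second identical call would report VIR_DOMAIN_JOB_NONE, since the completed-job data is freed once it has been read.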
* Returns 0 in case of success and -1 in case of failure. */ int diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 2c709e9..18a3761 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -199,6 +199,7 @@ static void qemuDomainObjFreeJob(qemuDomainObjPrivatePtr priv) { VIR_FREE(priv->job.current); + VIR_FREE(priv->job.completed); virCondDestroy(&priv->job.cond); virCondDestroy(&priv->job.asyncCond); } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 119590e..365238b 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -124,6 +124,7 @@ struct qemuDomainJobObj { unsigned long long mask; /* Jobs allowed during async job */ bool dump_memory_only; /* use dump-guest-memory to do dump */ qemuDomainJobInfoPtr current; /* async job progress data */ + qemuDomainJobInfoPtr completed; /* statistics data of a recently completed job */ bool asyncAbort; /* abort of async job requested */ };
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 265315d..cd6932a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -11651,9 +11651,10 @@ qemuDomainGetJobStats(virDomainPtr dom, { virDomainObjPtr vm; qemuDomainObjPrivatePtr priv; + qemuDomainJobInfoPtr jobInfo; int ret = -1;
- virCheckFlags(0, -1); + virCheckFlags(VIR_DOMAIN_JOB_STATS_COMPLETED, -1);
if (!(vm = qemuDomObjFromDomain(dom))) goto cleanup; @@ -11663,13 +11664,19 @@ qemuDomainGetJobStats(virDomainPtr dom, if (virDomainGetJobStatsEnsureACL(dom->conn, vm->def) < 0) goto cleanup;
- if (!virDomainObjIsActive(vm)) { + if (!(flags & VIR_DOMAIN_JOB_STATS_COMPLETED) && + !virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); goto cleanup; }
- if (!priv->job.current) { + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + jobInfo = priv->job.completed; + else + jobInfo = priv->job.current; + + if (!jobInfo) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0;
Here there is a check for job.{completed|current} before access
@@ -11682,11 +11689,17 @@ qemuDomainGetJobStats(virDomainPtr dom, * of incoming migration which we don't currently * monitor actively in the background thread */ - if (qemuDomainJobInfoUpdateTime(priv->job.current) < 0 || - qemuDomainJobInfoToParams(priv->job.current, - type, params, nparams) < 0) + if ((jobInfo->type == VIR_DOMAIN_JOB_BOUNDED || + jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) && + qemuDomainJobInfoUpdateTime(jobInfo) < 0) goto cleanup;
+ if (qemuDomainJobInfoToParams(jobInfo, type, params, nparams) < 0) + goto cleanup; + + if (flags & VIR_DOMAIN_JOB_STATS_COMPLETED) + VIR_FREE(priv->job.completed); + ret = 0;
cleanup: diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 64adab4..208a21f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1839,6 +1839,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, }
if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + VIR_FREE(priv->job.completed); + if (VIR_ALLOC(priv->job.completed) == 0) + *priv->job.completed = *jobInfo; return 0; } else if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { /* The migration was aborted by us rather than QEMU itself so let's @@ -3418,6 +3421,9 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); }
+ if (priv->job.completed) + qemuDomainJobInfoUpdateTime(priv->job.completed); +
Here there is a check for job.completed before usage
cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 62e7d5d..263fa8b 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2478,7 +2478,8 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, if (rc == 0) status->downtime_set = true;
- if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE) { + if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE || + status->status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) { virJSONValuePtr ram = virJSONValueObjectGet(ret, "ram"); if (!ram) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
While you're in this module - the following 3 calls don't check status (and were flagged by my new coverity):
virJSONValueObjectGetNumberUlong(ret, "total-time", &status->total_time);
virJSONValueObjectGetNumberUlong(ram, "normal", &status->ram_normal);
virJSONValueObjectGetNumberUlong(ram, "normal-bytes", &status->ram_normal_bytes);
I know "somewhat" outside the bounds of these changes but since they're in qemuMonitorJSONGetMigrationStatusReply() and used by the changes here - there's enough of a reason to adjust that I think.
I don't see where "total_time" is ever used though...
John

On Fri, Sep 05, 2014 at 14:42:00 -0400, John Ferlan wrote:
On 09/01/2014 11:05 AM, Jiri Denemark wrote:
virDomainGetJobStats gains new VIR_DOMAIN_JOB_STATS_COMPLETED flag that can be used to fetch statistics of a completed job rather than a currently running job.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/libvirt.h.in | 11 +++++++++++ src/libvirt.c | 8 +++++++- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 25 +++++++++++++++++++------ src/qemu/qemu_migration.c | 6 ++++++ src/qemu/qemu_monitor_json.c | 3 ++- 7 files changed, 47 insertions(+), 8 deletions(-)
This seems AOK - see my note at the end about qemu_monitor_json.c though.
...
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 62e7d5d..263fa8b 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -2478,7 +2478,8 @@ qemuMonitorJSONGetMigrationStatusReply(virJSONValuePtr reply, if (rc == 0) status->downtime_set = true;
- if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE) { + if (status->status == QEMU_MONITOR_MIGRATION_STATUS_ACTIVE || + status->status == QEMU_MONITOR_MIGRATION_STATUS_COMPLETED) { virJSONValuePtr ram = virJSONValueObjectGet(ret, "ram"); if (!ram) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
While you're in this module - the following 3 calls don't check status (and were flagged by my new coverity):
virJSONValueObjectGetNumberUlong(ret, "total-time", &status->total_time);
virJSONValueObjectGetNumberUlong(ram, "normal", &status->ram_normal);
virJSONValueObjectGetNumberUlong(ram, "normal-bytes", &status->ram_normal_bytes);
QEMU does not always report all values so we just ignore failures to get some of them. I'll look at them and use ignore_value() when appropriate.
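In practice that would mean wrapping the optional fields along these lines (illustrative only, not the actual follow-up patch):

    ignore_value(virJSONValueObjectGetNumberUlong(ret, "total-time",
                                                  &status->total_time));
    ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal",
                                                  &status->ram_normal));
    ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal-bytes",
                                                  &status->ram_normal_bytes));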
I know "somewhat" outside the bounds of these changes but since they're in qemuMonitorJSONGetMigrationStatusReply() and used by the changes here - there's enough of a reason to adjust that I think.
I don't see where "total_time" is ever used though...
Right, we compute total time ourselves. This is just for completeness to parse all fields QEMU supports in case we want to use them sometime somewhere :-) Jirka

New --completed flag for virsh domjobinfo command. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- tools/virsh-domain.c | 27 ++++++++++++++++++++++++--- tools/virsh.pod | 5 +++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index c75cd73..1ff264e 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -5164,6 +5164,10 @@ static const vshCmdOptDef opts_domjobinfo[] = { .flags = VSH_OFLAG_REQ, .help = N_("domain name, id or uuid") }, + {.name = "completed", + .type = VSH_OT_BOOL, + .help = N_("return statistics of a recently completed job") + }, {.name = NULL} }; @@ -5195,14 +5199,18 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) virTypedParameterPtr params = NULL; int nparams = 0; unsigned long long value; + unsigned int flags = 0; int rc; if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return false; + if (vshCommandOptBool(cmd, "completed")) + flags |= VIR_DOMAIN_JOB_STATS_COMPLETED; + memset(&info, 0, sizeof(info)); - rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, 0); + rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, flags); if (rc == 0) { if (virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_TIME_ELAPSED, @@ -5239,6 +5247,11 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &info.fileRemaining) < 0) goto save_error; } else if (last_error->code == VIR_ERR_NO_SUPPORT) { + if (flags) { + vshError(ctl, "%s", _("Optional flags are not supported by the " + "daemon")); + goto cleanup; + } vshDebug(ctl, VSH_ERR_DEBUG, "detailed statistics not supported\n"); vshResetLibvirtError(); rc = virDomainGetJobInfo(dom, &info); @@ -5249,7 +5262,9 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%-17s %-12s\n", _("Job type:"), vshDomainJobToString(info.type)); if (info.type != VIR_DOMAIN_JOB_BOUNDED && - info.type != VIR_DOMAIN_JOB_UNBOUNDED) { + info.type != VIR_DOMAIN_JOB_UNBOUNDED && + (!(flags & VIR_DOMAIN_JOB_STATS_COMPLETED) || + info.type != VIR_DOMAIN_JOB_COMPLETED)) { ret = true; goto cleanup; } @@ -5314,7 +5329,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &value)) < 0) { goto save_error; } else if (rc) { - vshPrint(ctl, "%-17s %-12llu ms\n", _("Expected downtime:"), value); + if (info.type == VIR_DOMAIN_JOB_COMPLETED) { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Total downtime:"), value); + } else { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Expected downtime:"), value); + } } if ((rc = virTypedParamsGetULLong(params, nparams, diff --git a/tools/virsh.pod b/tools/virsh.pod index ea9267e..3c71db9 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1112,9 +1112,10 @@ Convert a domain name (or UUID) to a domain id Abort the currently running domain job. -=item B<domjobinfo> I<domain> +=item B<domjobinfo> I<domain> [I<--completed>] -Returns information about jobs running on a domain. +Returns information about jobs running on a domain. I<--completed> tells +virsh to return information about a recently finished job. =item B<domname> I<domain-id-or-uuid> -- 2.1.0
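For reference, the new option would be invoked along the lines of (domain name hypothetical):

    virsh domjobinfo guest1 --completed

Without --completed the command keeps its existing behavior of reporting the currently running job.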

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
New --completed flag for virsh domjobinfo command.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- tools/virsh-domain.c | 27 ++++++++++++++++++++++++--- tools/virsh.pod | 5 +++-- 2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index c75cd73..1ff264e 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -5164,6 +5164,10 @@ static const vshCmdOptDef opts_domjobinfo[] = { .flags = VSH_OFLAG_REQ, .help = N_("domain name, id or uuid") }, + {.name = "completed", + .type = VSH_OT_BOOL, + .help = N_("return statistics of a recently completed job") + }, {.name = NULL} };
@@ -5195,14 +5199,18 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) virTypedParameterPtr params = NULL; int nparams = 0; unsigned long long value; + unsigned int flags = 0; int rc;
if (!(dom = vshCommandOptDomain(ctl, cmd, NULL))) return false;
+ if (vshCommandOptBool(cmd, "completed")) + flags |= VIR_DOMAIN_JOB_STATS_COMPLETED; + memset(&info, 0, sizeof(info));
- rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, 0); + rc = virDomainGetJobStats(dom, &info.type, ¶ms, &nparams, flags); if (rc == 0) { if (virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_TIME_ELAPSED, @@ -5239,6 +5247,11 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &info.fileRemaining) < 0) goto save_error; } else if (last_error->code == VIR_ERR_NO_SUPPORT) { + if (flags) { + vshError(ctl, "%s", _("Optional flags are not supported by the " + "daemon")); + goto cleanup; + } vshDebug(ctl, VSH_ERR_DEBUG, "detailed statistics not supported\n"); vshResetLibvirtError(); rc = virDomainGetJobInfo(dom, &info); @@ -5249,7 +5262,9 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) vshPrint(ctl, "%-17s %-12s\n", _("Job type:"), vshDomainJobToString(info.type)); if (info.type != VIR_DOMAIN_JOB_BOUNDED && - info.type != VIR_DOMAIN_JOB_UNBOUNDED) { + info.type != VIR_DOMAIN_JOB_UNBOUNDED && + (!(flags & VIR_DOMAIN_JOB_STATS_COMPLETED) || + info.type != VIR_DOMAIN_JOB_COMPLETED)) { ret = true; goto cleanup; } @@ -5314,7 +5329,13 @@ cmdDomjobinfo(vshControl *ctl, const vshCmd *cmd) &value)) < 0) { goto save_error; } else if (rc) { - vshPrint(ctl, "%-17s %-12llu ms\n", _("Expected downtime:"), value); + if (info.type == VIR_DOMAIN_JOB_COMPLETED) { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Total downtime:"), value); + } else { + vshPrint(ctl, "%-17s %-12llu ms\n", + _("Expected downtime:"), value); + } }
if ((rc = virTypedParamsGetULLong(params, nparams, diff --git a/tools/virsh.pod b/tools/virsh.pod index ea9267e..3c71db9 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1112,9 +1112,10 @@ Convert a domain name (or UUID) to a domain id
Abort the currently running domain job.
-=item B<domjobinfo> I<domain> +=item B<domjobinfo> I<domain> [I<--completed>]
-Returns information about jobs running on a domain. +Returns information about jobs running on a domain. I<--completed> tells +virsh to return information about a recently finished job.
Perhaps say a bit more about how reading the statistics once, or a libvirtd restart, will cause them to be destroyed. ACK with that. John
=item B<domname> I<domain-id-or-uuid>

When migrating a transient domain or with VIR_MIGRATE_UNDEFINE_SOURCE flag, the domain may disappear from source host. And so will migration statistics associated with the domain. We need to transfer the statistics at the end of a migration so that they can be queried at the destination host. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 208a21f..f1b3d50 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -80,6 +80,7 @@ enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT, QEMU_MIGRATION_COOKIE_FLAG_NETWORK, QEMU_MIGRATION_COOKIE_FLAG_NBD, + QEMU_MIGRATION_COOKIE_FLAG_STATS, QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -91,7 +92,8 @@ VIR_ENUM_IMPL(qemuMigrationCookieFlag, "lockstate", "persistent", "network", - "nbd"); + "nbd", + "statistics"); enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), @@ -99,6 +101,7 @@ enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_PERSISTENT = (1 << QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT), QEMU_MIGRATION_COOKIE_NETWORK = (1 << QEMU_MIGRATION_COOKIE_FLAG_NETWORK), QEMU_MIGRATION_COOKIE_NBD = (1 << QEMU_MIGRATION_COOKIE_FLAG_NBD), + QEMU_MIGRATION_COOKIE_STATS = (1 << QEMU_MIGRATION_COOKIE_FLAG_STATS), }; typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -169,6 +172,9 @@ struct _qemuMigrationCookie { /* If (flags & QEMU_MIGRATION_COOKIE_NBD) */ qemuMigrationCookieNBDPtr nbd; + + /* If (flags & QEMU_MIGRATION_COOKIE_STATS) */ + qemuDomainJobInfoPtr jobInfo; }; static void qemuMigrationCookieGraphicsFree(qemuMigrationCookieGraphicsPtr grap) @@ -533,6 +539,25 @@ qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, } +static int +qemuMigrationCookieAddStatistics(qemuMigrationCookiePtr mig, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (!priv->job.completed) + return 0; + + if (!mig->jobInfo && VIR_ALLOC(mig->jobInfo) < 0) + return -1; + + *mig->jobInfo = *priv->job.completed; + mig->flags |= QEMU_MIGRATION_COOKIE_STATS; + + return 0; +} + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -589,6 +614,81 @@ qemuMigrationCookieNetworkXMLFormat(virBufferPtr buf, } +static void +qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, + qemuDomainJobInfoPtr jobInfo) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + + virBufferAddLit(buf, "<statistics>\n"); + virBufferAdjustIndent(buf, 2); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining); + if (status->downtime_set) + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_TOTAL, + status->ram_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining); + + if (status->ram_duplicate_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate); + virBufferAsprintf(buf, 
"<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes); + } + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining); + + if (status->xbzrle_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow); + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</statistics>\n"); +} + + static int qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferPtr buf, @@ -650,6 +750,9 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferAddLit(buf, "/>\n"); } + if (mig->flags & QEMU_MIGRATION_COOKIE_STATS && mig->jobInfo) + qemuMigrationCookieStatisticsXMLFormat(buf, mig->jobInfo); + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</qemu-migration>\n"); return 0; @@ -772,6 +875,70 @@ qemuMigrationCookieNetworkXMLParse(xmlXPathContextPtr ctxt) } +static qemuDomainJobInfoPtr +qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) +{ + qemuDomainJobInfoPtr jobInfo = NULL; + qemuMonitorMigrationStatus *status; + xmlNodePtr save_ctxt = ctxt->node; + + if (!(ctxt->node = virXPathNode("./statistics", ctxt))) + goto cleanup; + + if (VIR_ALLOC(jobInfo) < 0) + goto cleanup; + + status = &jobInfo->status; + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", + ctxt, &jobInfo->timeElapsed); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", + ctxt, &jobInfo->timeRemaining); + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", + ctxt, &status->downtime) == 0) + status->downtime_set = true; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_TOTAL "[1])", + ctxt, &status->ram_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_PROCESSED "[1])", + ctxt, &status->ram_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_REMAINING "[1])", + ctxt, &status->ram_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_CONSTANT "[1])", + ctxt, &status->ram_duplicate) == 0) + status->ram_duplicate_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL "[1])", + ctxt, &status->ram_normal); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES "[1])", + ctxt, &status->ram_normal_bytes); + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_TOTAL "[1])", + ctxt, &status->disk_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_PROCESSED "[1])", + ctxt, &status->disk_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_REMAINING "[1])", + ctxt, &status->disk_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE "[1])", + ctxt, &status->xbzrle_cache_size) 
== 0) + status->xbzrle_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_BYTES "[1])", + ctxt, &status->xbzrle_bytes); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_PAGES "[1])", + ctxt, &status->xbzrle_pages); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES "[1])", + ctxt, &status->xbzrle_cache_miss); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW "[1])", + ctxt, &status->xbzrle_overflow); + + cleanup: + ctxt->node = save_ctxt; + return jobInfo; +} + + static int qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, virQEMUDriverPtr driver, @@ -947,6 +1114,11 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, VIR_FREE(port); } + if (flags & QEMU_MIGRATION_COOKIE_STATS && + virXPathBoolean("boolean(./statistics)", ctxt) && + (!(mig->jobInfo = qemuMigrationCookieStatisticsXMLParse(ctxt)))) + goto error; + virObjectUnref(caps); return 0; @@ -1017,6 +1189,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddNBD(mig, driver, dom) < 0) return -1; + if (flags & QEMU_MIGRATION_COOKIE_STATS && + qemuMigrationCookieAddStatistics(mig, dom) < 0) + return -1; + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(driver, mig))) return -1; @@ -3424,7 +3600,8 @@ qemuMigrationRun(virQEMUDriverPtr driver, if (priv->job.completed) qemuDomainJobInfoUpdateTime(priv->job.completed); - cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; + cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; if (ret == 0 && @@ -4508,8 +4685,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, : QEMU_MIGRATION_PHASE_FINISH2); qemuDomainCleanupRemove(vm, qemuMigrationPrepareCleanup); + VIR_FREE(priv->job.completed); - cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK; + cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT; @@ -4527,6 +4706,11 @@ qemuMigrationFinish(virQEMUDriverPtr driver, goto endjob; } + if (mig->jobInfo) { + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; + } + if (!(flags & VIR_MIGRATE_OFFLINE)) { if (qemuMigrationVPAssociatePortProfiles(vm->def) < 0) { qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, -- 2.1.0
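Purely for illustration, the <statistics> section produced by qemuMigrationCookieStatisticsXMLFormat would look roughly like the fragment below; the element names are simply the VIR_DOMAIN_JOB_* typed-parameter names (assuming the usual macro values), the numbers are made up, and the optional constant/normal/xbzrle elements only appear when the corresponding *_set flags are true:

    <statistics>
      <time_elapsed>8234</time_elapsed>
      <time_remaining>0</time_remaining>
      <downtime>65</downtime>
      <memory_total>1073741824</memory_total>
      <memory_processed>1100000000</memory_processed>
      <memory_remaining>0</memory_remaining>
      <disk_total>0</disk_total>
      <disk_processed>0</disk_processed>
      <disk_remaining>0</disk_remaining>
    </statistics>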

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
When migrating a transient domain or with VIR_MIGRATE_UNDEFINE_SOURCE flag, the domain may disappear from source host. And so will migration statistics associated with the domain. We need to transfer the statistics at the end of a migration so that they can be queried at the destination host.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 208a21f..f1b3d50 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -80,6 +80,7 @@ enum qemuMigrationCookieFlags { QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT, QEMU_MIGRATION_COOKIE_FLAG_NETWORK, QEMU_MIGRATION_COOKIE_FLAG_NBD, + QEMU_MIGRATION_COOKIE_FLAG_STATS,
QEMU_MIGRATION_COOKIE_FLAG_LAST }; @@ -91,7 +92,8 @@ VIR_ENUM_IMPL(qemuMigrationCookieFlag, "lockstate", "persistent", "network", - "nbd"); + "nbd", + "statistics");
enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << QEMU_MIGRATION_COOKIE_FLAG_GRAPHICS), @@ -99,6 +101,7 @@ enum qemuMigrationCookieFeatures { QEMU_MIGRATION_COOKIE_PERSISTENT = (1 << QEMU_MIGRATION_COOKIE_FLAG_PERSISTENT), QEMU_MIGRATION_COOKIE_NETWORK = (1 << QEMU_MIGRATION_COOKIE_FLAG_NETWORK), QEMU_MIGRATION_COOKIE_NBD = (1 << QEMU_MIGRATION_COOKIE_FLAG_NBD), + QEMU_MIGRATION_COOKIE_STATS = (1 << QEMU_MIGRATION_COOKIE_FLAG_STATS), };
typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; @@ -169,6 +172,9 @@ struct _qemuMigrationCookie {
/* If (flags & QEMU_MIGRATION_COOKIE_NBD) */ qemuMigrationCookieNBDPtr nbd; + + /* If (flags & QEMU_MIGRATION_COOKIE_STATS) */ + qemuDomainJobInfoPtr jobInfo; };
static void qemuMigrationCookieGraphicsFree(qemuMigrationCookieGraphicsPtr grap) @@ -533,6 +539,25 @@ qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, }
+static int +qemuMigrationCookieAddStatistics(qemuMigrationCookiePtr mig, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (!priv->job.completed) + return 0; + + if (!mig->jobInfo && VIR_ALLOC(mig->jobInfo) < 0) + return -1; + + *mig->jobInfo = *priv->job.completed; + mig->flags |= QEMU_MIGRATION_COOKIE_STATS; + + return 0; +} + + static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, qemuMigrationCookieGraphicsPtr grap) { @@ -589,6 +614,81 @@ qemuMigrationCookieNetworkXMLFormat(virBufferPtr buf, }
+static void +qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, + qemuDomainJobInfoPtr jobInfo) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + + virBufferAddLit(buf, "<statistics>\n"); + virBufferAdjustIndent(buf, 2); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining);
qemuDomainJobInfoToParams will use jobInfo->type == VIR_DOMAIN_JOB_BOUNDED when printing the above - does this need to as well? I would suspect this would be zero anyway, right? Since the job is done.
+ if (status->downtime_set) + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime);
What about the VIR_DOMAIN_JOB_DATA_* values? I know they are calculable, but since they're
+ + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_TOTAL, + status->ram_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_PROCESSED, + status->ram_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_REMAINING, + status->ram_remaining); + + if (status->ram_duplicate_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_CONSTANT, + status->ram_duplicate); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL, + status->ram_normal); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES, + status->ram_normal_bytes); + } + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_TOTAL, + status->disk_total); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_PROCESSED, + status->disk_transferred); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DISK_REMAINING, + status->disk_remaining); + + if (status->xbzrle_set) { + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE, + status->xbzrle_cache_size); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_BYTES, + status->xbzrle_bytes); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_PAGES, + status->xbzrle_pages); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES, + status->xbzrle_cache_miss); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW, + status->xbzrle_overflow); + } + + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</statistics>\n"); +} + + static int qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferPtr buf, @@ -650,6 +750,9 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferAddLit(buf, "/>\n"); }
+ if (mig->flags & QEMU_MIGRATION_COOKIE_STATS && mig->jobInfo) + qemuMigrationCookieStatisticsXMLFormat(buf, mig->jobInfo); + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</qemu-migration>\n"); return 0; @@ -772,6 +875,70 @@ qemuMigrationCookieNetworkXMLParse(xmlXPathContextPtr ctxt) }
+static qemuDomainJobInfoPtr +qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) +{ + qemuDomainJobInfoPtr jobInfo = NULL; + qemuMonitorMigrationStatus *status; + xmlNodePtr save_ctxt = ctxt->node; + + if (!(ctxt->node = virXPathNode("./statistics", ctxt))) + goto cleanup; + + if (VIR_ALLOC(jobInfo) < 0) + goto cleanup; + + status = &jobInfo->status; + jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", + ctxt, &jobInfo->timeElapsed); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", + ctxt, &jobInfo->timeRemaining); + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", + ctxt, &status->downtime) == 0) + status->downtime_set = true; + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_TOTAL "[1])", + ctxt, &status->ram_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_PROCESSED "[1])", + ctxt, &status->ram_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_REMAINING "[1])", + ctxt, &status->ram_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_CONSTANT "[1])", + ctxt, &status->ram_duplicate) == 0) + status->ram_duplicate_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL "[1])", + ctxt, &status->ram_normal); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_MEMORY_NORMAL_BYTES "[1])", + ctxt, &status->ram_normal_bytes); + + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_TOTAL "[1])", + ctxt, &status->disk_total); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_PROCESSED "[1])", + ctxt, &status->disk_transferred); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_DISK_REMAINING "[1])", + ctxt, &status->disk_remaining); + + if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE "[1])", + ctxt, &status->xbzrle_cache_size) == 0) + status->xbzrle_set = true; + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_BYTES "[1])", + ctxt, &status->xbzrle_bytes); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_PAGES "[1])", + ctxt, &status->xbzrle_pages); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_CACHE_MISSES "[1])", + ctxt, &status->xbzrle_cache_miss); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_COMPRESSION_OVERFLOW "[1])", + ctxt, &status->xbzrle_overflow); + + cleanup: + ctxt->node = save_ctxt; + return jobInfo; +} + + static int qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, virQEMUDriverPtr driver, @@ -947,6 +1114,11 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, VIR_FREE(port); }
+ if (flags & QEMU_MIGRATION_COOKIE_STATS && + virXPathBoolean("boolean(./statistics)", ctxt) && + (!(mig->jobInfo = qemuMigrationCookieStatisticsXMLParse(ctxt)))) + goto error; + virObjectUnref(caps); return 0;
@@ -1017,6 +1189,10 @@ qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, qemuMigrationCookieAddNBD(mig, driver, dom) < 0) return -1;
+ if (flags & QEMU_MIGRATION_COOKIE_STATS && + qemuMigrationCookieAddStatistics(mig, dom) < 0) + return -1; +
In the "not that it matters" department: some flags checks have "(flags & FLAG)" while others go with just "flags & FLAG" - doesn't matter to me, but it's not consistent.
if (!(*cookieout = qemuMigrationCookieXMLFormatStr(driver, mig))) return -1;
@@ -3424,7 +3600,8 @@ qemuMigrationRun(virQEMUDriverPtr driver, if (priv->job.completed) qemuDomainJobInfoUpdateTime(priv->job.completed);
- cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK; + cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookieFlags |= QEMU_MIGRATION_COOKIE_PERSISTENT; if (ret == 0 && @@ -4508,8 +4685,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, : QEMU_MIGRATION_PHASE_FINISH2);
qemuDomainCleanupRemove(vm, qemuMigrationPrepareCleanup); + VIR_FREE(priv->job.completed);
This seems out of place - is it because of the impending mig->jobInfo copy? What if that data wasn't there? We just wipe out whatever was possibly there. ACK - seems OK to me. Just curious about the above. John
- cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK; + cookie_flags = QEMU_MIGRATION_COOKIE_NETWORK | + QEMU_MIGRATION_COOKIE_STATS; if (flags & VIR_MIGRATE_PERSIST_DEST) cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT;
@@ -4527,6 +4706,11 @@ qemuMigrationFinish(virQEMUDriverPtr driver, goto endjob; }
+ if (mig->jobInfo) { + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; + } + if (!(flags & VIR_MIGRATE_OFFLINE)) { if (qemuMigrationVPAssociatePortProfiles(vm->def) < 0) { qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED,

On Fri, Sep 05, 2014 at 14:47:09 -0400, John Ferlan wrote:
On 09/01/2014 11:05 AM, Jiri Denemark wrote:
When migrating a transient domain or with VIR_MIGRATE_UNDEFINE_SOURCE flag, the domain may disappear from source host. And so will migration statistics associated with the domain. We need to transfer the statistics at the end of a migration so that they can be queried at the destination host.
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 190 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 187 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 208a21f..f1b3d50 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c
...
@@ -589,6 +614,81 @@ qemuMigrationCookieNetworkXMLFormat(virBufferPtr buf, }
+static void +qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, + qemuDomainJobInfoPtr jobInfo) +{ + qemuMonitorMigrationStatus *status = &jobInfo->status; + + virBufferAddLit(buf, "<statistics>\n"); + virBufferAdjustIndent(buf, 2); + + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_ELAPSED, + jobInfo->timeElapsed); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_TIME_REMAINING, + jobInfo->timeRemaining);
qemuDomainJobInfoToParams will use jobInfo->type == VIR_DOMAIN_JOB_BOUNDED when printing the above - does this need to as well?
Not really, this just passes raw data from qemuDomainJobInfo and the destination host will handle them in the same way. That is, it will basically ignore this.
I would suspect this would be zero anyway, right? Since the job is done.
Right. It's actually pretty useless but I just wanted to blindly transfer anything that the source knows about without thinking whether it's needed or not. The destination will interpret the data in the way it needs to.
+ if (status->downtime_set) + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", + VIR_DOMAIN_JOB_DOWNTIME, + status->downtime);
What about the VIR_DOMAIN_JOB_DATA_* values? I know they are calculable, but since they're
Hmm, NMI in the middle of sentence? :-) Anyway, as I said the goal is to transfer raw data from qemuDomainJobInfo rather than processed values that the end user will get from virDomainGetJobStats. And qemuDomainJobInfo only contains separated memory and disk data. Jirka

Total time of a migration and total downtime transfered from a source to a destination host do not count with the transfer time to the destination host and with the time elapsed before guest CPUs are resumed. Thus, source libvirtd remembers when migration started and when guest CPUs were paused. Both timestamps are transferred to destination libvirtd which uses them to compute total migration time and total downtime. This, obviously, requires clock to be synchronized between the two hosts. The reported times are useless otherwise but they would be equally useless if we didn't do this recomputation so don't lose anything by doing it. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt.c | 5 ++++- src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_migration.c | 15 ++++++++++++++- src/qemu/qemu_process.c | 9 ++++++++- tools/virsh.pod | 5 ++++- 6 files changed, 60 insertions(+), 4 deletions(-) diff --git a/src/libvirt.c b/src/libvirt.c index 6fa0a6b..61d0543 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17581,7 +17581,10 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * return statistics about a recently completed job. Specifically, this * flag may be used to query statistics of a completed incoming migration. * Statistics of a completed job are automatically destroyed once read or - * when libvirtd is restarted. + * when libvirtd is restarted. Note that time information returned for + * completed migrations may be completely irrelevant unless both source and + * destination hosts have synchronized time (i.e., NTP daemon is running on + * both of them). * * Returns 0 in case of success and -1 in case of failure. */ diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 18a3761..cec7828 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) if (virTimeMillisNow(&now) < 0) return -1; + if (now < jobInfo->started) { + VIR_WARN("Async job starts in the future"); + jobInfo->started = 0; + return 0; + } + jobInfo->timeElapsed = now - jobInfo->started; return 0; } int +qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; + + if (!jobInfo->stopped) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + if (now < jobInfo->stopped) { + VIR_WARN("Guest's CPUs stopped in the future"); + jobInfo->stopped = 0; + return 0; + } + + jobInfo->status.downtime = now - jobInfo->stopped; + jobInfo->status.downtime_set = true; + return 0; +} + +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 365238b..435a22b 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; struct _qemuDomainJobInfo { virDomainJobType type; unsigned long long started; /* When the async job started */ + unsigned long long stopped; /* When the domain's CPUs were stopped */ /* Computed values */ unsigned long long timeElapsed; unsigned long long timeRemaining; @@ -390,6 +391,7 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError); int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo); int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info); int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, 
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index f1b3d50..43b42ac 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -623,6 +623,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, virBufferAddLit(buf, "<statistics>\n"); virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started); + virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_TIME_ELAPSED, jobInfo->timeElapsed); @@ -891,6 +894,9 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) status = &jobInfo->status; jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); + virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", ctxt, &jobInfo->timeElapsed); virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", @@ -2015,6 +2021,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + qemuDomainJobInfoUpdateDowntime(jobInfo); VIR_FREE(priv->job.completed); if (VIR_ALLOC(priv->job.completed) == 0) *priv->job.completed = *jobInfo; @@ -3597,8 +3604,10 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); } - if (priv->job.completed) + if (priv->job.completed) { qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + } cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | QEMU_MIGRATION_COOKIE_STATS; @@ -4811,6 +4820,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, } goto endjob; } + if (priv->job.completed) { + qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + } } dom = virGetDomain(dconn, vm->def->name, vm->def->uuid); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index f68dfbe..c4c1ce5 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -754,6 +754,9 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("Transitioned guest %s to paused state", vm->def->name); + if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN); event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, @@ -2888,7 +2891,8 @@ qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, } -int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, +int qemuProcessStopCPUs(virQEMUDriverPtr driver, + virDomainObjPtr vm, virDomainPausedReason reason, qemuDomainAsyncJob asyncJob) { @@ -2906,6 +2910,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, if (ret < 0) goto cleanup; + if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) VIR_WARN("Unable to release lease on %s", vm->def->name); diff --git a/tools/virsh.pod b/tools/virsh.pod index 3c71db9..13187ce 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1115,7 +1115,10 @@ Abort the currently running domain job. =item B<domjobinfo> I<domain> [I<--completed>] Returns information about jobs running on a domain. I<--completed> tells -virsh to return information about a recently finished job. 
+virsh to return information about a recently finished job. Note that time +information returned for completed migrations may be completely irrelevant +unless both source and destination hosts have synchronized time (i.e., NTP +daemon is running on both of them). =item B<domname> I<domain-id-or-uuid> -- 2.1.0
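As a concrete illustration of the recomputation (timestamps made up): if the source records started = 1000000000 ms when the migration begins and stopped = 1000000500 ms when the guest CPUs are paused, and the destination's qemuMigrationFinish runs the two update helpers at now = 1000000800 ms, the completed-job stats end up with timeElapsed = 800 ms and downtime = 300 ms - both now covering the tail of the transfer and the CPU resume on the destination, rather than only what the source side saw.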

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
Total time of a migration and total downtime transfered from a source to a destination host do not count with the transfer time to the destination host and with the time elapsed before guest CPUs are resumed. Thus, source libvirtd remembers when migration started and when guest CPUs were paused. Both timestamps are transferred to destination libvirtd which uses them to compute total migration time and total downtime. This, obviously, requires clock to be synchronized between the
s/This, obviously,/Obviously this/ "requires clock" reads funny... "requires the time" seems closer
two hosts. The reported times are useless otherwise but they would be equally useless if we didn't do this recomputation so don't lose anything by doing it.
Say nothing of inter-timezone migrations right?
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt.c | 5 ++++- src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_migration.c | 15 ++++++++++++++- src/qemu/qemu_process.c | 9 ++++++++- tools/virsh.pod | 5 ++++- 6 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/src/libvirt.c b/src/libvirt.c index 6fa0a6b..61d0543 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -17581,7 +17581,10 @@ virDomainGetJobInfo(virDomainPtr domain, virDomainJobInfoPtr info) * return statistics about a recently completed job. Specifically, this * flag may be used to query statistics of a completed incoming migration. * Statistics of a completed job are automatically destroyed once read or - * when libvirtd is restarted. + * when libvirtd is restarted. Note that time information returned for + * completed migrations may be completely irrelevant unless both source and + * destination hosts have synchronized time (i.e., NTP daemon is running on + * both of them). * * Returns 0 in case of success and -1 in case of failure. */ diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 18a3761..cec7828 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) if (virTimeMillisNow(&now) < 0) return -1;
+ if (now < jobInfo->started) { + VIR_WARN("Async job starts in the future"); + jobInfo->started = 0; + return 0; + } + jobInfo->timeElapsed = now - jobInfo->started; return 0; }
int +qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; +
Can jobInfo == NULL? - It's the qemuMigrationWaitForCompletion() path timing concern from patch 1.
+ if (!jobInfo->stopped) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + if (now < jobInfo->stopped) { + VIR_WARN("Guest's CPUs stopped in the future"); + jobInfo->stopped = 0; + return 0; + } + + jobInfo->status.downtime = now - jobInfo->stopped; + jobInfo->status.downtime_set = true; + return 0; +} + +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 365238b..435a22b 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; struct _qemuDomainJobInfo { virDomainJobType type; unsigned long long started; /* When the async job started */ + unsigned long long stopped; /* When the domain's CPUs were stopped */ /* Computed values */ unsigned long long timeElapsed; unsigned long long timeRemaining; @@ -390,6 +391,7 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError);
int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo);
Does this also need some sort of ATTRIBUTE_NONNULL(1)? ACK in general John
int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info); int qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index f1b3d50..43b42ac 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -623,6 +623,9 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, virBufferAddLit(buf, "<statistics>\n"); virBufferAdjustIndent(buf, 2);
+ virBufferAsprintf(buf, "<started>%llu</started>\n", jobInfo->started); + virBufferAsprintf(buf, "<stopped>%llu</stopped>\n", jobInfo->stopped); + virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_TIME_ELAPSED, jobInfo->timeElapsed); @@ -891,6 +894,9 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) status = &jobInfo->status; jobInfo->type = VIR_DOMAIN_JOB_COMPLETED;
+ virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); + virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); + virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", ctxt, &jobInfo->timeElapsed); virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", @@ -2015,6 +2021,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, }
if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { + qemuDomainJobInfoUpdateDowntime(jobInfo); VIR_FREE(priv->job.completed); if (VIR_ALLOC(priv->job.completed) == 0) *priv->job.completed = *jobInfo; @@ -3597,8 +3604,10 @@ qemuMigrationRun(virQEMUDriverPtr driver, VIR_FORCE_CLOSE(fd); }
- if (priv->job.completed) + if (priv->job.completed) { qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + }
cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | QEMU_MIGRATION_COOKIE_STATS; @@ -4811,6 +4820,10 @@ qemuMigrationFinish(virQEMUDriverPtr driver, } goto endjob; } + if (priv->job.completed) { + qemuDomainJobInfoUpdateTime(priv->job.completed); + qemuDomainJobInfoUpdateDowntime(priv->job.completed); + } }
dom = virGetDomain(dconn, vm->def->name, vm->def->uuid); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index f68dfbe..c4c1ce5 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -754,6 +754,9 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("Transitioned guest %s to paused state", vm->def->name);
+ if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN); event = virDomainEventLifecycleNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, @@ -2888,7 +2891,8 @@ qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, }
-int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, +int qemuProcessStopCPUs(virQEMUDriverPtr driver, + virDomainObjPtr vm, virDomainPausedReason reason, qemuDomainAsyncJob asyncJob) { @@ -2906,6 +2910,9 @@ int qemuProcessStopCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm, if (ret < 0) goto cleanup;
+ if (priv->job.current) + ignore_value(virTimeMillisNow(&priv->job.current->stopped)); + virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason); if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0) VIR_WARN("Unable to release lease on %s", vm->def->name); diff --git a/tools/virsh.pod b/tools/virsh.pod index 3c71db9..13187ce 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1115,7 +1115,10 @@ Abort the currently running domain job. =item B<domjobinfo> I<domain> [I<--completed>]
Returns information about jobs running on a domain. I<--completed> tells -virsh to return information about a recently finished job. +virsh to return information about a recently finished job. Note that time +information returned for completed migrations may be completely irrelevant +unless both source and destination hosts have synchronized time (i.e., NTP +daemon is running on both of them).
=item B<domname> I<domain-id-or-uuid>

On Fri, Sep 05, 2014 at 14:47:35 -0400, John Ferlan wrote:
On 09/01/2014 11:05 AM, Jiri Denemark wrote:
Total time of a migration and total downtime transfered from a source to a destination host do not count with the transfer time to the destination host and with the time elapsed before guest CPUs are resumed. Thus, source libvirtd remembers when migration started and when guest CPUs were paused. Both timestamps are transferred to destination libvirtd which uses them to compute total migration time and total downtime. This, obviously, requires clock to be synchronized between the
s/This, obviously,/Obviously this/
"requires clock" reads funny... "requires the time" seems closer
two hosts. The reported times are useless otherwise but they would be equally useless if we didn't do this recomputation so don't lose anything by doing it.
Say nothing of inter-timezone migrations right?
There are no timezones in timestamps. It's all UTC until you try to convert it into something humans understand, which we don't need to do :-)
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt.c | 5 ++++- src/qemu/qemu_domain.c | 28 ++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_migration.c | 15 ++++++++++++++- src/qemu/qemu_process.c | 9 ++++++++- tools/virsh.pod | 5 ++++- 6 files changed, 60 insertions(+), 4 deletions(-)
...
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 18a3761..cec7828 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -222,11 +222,39 @@ qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo) if (virTimeMillisNow(&now) < 0) return -1;
+ if (now < jobInfo->started) { + VIR_WARN("Async job starts in the future"); + jobInfo->started = 0; + return 0; + } + jobInfo->timeElapsed = now - jobInfo->started; return 0; }
int +qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) +{ + unsigned long long now; +
Can jobInfo == NULL? - It's the qemuMigrationWaitForCompletion() path timing concern from patch 1.
No, I added ATTRIBUTE_NONNULL to this API too.
+ if (!jobInfo->stopped) + return 0; + + if (virTimeMillisNow(&now) < 0) + return -1; + + if (now < jobInfo->stopped) { + VIR_WARN("Guest's CPUs stopped in the future"); + jobInfo->stopped = 0; + return 0; + } + + jobInfo->status.downtime = now - jobInfo->stopped; + jobInfo->status.downtime_set = true; + return 0; +} + +int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) { diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 365238b..435a22b 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -105,6 +105,7 @@ typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; struct _qemuDomainJobInfo { virDomainJobType type; unsigned long long started; /* When the async job started */ + unsigned long long stopped; /* When the domain's CPUs were stopped */ /* Computed values */ unsigned long long timeElapsed; unsigned long long timeRemaining; @@ -390,6 +391,7 @@ bool qemuDomainAgentAvailable(qemuDomainObjPrivatePtr priv, bool reportError);
int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo); +int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo);
Does this also need some sort of ATTRIBUTE_NONNULL(1)?
Done. Jirka
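Presumably that means the qemu_domain.h prototypes now look roughly like this (a sketch of the follow-up change, not the committed diff):

    int qemuDomainJobInfoUpdateTime(qemuDomainJobInfoPtr jobInfo)
        ATTRIBUTE_NONNULL(1);
    int qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo)
        ATTRIBUTE_NONNULL(1);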

After previous commit, migration statistics on source and destination hosts are not equal because destination updated time statistics. Let's send the result back so that the same data can be queried on both end of a migration. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 43b42ac..873a756 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3019,9 +3019,27 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver, ? QEMU_MIGRATION_PHASE_CONFIRM3 : QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED); - if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_STATS))) goto cleanup; + /* Update total times with the values sent by the destination daemon */ + if (mig->jobInfo) { + qemuDomainObjPrivatePtr priv = vm->privateData; + if (priv->job.completed) { + qemuDomainJobInfoPtr jobInfo = priv->job.completed; + if (mig->jobInfo->status.downtime_set) { + jobInfo->status.downtime = mig->jobInfo->status.downtime; + jobInfo->status.downtime_set = true; + } + if (mig->jobInfo->timeElapsed) + jobInfo->timeElapsed = mig->jobInfo->timeElapsed; + } else { + priv->job.completed = mig->jobInfo; + mig->jobInfo = NULL; + } + } + if (flags & VIR_MIGRATE_OFFLINE) goto done; @@ -4860,7 +4878,8 @@ qemuMigrationFinish(virQEMUDriverPtr driver, VIR_DOMAIN_EVENT_STOPPED_FAILED); } - if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, 0) < 0) + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, + QEMU_MIGRATION_COOKIE_STATS) < 0) VIR_WARN("Unable to encode migration cookie"); endjob: -- 2.1.0

On 09/01/2014 11:05 AM, Jiri Denemark wrote:
After previous commit, migration statistics on source and destination hosts are not equal because destination updated time statistics. Let's send the result back so that the same data can be queried on both end of a migration.
My grammar bells and whistles are going off... "After the previous"... "on the source and..." "because the destination" "both ends of the migration" (or both sides)
Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_migration.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-)
ACK John