[libvirt] [PATCH v3 REBASE 00/16] qemu: migration: show disks stats for nbd migration

diff from v2:
============
1. Fix style issues.
2. Rework the patch for fetching job info (keep the logic of using a
   temporary variable while the vm lock is dropped).
3. Update disk stats when block jobs are canceled.
4. Address a few more corner cases.

This patch series adds disk stats to domain job info (stats) as well as
to the migration completed event when the nbd scheme is used.

There is a little nuisance with qcow2 disks (which is the main scenario,
I guess) tied to the way qemu reports stats for this type of disk. For
example, if we have a 64G disk filled only to 1G, then the stats start
from 63G and grow up to 64G on completion. Disk stats will be reported
the same way by this series. I guess a better way to express the
situation would be to say we have 64G 'total' and have the 'processed'
field grow from 0G to 1G, as in the case of memory stats. [1] is an
example of completed memory stats of an empty guest domain, which shows
the difference between processed and total. There could be a workaround,
like taking the initial blockjob offset position as zero, but it is
rather ugly and racy and likely relies on undocumented behaviour.

Patches that were explicitly ACKed in the previous review (up to style
issues) are marked with A.

[1] memory migration stats example

Memory processed:     3.307 MiB
Memory remaining:     0.000 B
Memory total:         1.032 GiB

Nikolay Shirokovskiy (16):
  qemu: drop code for VIR_DOMAIN_JOB_BOUNDED and timeRemaining        A
  qemu: introduce qemu domain job status                              A
  qemu: introduce QEMU_DOMAIN_JOB_STATUS_POSTCOPY                     A
  qemu: drop QEMU_MIGRATION_COMPLETED_UPDATE_STATS                    A
  qemu: drop excessive zero-out in qemuMigrationFetchJobStatus
  qemu: refactor fetching migration stats                             A
  qemu: simplify getting completed job stats
  qemu: fail querying destination migration statistics always
  qemu: start all async job with job status active
  qemu: introduce migrating job status
  qemu: always get job condition on getting job stats
  qemu: migrate: show disks stats on job info requests
  qemu: support getting disks stats during stopping block jobs
  qemu: migation: resolve race on getting job info and stopping block jobs
  qemu: migrate: copy disks stats to completed job
  qemu: migration: don't expose incomplete job as complete

 src/qemu/qemu_blockjob.c         |   1 +
 src/qemu/qemu_domain.c           |  38 +++++--
 src/qemu/qemu_domain.h           |  15 ++-
 src/qemu/qemu_driver.c           |  94 ++++++++--------
 src/qemu/qemu_migration.c        | 236 ++++++++++++++++++++++++++-------------
 src/qemu/qemu_migration.h        |  15 ++-
 src/qemu/qemu_migration_cookie.c |   7 +-
 src/qemu/qemu_monitor.c          |   5 +-
 src/qemu/qemu_monitor.h          |   4 +-
 src/qemu/qemu_monitor_json.c     |   4 +-
 src/qemu/qemu_process.c          |  11 +-
 tests/qemumonitorjsontest.c      |   1 +
 12 files changed, 272 insertions(+), 159 deletions(-)

-- 
1.8.3.1
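[Editor's note] For reference, a minimal sketch of how a management
application could read the new disk totals through the existing public
job-stats API once this series is applied (error handling trimmed; the
VIR_DOMAIN_JOB_DISK_* typed parameters already exist in libvirt and are
not introduced by this series):

    #include <libvirt/libvirt.h>
    #include <stdio.h>

    /* Print disk (NBD) migration progress for a migrating domain.
     * Minimal sketch: a real client should check every return value. */
    static void
    printDiskProgress(virDomainPtr dom)
    {
        virTypedParameterPtr params = NULL;
        int nparams = 0;
        int type;
        unsigned long long total = 0, processed = 0, remaining = 0;

        if (virDomainGetJobStats(dom, &type, &params, &nparams, 0) < 0)
            return;

        virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_DISK_TOTAL, &total);
        virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_DISK_PROCESSED, &processed);
        virTypedParamsGetULLong(params, nparams, VIR_DOMAIN_JOB_DISK_REMAINING, &remaining);

        printf("disk: %llu of %llu bytes transferred, %llu remaining\n",
               processed, total, remaining);

        virTypedParamsFree(params, nparams);
    }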

qemu driver does not have VIR_DOMAIN_JOB_BOUNDED jobs and timeRemaining is always 0. --- src/qemu/qemu_domain.c | 7 ------- src/qemu/qemu_domain.h | 1 - src/qemu/qemu_migration_cookie.c | 5 ----- 3 files changed, 13 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index e2531cd..8545137 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -418,7 +418,6 @@ qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, { info->type = jobInfo->type; info->timeElapsed = jobInfo->timeElapsed; - info->timeRemaining = jobInfo->timeRemaining; info->memTotal = jobInfo->stats.ram_total; info->memRemaining = jobInfo->stats.ram_remaining; @@ -463,12 +462,6 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, jobInfo->timeElapsed - jobInfo->timeDelta) < 0) goto error; - if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED && - virTypedParamsAddULLong(&par, &npar, &maxpar, - VIR_DOMAIN_JOB_TIME_REMAINING, - jobInfo->timeRemaining) < 0) - goto error; - if (stats->downtime_set && virTypedParamsAddULLong(&par, &npar, &maxpar, VIR_DOMAIN_JOB_DOWNTIME, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 4c9050a..860e70f 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -112,7 +112,6 @@ struct _qemuDomainJobInfo { info from the source (migrations only). */ /* Computed values */ unsigned long long timeElapsed; - unsigned long long timeRemaining; long long timeDelta; /* delta = received - sent, i.e., the difference between the source and the destination time plus the time between the end of Perform phase on the diff --git a/src/qemu/qemu_migration_cookie.c b/src/qemu/qemu_migration_cookie.c index af0ac03..5f8595f 100644 --- a/src/qemu/qemu_migration_cookie.c +++ b/src/qemu/qemu_migration_cookie.c @@ -612,9 +612,6 @@ qemuMigrationCookieStatisticsXMLFormat(virBufferPtr buf, virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_TIME_ELAPSED, jobInfo->timeElapsed); - virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", - VIR_DOMAIN_JOB_TIME_REMAINING, - jobInfo->timeRemaining); if (stats->downtime_set) virBufferAsprintf(buf, "<%1$s>%2$llu</%1$s>\n", VIR_DOMAIN_JOB_DOWNTIME, @@ -987,8 +984,6 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_ELAPSED "[1])", ctxt, &jobInfo->timeElapsed); - virXPathULongLong("string(./" VIR_DOMAIN_JOB_TIME_REMAINING "[1])", - ctxt, &jobInfo->timeRemaining); if (virXPathULongLong("string(./" VIR_DOMAIN_JOB_DOWNTIME "[1])", ctxt, &stats->downtime) == 0) -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:38 +0300, Nikolay Shirokovskiy wrote:
qemu driver does not have VIR_DOMAIN_JOB_BOUNDED jobs and timeRemaining is always 0. --- src/qemu/qemu_domain.c | 7 ------- src/qemu/qemu_domain.h | 1 - src/qemu/qemu_migration_cookie.c | 5 ----- 3 files changed, 13 deletions(-)
ACK Jirka

This patch simply switches code from using VIR_DOMAIN_JOB_* to introduced QEMU_DOMAIN_JOB_STATUS_*. Later this gives us freedom to introduce states for postcopy and mirroring phases. --- src/qemu/qemu_domain.c | 27 ++++++++++++++++++++-- src/qemu/qemu_domain.h | 10 +++++++- src/qemu/qemu_driver.c | 11 ++++----- src/qemu/qemu_migration.c | 50 +++++++++++++++++++--------------------- src/qemu/qemu_migration_cookie.c | 2 +- src/qemu/qemu_process.c | 2 +- 6 files changed, 65 insertions(+), 37 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 8545137..bdd59b5 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -412,11 +412,34 @@ qemuDomainJobInfoUpdateDowntime(qemuDomainJobInfoPtr jobInfo) return 0; } +static virDomainJobType +qemuDomainJobStatusToType(qemuDomainJobStatus status) +{ + switch (status) { + case QEMU_DOMAIN_JOB_STATUS_NONE: + break; + + case QEMU_DOMAIN_JOB_STATUS_ACTIVE: + return VIR_DOMAIN_JOB_UNBOUNDED; + + case QEMU_DOMAIN_JOB_STATUS_COMPLETED: + return VIR_DOMAIN_JOB_COMPLETED; + + case QEMU_DOMAIN_JOB_STATUS_FAILED: + return VIR_DOMAIN_JOB_FAILED; + + case QEMU_DOMAIN_JOB_STATUS_CANCELED: + return VIR_DOMAIN_JOB_CANCELLED; + } + + return VIR_DOMAIN_JOB_NONE; +} + int qemuDomainJobInfoToInfo(qemuDomainJobInfoPtr jobInfo, virDomainJobInfoPtr info) { - info->type = jobInfo->type; + info->type = qemuDomainJobStatusToType(jobInfo->status); info->timeElapsed = jobInfo->timeElapsed; info->memTotal = jobInfo->stats.ram_total; @@ -576,7 +599,7 @@ qemuDomainJobInfoToParams(qemuDomainJobInfoPtr jobInfo, stats->cpu_throttle_percentage) < 0) goto error; - *type = jobInfo->type; + *type = qemuDomainJobStatusToType(jobInfo->status); *params = par; *nparams = npar; return 0; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 860e70f..7836dc5 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -99,10 +99,18 @@ typedef enum { } qemuDomainAsyncJob; VIR_ENUM_DECL(qemuDomainAsyncJob) +typedef enum { + QEMU_DOMAIN_JOB_STATUS_NONE = 0, + QEMU_DOMAIN_JOB_STATUS_ACTIVE, + QEMU_DOMAIN_JOB_STATUS_COMPLETED, + QEMU_DOMAIN_JOB_STATUS_FAILED, + QEMU_DOMAIN_JOB_STATUS_CANCELED, +} qemuDomainJobStatus; + typedef struct _qemuDomainJobInfo qemuDomainJobInfo; typedef qemuDomainJobInfo *qemuDomainJobInfoPtr; struct _qemuDomainJobInfo { - virDomainJobType type; + qemuDomainJobStatus status; virDomainJobOperation operation; unsigned long long started; /* When the async job started */ unsigned long long stopped; /* When the domain's CPUs were stopped */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e9f07c6..d5c70ee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3310,7 +3310,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; } - priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; /* Pause */ if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { @@ -12942,14 +12942,13 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, info = priv->job.current; if (!info) { - jobInfo->type = VIR_DOMAIN_JOB_NONE; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; ret = 0; goto cleanup; } *jobInfo = *info; - if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED || - jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) { if (fetch) ret = qemuMigrationFetchJobStatus(driver, vm, QEMU_ASYNC_JOB_NONE, jobInfo); @@ -12984,7 +12983,7 @@ qemuDomainGetJobInfo(virDomainPtr dom, if 
(qemuDomainGetJobStatsInternal(driver, vm, false, &jobInfo) < 0) goto cleanup; - if (jobInfo.type == VIR_DOMAIN_JOB_NONE) { + if (jobInfo.status == QEMU_DOMAIN_JOB_STATUS_NONE) { memset(info, 0, sizeof(*info)); info->type = VIR_DOMAIN_JOB_NONE; ret = 0; @@ -13025,7 +13024,7 @@ qemuDomainGetJobStats(virDomainPtr dom, if (qemuDomainGetJobStatsInternal(driver, vm, completed, &jobInfo) < 0) goto cleanup; - if (jobInfo.type == VIR_DOMAIN_JOB_NONE) { + if (jobInfo.status == QEMU_DOMAIN_JOB_STATUS_NONE) { *type = VIR_DOMAIN_JOB_NONE; *params = NULL; *nparams = 0; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index ca1f671..01416a6 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -966,7 +966,7 @@ qemuMigrationDriveMirror(virQEMUDriverPtr driver, goto cleanup; if (priv->job.abortJob) { - priv->job.current->type = VIR_DOMAIN_JOB_CANCELLED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_CANCELED; virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), qemuDomainAsyncJobTypeToString(priv->job.asyncJob), _("canceled by client")); @@ -1347,19 +1347,19 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo) { switch ((qemuMonitorMigrationStatus) jobInfo->stats.status) { case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: - jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_COMPLETED; break; case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: - jobInfo->type = VIR_DOMAIN_JOB_NONE; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; break; case QEMU_MONITOR_MIGRATION_STATUS_ERROR: - jobInfo->type = VIR_DOMAIN_JOB_FAILED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; break; case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: - jobInfo->type = VIR_DOMAIN_JOB_CANCELLED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_CANCELED; break; case QEMU_MONITOR_MIGRATION_STATUS_SETUP: @@ -1446,32 +1446,30 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, else if (qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) return -1; - switch (jobInfo->type) { - case VIR_DOMAIN_JOB_NONE: + switch (jobInfo->status) { + case QEMU_DOMAIN_JOB_STATUS_NONE: virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), qemuMigrationJobName(vm), _("is not active")); return -1; - case VIR_DOMAIN_JOB_FAILED: + case QEMU_DOMAIN_JOB_STATUS_FAILED: virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), qemuMigrationJobName(vm), _("unexpectedly failed")); return -1; - case VIR_DOMAIN_JOB_CANCELLED: + case QEMU_DOMAIN_JOB_STATUS_CANCELED: virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), qemuMigrationJobName(vm), _("canceled by client")); return -1; - case VIR_DOMAIN_JOB_COMPLETED: + case QEMU_DOMAIN_JOB_STATUS_COMPLETED: /* Fetch statistics of a completed migration */ if (events && updateJobStats && qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) return -1; break; - case VIR_DOMAIN_JOB_BOUNDED: - case VIR_DOMAIN_JOB_UNBOUNDED: - case VIR_DOMAIN_JOB_LAST: + case QEMU_DOMAIN_JOB_STATUS_ACTIVE: break; } return 0; @@ -1529,7 +1527,7 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, * will continue waiting until the migrate state changes to completed. 
*/ if (flags & QEMU_MIGRATION_COMPLETED_POSTCOPY && - jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED && + jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE && jobInfo->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) { VIR_DEBUG("Migration switched to post-copy"); if (updateStats && @@ -1538,18 +1536,18 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, return 1; } - if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_COMPLETED) return 1; else return 0; error: - if (jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) { /* The migration was aborted by us rather than QEMU itself. */ - jobInfo->type = VIR_DOMAIN_JOB_FAILED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; return -2; - } else if (jobInfo->type == VIR_DOMAIN_JOB_COMPLETED) { - jobInfo->type = VIR_DOMAIN_JOB_FAILED; + } else if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_COMPLETED) { + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; return -1; } else { return -1; @@ -1574,7 +1572,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, flags |= QEMU_MIGRATION_COMPLETED_UPDATE_STATS; - jobInfo->type = VIR_DOMAIN_JOB_UNBOUNDED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; while ((rv = qemuMigrationCompleted(driver, vm, asyncJob, dconn, flags)) != 1) { if (rv < 0) @@ -1582,7 +1580,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, if (events) { if (virDomainObjWait(vm) < 0) { - jobInfo->type = VIR_DOMAIN_JOB_FAILED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; return -2; } } else { @@ -3756,7 +3754,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, * as this is a critical section so we are guaranteed * priv->job.abortJob will not change */ ignore_value(qemuDomainObjExitMonitor(driver, vm)); - priv->job.current->type = VIR_DOMAIN_JOB_CANCELLED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_CANCELED; virReportError(VIR_ERR_OPERATION_ABORTED, _("%s: %s"), qemuDomainAsyncJobTypeToString(priv->job.asyncJob), _("canceled by client")); @@ -3890,8 +3888,8 @@ qemuMigrationRun(virQEMUDriverPtr driver, ignore_value(virTimeMillisNow(&priv->job.completed->sent)); } - if (priv->job.current->type == VIR_DOMAIN_JOB_UNBOUNDED && !inPostCopy) - priv->job.current->type = VIR_DOMAIN_JOB_FAILED; + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE && !inPostCopy) + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_FAILED; cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | QEMU_MIGRATION_COOKIE_STATS; @@ -3933,7 +3931,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, goto cleanup; cancelPostCopy: - priv->job.current->type = VIR_DOMAIN_JOB_FAILED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_FAILED; if (inPostCopy) goto cancel; else @@ -5652,7 +5650,7 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, return -1; qemuDomainObjSetAsyncJobMask(vm, mask); - priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; return 0; } diff --git a/src/qemu/qemu_migration_cookie.c b/src/qemu/qemu_migration_cookie.c index 5f8595f..4914c77 100644 --- a/src/qemu/qemu_migration_cookie.c +++ b/src/qemu/qemu_migration_cookie.c @@ -974,7 +974,7 @@ qemuMigrationCookieStatisticsXMLParse(xmlXPathContextPtr ctxt) goto cleanup; stats = &jobInfo->stats; - jobInfo->type = VIR_DOMAIN_JOB_COMPLETED; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_COMPLETED; virXPathULongLong("string(./started[1])", ctxt, &jobInfo->started); virXPathULongLong("string(./stopped[1])", ctxt, &jobInfo->stopped); diff 
--git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 589d0ed..0f96b28 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3979,7 +3979,7 @@ qemuProcessBeginJob(virQEMUDriverPtr driver, return -1; qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE); - priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; return 0; } -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:39 +0300, Nikolay Shirokovskiy wrote:
This patch simply switches code from using VIR_DOMAIN_JOB_* to introduced QEMU_DOMAIN_JOB_STATUS_*. Later this gives us freedom to introduce states for postcopy and mirroring phases. ... diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e9f07c6..d5c70ee 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12942,14 +12942,13 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, info = priv->job.current;
if (!info) { - jobInfo->type = VIR_DOMAIN_JOB_NONE; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; ret = 0; goto cleanup; } *jobInfo = *info;
- if (jobInfo->type == VIR_DOMAIN_JOB_BOUNDED ||
This line should have been removed in the previous patch. Not a big deal, though.
- jobInfo->type == VIR_DOMAIN_JOB_UNBOUNDED) { + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) { if (fetch) ret = qemuMigrationFetchJobStatus(driver, vm, QEMU_ASYNC_JOB_NONE, jobInfo);
ACK Jirka

Current code consults job.current->stats.status to check for postcopy state. First it is more correct to check for both job.current->status and job.current->stats.status.code because on some paths on failures we change only the former. Second if qemu supports migration events then stats can change unexpectedly. Let's introduce QEMU_DOMAIN_JOB_STATUS_POSTCOPY state for job.current->status. This patch removes all state checking usage of stats except for qemuDomainGetJobStatsInternal. This place will be handled separately. --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 5 +++-- src/qemu/qemu_migration.c | 18 +++++++++++------- src/qemu/qemu_process.c | 4 ++-- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bdd59b5..bfe1eaf 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -420,6 +420,7 @@ qemuDomainJobStatusToType(qemuDomainJobStatus status) break; case QEMU_DOMAIN_JOB_STATUS_ACTIVE: + case QEMU_DOMAIN_JOB_STATUS_POSTCOPY: return VIR_DOMAIN_JOB_UNBOUNDED; case QEMU_DOMAIN_JOB_STATUS_COMPLETED: diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 7836dc5..fbc5732 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -102,6 +102,7 @@ VIR_ENUM_DECL(qemuDomainAsyncJob) typedef enum { QEMU_DOMAIN_JOB_STATUS_NONE = 0, QEMU_DOMAIN_JOB_STATUS_ACTIVE, + QEMU_DOMAIN_JOB_STATUS_POSTCOPY, QEMU_DOMAIN_JOB_STATUS_COMPLETED, QEMU_DOMAIN_JOB_STATUS_FAILED, QEMU_DOMAIN_JOB_STATUS_CANCELED, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d5c70ee..42d3422 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12948,7 +12948,8 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, } *jobInfo = *info; - if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) { + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || + jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { if (fetch) ret = qemuMigrationFetchJobStatus(driver, vm, QEMU_ASYNC_JOB_NONE, jobInfo); @@ -13082,7 +13083,7 @@ static int qemuDomainAbortJob(virDomainPtr dom) } if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT && - (priv->job.current->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY || + (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY || (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED && reason == VIR_DOMAIN_PAUSED_POSTCOPY))) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 01416a6..d6bdb3e 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1346,6 +1346,10 @@ static void qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo) { switch ((qemuMonitorMigrationStatus) jobInfo->stats.status) { + case QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY: + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_POSTCOPY; + break; + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: jobInfo->status = QEMU_DOMAIN_JOB_STATUS_COMPLETED; break; @@ -1364,7 +1368,6 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo) case QEMU_MONITOR_MIGRATION_STATUS_SETUP: case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: - case QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY: case QEMU_MONITOR_MIGRATION_STATUS_CANCELLING: case QEMU_MONITOR_MIGRATION_STATUS_LAST: break; @@ -1470,6 +1473,7 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, break; case QEMU_DOMAIN_JOB_STATUS_ACTIVE: + case QEMU_DOMAIN_JOB_STATUS_POSTCOPY: break; } return 0; @@ -1527,8 +1531,7 @@ 
qemuMigrationCompleted(virQEMUDriverPtr driver, * will continue waiting until the migrate state changes to completed. */ if (flags & QEMU_MIGRATION_COMPLETED_POSTCOPY && - jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE && - jobInfo->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) { + jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { VIR_DEBUG("Migration switched to post-copy"); if (updateStats && qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) @@ -1542,7 +1545,8 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, return 0; error: - if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) { + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || + jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { /* The migration was aborted by us rather than QEMU itself. */ jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; return -2; @@ -3836,7 +3840,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, else if (rc == -1) goto cleanup; - if (priv->job.current->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) inPostCopy = true; /* When migration completed, QEMU will have paused the CPUs for us. @@ -3888,7 +3892,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, ignore_value(virTimeMillisNow(&priv->job.completed->sent)); } - if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE && !inPostCopy) + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_FAILED; cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | @@ -5261,7 +5265,7 @@ qemuMigrationFinish(virQEMUDriverPtr driver, goto endjob; } - if (priv->job.current->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) inPostCopy = true; if (!(flags & VIR_MIGRATE_PAUSED)) { diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 0f96b28..97f4c40 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -720,8 +720,8 @@ qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, } if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) { - if (priv->job.current->stats.status == - QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) { + if (priv->job.current->status == + QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { reason = VIR_DOMAIN_PAUSED_POSTCOPY; detail = VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY; } else { -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:40 +0300, Nikolay Shirokovskiy wrote:
Current code consults job.current->stats.status to check for postcopy state. First it is more correct to check for both job.current->status and job.current->stats.status.code because on some paths on failures we change only the former. Second if qemu supports migration events then stats can change unexpectedly.
I'm not sure I understand what you're trying to say. Could you explain this a bit more?
Let's introduce QEMU_DOMAIN_JOB_STATUS_POSTCOPY state for job.current->status.
This patch removes all state checking usage of stats except for qemuDomainGetJobStatsInternal. This place will be handled separately. --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 5 +++-- src/qemu/qemu_migration.c | 18 +++++++++++------- src/qemu/qemu_process.c | 4 ++-- 5 files changed, 18 insertions(+), 11 deletions(-)
ACK Jirka

On 28.08.2017 17:55, Jiri Denemark wrote:
On Thu, Aug 24, 2017 at 09:56:40 +0300, Nikolay Shirokovskiy wrote:
Current code consults job.current->stats.status to check for postcopy state. First it is more correct to check for both job.current->status and job.current->stats.status.code because on some paths on failures we change only the former. Second if qemu supports migration events then stats can change unexpectedly.
I'm not sure I understand what you're trying to say. Could you explain this a bit more?
This place looks dangerous to me:

@@ -3836,7 +3840,7 @@ qemuMigrationRun(virQEMUDriverPtr driver,
     else if (rc == -1)
         goto cleanup;

-    if (priv->job.current->stats.status == QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY)
+    if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY)
         inPostCopy = true;

It makes a decision based on the current state of the migration and consults stats.status for that purpose. Such a check can be mistaken because, in the code above this hunk, if an error occurs we change only the current->status value. Also, after patch 6 we can update stats.status while fetching migration stats without changing current->status. However, on closer analysis it looks like this code does not have undesired effects and only looks dangerous (at least to me). So I guess I'd better say it is easier to reason about if we have postcopy as one of the status states.
Let's introduce QEMU_DOMAIN_JOB_STATUS_POSTCOPY state for job.current->status.
This patch removes all state checking usage of stats except for qemuDomainGetJobStatsInternal. This place will be handled separately. --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 5 +++-- src/qemu/qemu_migration.c | 18 +++++++++++------- src/qemu/qemu_process.c | 4 ++-- 5 files changed, 18 insertions(+), 11 deletions(-)
ACK
Jirka

This way we get stats only in one place. The former code waits for complete/postcopy status basically and don't need to mess with stats. The patch drops raising an error on stats updates failure. This does not make much sense anyway. --- src/qemu/qemu_migration.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index d6bdb3e..b7ad65d 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1436,8 +1436,7 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, static int qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob, - bool updateJobStats) + qemuDomainAsyncJob asyncJob) { qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainJobInfoPtr jobInfo = priv->job.current; @@ -1466,12 +1465,6 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, return -1; case QEMU_DOMAIN_JOB_STATUS_COMPLETED: - /* Fetch statistics of a completed migration */ - if (events && updateJobStats && - qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) - return -1; - break; - case QEMU_DOMAIN_JOB_STATUS_ACTIVE: case QEMU_DOMAIN_JOB_STATUS_POSTCOPY: break; @@ -1483,10 +1476,10 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, enum qemuMigrationCompletedFlags { QEMU_MIGRATION_COMPLETED_ABORT_ON_ERROR = (1 << 0), QEMU_MIGRATION_COMPLETED_CHECK_STORAGE = (1 << 1), - QEMU_MIGRATION_COMPLETED_UPDATE_STATS = (1 << 2), - QEMU_MIGRATION_COMPLETED_POSTCOPY = (1 << 3), + QEMU_MIGRATION_COMPLETED_POSTCOPY = (1 << 2), }; + /** * Returns 1 if migration completed successfully, * 0 if the domain is still being migrated, @@ -1503,9 +1496,8 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, qemuDomainObjPrivatePtr priv = vm->privateData; qemuDomainJobInfoPtr jobInfo = priv->job.current; int pauseReason; - bool updateStats = !!(flags & QEMU_MIGRATION_COMPLETED_UPDATE_STATS); - if (qemuMigrationCheckJobStatus(driver, vm, asyncJob, updateStats) < 0) + if (qemuMigrationCheckJobStatus(driver, vm, asyncJob) < 0) goto error; if (flags & QEMU_MIGRATION_COMPLETED_CHECK_STORAGE && @@ -1533,9 +1525,6 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, if (flags & QEMU_MIGRATION_COMPLETED_POSTCOPY && jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { VIR_DEBUG("Migration switched to post-copy"); - if (updateStats && - qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) - goto error; return 1; } @@ -1574,8 +1563,6 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int rv; - flags |= QEMU_MIGRATION_COMPLETED_UPDATE_STATS; - jobInfo->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; while ((rv = qemuMigrationCompleted(driver, vm, asyncJob, dconn, flags)) != 1) { @@ -1597,6 +1584,9 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } } + if (events) + ignore_value(qemuMigrationUpdateJobStatus(driver, vm, asyncJob)); + qemuDomainJobInfoUpdateDowntime(jobInfo); VIR_FREE(priv->job.completed); if (VIR_ALLOC(priv->job.completed) == 0) -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:41 +0300, Nikolay Shirokovskiy wrote:
This way we get stats in only one place. The former code basically waits for the complete/postcopy status and doesn't need to mess with stats.
The patch drops raising an error on stats updates failure. This does not make much sense anyway. --- src/qemu/qemu_migration.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-)
ACK Jirka

qemuMonitorGetMigrationStats will do it for us anyway. --- src/qemu/qemu_migration.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index b7ad65d..cc42f7a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1387,7 +1387,6 @@ qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) return -1; - memset(&jobInfo->stats, 0, sizeof(jobInfo->stats)); rv = qemuMonitorGetMigrationStats(priv->mon, &jobInfo->stats); if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0) -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:42 +0300, Nikolay Shirokovskiy wrote:
qemuMonitorGetMigrationStats will do it for us anyway. --- src/qemu/qemu_migration.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index b7ad65d..cc42f7a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1387,7 +1387,6 @@ qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) return -1;
- memset(&jobInfo->stats, 0, sizeof(jobInfo->stats)); rv = qemuMonitorGetMigrationStats(priv->mon, &jobInfo->stats);
if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0)
ACK Jirka

qemuMigrationFetchJobStatus is rather inconvinient. Some of its callers don't need status to be updated, some don't need to update elapsed time right away. So let's update status or elapsed time in callers instead. In qemuMigrationConfirmPhase we should fetch stats with copy flag set as stats variable refers to domain object not the stack. This patch drops updating job status on getting job stats by client. This way we will not provide status 'completed' while it is not yet updated by migration routine. --- src/qemu/qemu_driver.c | 16 ++++++++------- src/qemu/qemu_migration.c | 52 +++++++++++++++++++---------------------------- src/qemu/qemu_migration.h | 9 ++++---- 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 42d3422..c62d416 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12950,15 +12950,17 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { - if (fetch) - ret = qemuMigrationFetchJobStatus(driver, vm, QEMU_ASYNC_JOB_NONE, - jobInfo); - else - ret = qemuDomainJobInfoUpdateTime(jobInfo); - } else { - ret = 0; + if (fetch && + qemuMigrationFetchMigrationStats(driver, vm, QEMU_ASYNC_JOB_NONE, + &jobInfo->stats, false) < 0) + goto cleanup; + + if (qemuDomainJobInfoUpdateTime(jobInfo) < 0) + goto cleanup; } + ret = 0; + cleanup: if (fetch) qemuDomainObjEndJob(driver, vm); diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index cc42f7a..dec0a08 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1376,24 +1376,28 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo) int -qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, - virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob, - qemuDomainJobInfoPtr jobInfo) +qemuMigrationFetchMigrationStats(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats, + bool copy) { qemuDomainObjPrivatePtr priv = vm->privateData; + qemuMonitorMigrationStats statsCopy; int rv; if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) return -1; - rv = qemuMonitorGetMigrationStats(priv->mon, &jobInfo->stats); + rv = qemuMonitorGetMigrationStats(priv->mon, copy ? 
&statsCopy : stats); if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0) return -1; - qemuMigrationUpdateJobType(jobInfo); - return qemuDomainJobInfoUpdateTime(jobInfo); + if (copy) + *stats = statsCopy; + + return 0; } @@ -1416,23 +1420,6 @@ qemuMigrationJobName(virDomainObjPtr vm) static int -qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, - virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob) -{ - qemuDomainObjPrivatePtr priv = vm->privateData; - qemuDomainJobInfoPtr jobInfo = priv->job.current; - qemuDomainJobInfo newInfo = *jobInfo; - - if (qemuMigrationFetchJobStatus(driver, vm, asyncJob, &newInfo) < 0) - return -1; - - *jobInfo = newInfo; - return 0; -} - - -static int qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuDomainAsyncJob asyncJob) @@ -1442,11 +1429,12 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); - if (events) - qemuMigrationUpdateJobType(jobInfo); - else if (qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) + if (!events && qemuMigrationFetchMigrationStats(driver, vm, asyncJob, + &jobInfo->stats, true) < 0) return -1; + qemuMigrationUpdateJobType(jobInfo); + switch (jobInfo->status) { case QEMU_DOMAIN_JOB_STATUS_NONE: virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), @@ -1584,8 +1572,10 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, } if (events) - ignore_value(qemuMigrationUpdateJobStatus(driver, vm, asyncJob)); + ignore_value(qemuMigrationFetchMigrationStats(driver, vm, asyncJob, + &jobInfo->stats, true)); + qemuDomainJobInfoUpdateTime(jobInfo); qemuDomainJobInfoUpdateDowntime(jobInfo); VIR_FREE(priv->job.completed); if (VIR_ALLOC(priv->job.completed) == 0) @@ -3176,9 +3166,9 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver, */ if (virDomainObjGetState(vm, &reason) == VIR_DOMAIN_PAUSED && reason == VIR_DOMAIN_PAUSED_POSTCOPY && - qemuMigrationFetchJobStatus(driver, vm, - QEMU_ASYNC_JOB_MIGRATION_OUT, - jobInfo) < 0) + qemuMigrationFetchMigrationStats(driver, vm, + QEMU_ASYNC_JOB_MIGRATION_OUT, + &jobInfo->stats, true) < 0) VIR_WARN("Could not refresh migration statistics"); qemuDomainJobInfoUpdateTime(jobInfo); diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 6c51f5f..1f6ddba 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -279,10 +279,11 @@ qemuMigrationCancel(virQEMUDriverPtr driver, virDomainObjPtr vm); int -qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, - virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob, - qemuDomainJobInfoPtr jobInfo); +qemuMigrationFetchMigrationStats(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats, + bool copy); int qemuMigrationErrorInit(virQEMUDriverPtr driver); -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:43 +0300, Nikolay Shirokovskiy wrote:
qemuMigrationFetchJobStatus is rather inconvenient. Some of its callers don't need the status to be updated, some don't need to update the elapsed time right away. So let's update the status or elapsed time in the callers instead.

In qemuMigrationConfirmPhase we should fetch stats with the copy flag set, as the stats variable refers to the domain object, not the stack.
This patch drops updating job status on getting job stats by client. This way we will not provide status 'completed' while it is not yet updated by migration routine. ... diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index cc42f7a..dec0a08 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1376,24 +1376,28 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo)
int -qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, - virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob, - qemuDomainJobInfoPtr jobInfo) +qemuMigrationFetchMigrationStats(virQEMUDriverPtr driver,
The name contains "Migration" twice. How about qemuMigrationFetchStats or qemuMigrationFetchJobStats?
+ virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats,
It looks like all callers are always passing something like &jobInfo->stats so keeping qemuDomainJobInfoPtr jobInfo argument could make the callers a bit simpler.
+ bool copy)
I'd just drop the "copy" parameter completely and let the function always fetch stats in a local variable and then copy its content into the "stats" argument. I.e., make it always work as if copy == true.
{ qemuDomainObjPrivatePtr priv = vm->privateData; + qemuMonitorMigrationStats statsCopy; int rv;
if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) return -1;
- rv = qemuMonitorGetMigrationStats(priv->mon, &jobInfo->stats); + rv = qemuMonitorGetMigrationStats(priv->mon, copy ? &statsCopy : stats);
if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0) return -1;
- qemuMigrationUpdateJobType(jobInfo); - return qemuDomainJobInfoUpdateTime(jobInfo); + if (copy) + *stats = statsCopy; + + return 0; }
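Putting the three suggestions above together, a minimal sketch of how the helper could end up looking (assuming the qemuMigrationFetchStats name and the jobInfo argument; not the final committed code):

    int
    qemuMigrationFetchStats(virQEMUDriverPtr driver,
                            virDomainObjPtr vm,
                            qemuDomainAsyncJob asyncJob,
                            qemuDomainJobInfoPtr jobInfo)
    {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        qemuMonitorMigrationStats stats;
        int rv;

        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            return -1;

        /* Always fetch into a local copy; jobInfo is only touched on
         * success, so it stays consistent even though the domain object
         * was unlocked while we talked to the monitor. */
        rv = qemuMonitorGetMigrationStats(priv->mon, &stats);

        if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0)
            return -1;

        jobInfo->stats = stats;
        return 0;
    }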
...
@@ -1442,11 +1429,12 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver,
bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT);
- if (events) - qemuMigrationUpdateJobType(jobInfo); - else if (qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) + if (!events && qemuMigrationFetchMigrationStats(driver, vm, asyncJob,
Break the line after &&, please.
+ &jobInfo->stats, true) < 0) return -1;
+ qemuMigrationUpdateJobType(jobInfo); + switch (jobInfo->status) { case QEMU_DOMAIN_JOB_STATUS_NONE: virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), ...
Jirka

On 29.08.2017 16:45, Jiri Denemark wrote:
On Thu, Aug 24, 2017 at 09:56:43 +0300, Nikolay Shirokovskiy wrote:
qemuMigrationFetchJobStatus is rather inconvenient. Some of its callers don't need the status to be updated, some don't need to update the elapsed time right away. So let's update the status or elapsed time in the callers instead.

In qemuMigrationConfirmPhase we should fetch stats with the copy flag set, as the stats variable refers to the domain object, not the stack.
This patch drops updating job status on getting job stats by client. This way we will not provide status 'completed' while it is not yet updated by migration routine. ... diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index cc42f7a..dec0a08 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1376,24 +1376,28 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo)
int -qemuMigrationFetchJobStatus(virQEMUDriverPtr driver, - virDomainObjPtr vm, - qemuDomainAsyncJob asyncJob, - qemuDomainJobInfoPtr jobInfo) +qemuMigrationFetchMigrationStats(virQEMUDriverPtr driver,
The name contains "Migration" twice. How about qemuMigrationFetchStats or qemuMigrationFetchJobStats?
Both are good. I like qemuMigrationFetchStats.
+ virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats,
It looks like all callers are always passing something like &jobInfo->stats so keeping qemuDomainJobInfoPtr jobInfo argument could make the callers a bit simpler.
Ok.
+ bool copy)
I'd just drop the "copy" parameter completely and let the function always fetch stats in a local variable and then copy its content into the "stats" argument. I.e., make it always work as if copy == true.
Ok.
{ qemuDomainObjPrivatePtr priv = vm->privateData; + qemuMonitorMigrationStats statsCopy; int rv;
if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) return -1;
- rv = qemuMonitorGetMigrationStats(priv->mon, &jobInfo->stats); + rv = qemuMonitorGetMigrationStats(priv->mon, copy ? &statsCopy : stats);
if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0) return -1;
- qemuMigrationUpdateJobType(jobInfo); - return qemuDomainJobInfoUpdateTime(jobInfo); + if (copy) + *stats = statsCopy; + + return 0; }
...
@@ -1442,11 +1429,12 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver,
bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT);
- if (events) - qemuMigrationUpdateJobType(jobInfo); - else if (qemuMigrationUpdateJobStatus(driver, vm, asyncJob) < 0) + if (!events && qemuMigrationFetchMigrationStats(driver, vm, asyncJob,
Break the line after &&, please.
+ &jobInfo->stats, true) < 0) return -1;
+ qemuMigrationUpdateJobType(jobInfo); + switch (jobInfo->status) { case QEMU_DOMAIN_JOB_STATUS_NONE: virReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), ...
Jirka

--- src/qemu/qemu_driver.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c62d416..b8a4df7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12905,12 +12905,18 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, qemuDomainJobInfoPtr jobInfo) { qemuDomainObjPrivatePtr priv = vm->privateData; - qemuDomainJobInfoPtr info; bool fetch = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int ret = -1; - if (completed) - fetch = false; + if (completed) { + if (priv->job.current || !priv->job.completed) { + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; + return 0; + } + + *jobInfo = *priv->job.completed; + return 0; + } /* Do not ask QEMU if migration is not even running yet */ if (!priv->job.current || !priv->job.current->stats.status) @@ -12927,26 +12933,18 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, return -1; } - if (!completed && - !virDomainObjIsActive(vm)) { + if (!virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); goto cleanup; } - if (completed && priv->job.current) - info = NULL; - else if (completed) - info = priv->job.completed; - else - info = priv->job.current; - - if (!info) { + if (!priv->job.current) { jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; ret = 0; goto cleanup; } - *jobInfo = *info; + *jobInfo = *priv->job.current; if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:44 +0300, Nikolay Shirokovskiy wrote:
--- src/qemu/qemu_driver.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c62d416..b8a4df7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12905,12 +12905,18 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, qemuDomainJobInfoPtr jobInfo) { qemuDomainObjPrivatePtr priv = vm->privateData; - qemuDomainJobInfoPtr info; bool fetch = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int ret = -1;
- if (completed) - fetch = false; + if (completed) { + if (priv->job.current || !priv->job.completed) { + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE; + return 0; + } + + *jobInfo = *priv->job.completed; + return 0;
I think keeping just one return would make the code a bit easier to read:

    if (priv->job.current || !priv->job.completed)
        jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE;
    else
        *jobInfo = *priv->job.completed;

    return 0;

Or even

    if (priv->job.completed && !priv->job.current)
        *jobInfo = *priv->job.completed;
    else
        jobInfo->status = QEMU_DOMAIN_JOB_STATUS_NONE;

    return 0;
+ }
/* Do not ask QEMU if migration is not even running yet */ if (!priv->job.current || !priv->job.current->stats.status) ...
Jirka

Querying destination migration statistics may result in getting a failure or getting a elapsed time value depending on stats.status value which is odd. Instead let's always fail. Clients should be ready to handle this as currently getting failure period can be considerable. --- src/qemu/qemu_driver.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index b8a4df7..397cadb 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12918,20 +12918,19 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, return 0; } + if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("migration statistics are available only on " + "the source host")); + return -1; + } + /* Do not ask QEMU if migration is not even running yet */ if (!priv->job.current || !priv->job.current->stats.status) fetch = false; - if (fetch) { - if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN) { - virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", - _("migration statistics are available only on " - "the source host")); - return -1; - } - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) - return -1; - } + if (fetch && qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) + return -1; if (!virDomainObjIsActive(vm)) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:45 +0300, Nikolay Shirokovskiy wrote:
Querying destination migration statistics may result in either a failure or an elapsed time value, depending on the stats.status value, which is odd. Instead, let's always fail. Clients should be ready to handle this, as even now the period during which they get a failure can be considerable.
Alternatively, we could always return elapsed time only. ACK Jirka

Setting status to none has little value - getting job status will not return even elapsed time. After this patch getting job stats stays correct in a sence it will not fetch migration stats because it consults stats.status before doing the fetch. --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_driver.c | 2 -- src/qemu/qemu_migration.c | 4 ---- src/qemu/qemu_process.c | 4 ---- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bfe1eaf..2c2f52d 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -3957,6 +3957,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainObjResetAsyncJob(priv); if (VIR_ALLOC(priv->job.current) < 0) goto cleanup; + priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); priv->job.asyncOwnerAPI = virThreadJobGet(); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 397cadb..c6be08b 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -3310,8 +3310,6 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, goto endjob; } - priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; - /* Pause */ if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) { was_running = true; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index dec0a08..448fffa 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1550,7 +1550,6 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int rv; - jobInfo->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; while ((rv = qemuMigrationCompleted(driver, vm, asyncJob, dconn, flags)) != 1) { if (rv < 0) @@ -5615,7 +5614,6 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuDomainAsyncJob job) { - qemuDomainObjPrivatePtr priv = vm->privateData; virDomainJobOperation op; unsigned long long mask; @@ -5633,8 +5631,6 @@ qemuMigrationJobStart(virQEMUDriverPtr driver, return -1; qemuDomainObjSetAsyncJobMask(vm, mask); - priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; - return 0; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 97f4c40..ca075e7 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3972,15 +3972,11 @@ qemuProcessBeginJob(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainJobOperation operation) { - qemuDomainObjPrivatePtr priv = vm->privateData; - if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_START, operation) < 0) return -1; qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE); - priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE; - return 0; } -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:46 +0300, Nikolay Shirokovskiy wrote:
Setting status to none has little value - getting job status will not return even elapsed time.
After this patch getting job stats stays correct in the sense that it will not fetch migration stats, because it consults stats.status before doing the fetch.
---
 src/qemu/qemu_domain.c    | 1 +
 src/qemu/qemu_driver.c    | 2 --
 src/qemu/qemu_migration.c | 4 ----
 src/qemu/qemu_process.c   | 4 ----
 4 files changed, 1 insertion(+), 10 deletions(-)
Makes sense. ACK Jirka

Instead of checking stat.status let's set status to migrating as soon as migrate command is send (waiting for completion is a good place too). --- src/qemu/qemu_domain.c | 1 + src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 4 +++- src/qemu/qemu_migration.c | 9 +++++++-- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 2c2f52d..18f91d2 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -420,6 +420,7 @@ qemuDomainJobStatusToType(qemuDomainJobStatus status) break; case QEMU_DOMAIN_JOB_STATUS_ACTIVE: + case QEMU_DOMAIN_JOB_STATUS_MIGRATING: case QEMU_DOMAIN_JOB_STATUS_POSTCOPY: return VIR_DOMAIN_JOB_UNBOUNDED; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index fbc5732..ce0a080 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -102,6 +102,7 @@ VIR_ENUM_DECL(qemuDomainAsyncJob) typedef enum { QEMU_DOMAIN_JOB_STATUS_NONE = 0, QEMU_DOMAIN_JOB_STATUS_ACTIVE, + QEMU_DOMAIN_JOB_STATUS_MIGRATING, QEMU_DOMAIN_JOB_STATUS_POSTCOPY, QEMU_DOMAIN_JOB_STATUS_COMPLETED, QEMU_DOMAIN_JOB_STATUS_FAILED, diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c6be08b..e8c0d83 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12924,7 +12924,8 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, } /* Do not ask QEMU if migration is not even running yet */ - if (!priv->job.current || !priv->job.current->stats.status) + if (!priv->job.current || + priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) fetch = false; if (fetch && qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) @@ -12944,6 +12945,7 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, *jobInfo = *priv->job.current; if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || + jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { if (fetch && qemuMigrationFetchMigrationStats(driver, vm, QEMU_ASYNC_JOB_NONE, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 448fffa..2a8a721 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1453,6 +1453,7 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver, case QEMU_DOMAIN_JOB_STATUS_COMPLETED: case QEMU_DOMAIN_JOB_STATUS_ACTIVE: + case QEMU_DOMAIN_JOB_STATUS_MIGRATING: case QEMU_DOMAIN_JOB_STATUS_POSTCOPY: break; } @@ -1521,7 +1522,8 @@ qemuMigrationCompleted(virQEMUDriverPtr driver, return 0; error: - if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || + /* state can not be active at this point */ + if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { /* The migration was aborted by us rather than QEMU itself. 
*/ jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED; @@ -1550,6 +1552,8 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int rv; + jobInfo->status = QEMU_DOMAIN_JOB_STATUS_MIGRATING; + while ((rv = qemuMigrationCompleted(driver, vm, asyncJob, dconn, flags)) != 1) { if (rv < 0) @@ -3870,7 +3874,8 @@ qemuMigrationRun(virQEMUDriverPtr driver, ignore_value(virTimeMillisNow(&priv->job.completed->sent)); } - if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) + if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || + priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING) priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_FAILED; cookieFlags |= QEMU_MIGRATION_COOKIE_NETWORK | -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:47 +0300, Nikolay Shirokovskiy wrote:
Instead of checking stats.status, let's set the status to migrating as soon as the migrate command is sent (waiting for completion is a good place too).
---
 src/qemu/qemu_domain.c    | 1 +
 src/qemu/qemu_domain.h    | 1 +
 src/qemu/qemu_driver.c    | 4 +++-
 src/qemu/qemu_migration.c | 9 +++++++--
 4 files changed, 12 insertions(+), 3 deletions(-)
ACK Jirka

Looks like it is more simple to drop this optimization as we are going to add getting disks stats during migration via quering qemu process and checking if we have to acquire job condition becomes more complicate. --- src/qemu/qemu_driver.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index e8c0d83..20ae879 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12903,7 +12903,7 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, qemuDomainJobInfoPtr jobInfo) { qemuDomainObjPrivatePtr priv = vm->privateData; - bool fetch = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); + bool events = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT); int ret = -1; if (completed) { @@ -12923,12 +12923,7 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, return -1; } - /* Do not ask QEMU if migration is not even running yet */ - if (!priv->job.current || - priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE) - fetch = false; - - if (fetch && qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) return -1; if (!virDomainObjIsActive(vm)) { @@ -12947,7 +12942,8 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING || jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) { - if (fetch && + + if (events && jobInfo->status != QEMU_DOMAIN_JOB_STATUS_ACTIVE && qemuMigrationFetchMigrationStats(driver, vm, QEMU_ASYNC_JOB_NONE, &jobInfo->stats, false) < 0) goto cleanup; @@ -12959,8 +12955,7 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, ret = 0; cleanup: - if (fetch) - qemuDomainObjEndJob(driver, vm); + qemuDomainObjEndJob(driver, vm); return ret; } -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:48 +0300, Nikolay Shirokovskiy wrote:
Looks like it is simpler to drop this optimization: we are going to add fetching disk stats during migration by querying the qemu process, and checking whether we have to acquire the job condition becomes more complicated.
...
@@ -12947,7 +12942,8 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver,
     if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) {
-        if (fetch &&
+
Remove the empty line here
+ if (events && jobInfo->status != QEMU_DOMAIN_JOB_STATUS_ACTIVE &&
and break this line after the first "&&".
qemuMigrationFetchMigrationStats(driver, vm, QEMU_ASYNC_JOB_NONE, &jobInfo->stats, false) < 0) goto cleanup;
ACK with the trivial change. Jirka

This patch shows incorrect info when client request comes when migration routine is stopping mirror jobs or mirror jobs already stopped. This issue will be addressed in next patch. --- src/qemu/qemu_driver.c | 4 ++++ src/qemu/qemu_migration.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 6 ++++++ 3 files changed, 59 insertions(+) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 20ae879..d012fd0 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -12948,6 +12948,10 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver, &jobInfo->stats, false) < 0) goto cleanup; + if (qemuMigrationFetchMirrorStats(driver, vm, QEMU_ASYNC_JOB_NONE, + &jobInfo->stats) < 0) + goto cleanup; + if (qemuDomainJobInfoUpdateTime(jobInfo) < 0) goto cleanup; } diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 2a8a721..c7af1ac 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -5922,3 +5922,52 @@ qemuMigrationReset(virQEMUDriverPtr driver, virFreeError(err); } } + + +int +qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats) +{ + size_t i; + qemuDomainObjPrivatePtr priv = vm->privateData; + bool nbd = false; + virHashTablePtr blockinfo = NULL; + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + if (QEMU_DOMAIN_DISK_PRIVATE(disk)->migrating) { + nbd = true; + break; + } + } + + if (!nbd) + return 0; + + if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0) + return -1; + + blockinfo = qemuMonitorGetAllBlockJobInfo(priv->mon); + + if (qemuDomainObjExitMonitor(driver, vm) < 0 || !blockinfo) + return -1; + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk); + qemuMonitorBlockJobInfoPtr data; + + if (!diskPriv->migrating || + !(data = virHashLookup(blockinfo, disk->info.alias))) + continue; + + stats->disk_transferred += data->cur; + stats->disk_total += data->end; + stats->disk_remaining += data->end - data->cur; + } + + virHashFree(blockinfo); + return 0; +} diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 1f6ddba..13cfe47 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -320,4 +320,10 @@ qemuMigrationReset(virQEMUDriverPtr driver, virDomainObjPtr vm, qemuDomainAsyncJob job); +int +qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuDomainAsyncJob asyncJob, + qemuMonitorMigrationStatsPtr stats); + #endif /* __QEMU_MIGRATION_H__ */ -- 1.8.3.1

On Thu, Aug 24, 2017 at 09:56:49 +0300, Nikolay Shirokovskiy wrote:
This patch shows incorrect info when a client request comes while the migration routine is stopping mirror jobs or after the mirror jobs have already stopped. This issue will be addressed in the next patch.

...

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 2a8a721..c7af1ac 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -5922,3 +5922,52 @@ qemuMigrationReset(virQEMUDriverPtr driver,
         virFreeError(err);
     }
 }
+
+
+int
+qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver,
+                              virDomainObjPtr vm,
+                              qemuDomainAsyncJob asyncJob,
+                              qemuMonitorMigrationStatsPtr stats)
+{
+    size_t i;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    bool nbd = false;
+    virHashTablePtr blockinfo = NULL;
+
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+        if (QEMU_DOMAIN_DISK_PRIVATE(disk)->migrating) {
+            nbd = true;
+            break;
+        }
+    }
+
+    if (!nbd)
+        return 0;
+
+    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
+        return -1;
+
+    blockinfo = qemuMonitorGetAllBlockJobInfo(priv->mon);
+
+    if (qemuDomainObjExitMonitor(driver, vm) < 0 || !blockinfo)
+        return -1;
+
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+        qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
+        qemuMonitorBlockJobInfoPtr data;
+
+        if (!diskPriv->migrating ||
+            !(data = virHashLookup(blockinfo, disk->info.alias)))
+            continue;
+
+        stats->disk_transferred += data->cur;
+        stats->disk_total += data->end;
+        stats->disk_remaining += data->end - data->cur;
You should not touch qemuMonitorMigrationStatsPtr here. The structure is used to store raw data from the query-migrate QMP command. Just add another structure to jobInfo for storing NBD migration statistics. If you do this, you don't need the hack to conditionally clear qemuMonitorMigrationStats. Both structs can be combined in qemuDomainJobInfoTo*.

Jirka
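[Editorial note] For illustration only (not part of the posted series), here is a minimal, self-contained sketch of the separation suggested above, using simplified stand-in types — MonitorMigrationStats, MirrorStats and JobInfo are hypothetical names, not the actual libvirt structures. The raw query-migrate numbers stay untouched and the NBD totals live in their own struct; the two are only summed when producing the user-visible figures, so nothing ever needs to be conditionally cleared.

#include <stdio.h>

/* Simplified stand-ins for the libvirt structures discussed above;
 * the real definitions live in qemu_monitor.h / qemu_domain.h. */
typedef struct {
    unsigned long long disk_transferred;  /* raw data from query-migrate */
    unsigned long long disk_total;
} MonitorMigrationStats;

typedef struct {
    unsigned long long transferred;       /* summed NBD block job data */
    unsigned long long total;
} MirrorStats;

typedef struct {
    MonitorMigrationStats stats;          /* untouched QMP numbers */
    MirrorStats mirrorStats;              /* hypothetical separate field */
} JobInfo;

/* Combine both sources only when producing the user-visible numbers. */
static void
jobInfoToDiskProgress(const JobInfo *jobInfo,
                      unsigned long long *processed,
                      unsigned long long *total)
{
    *processed = jobInfo->stats.disk_transferred +
                 jobInfo->mirrorStats.transferred;
    *total = jobInfo->stats.disk_total +
             jobInfo->mirrorStats.total;
}

int main(void)
{
    JobInfo job = {
        .stats = { .disk_transferred = 0, .disk_total = 0 },
        .mirrorStats = { .transferred = 1ULL << 20, .total = 64ULL << 30 },
    };
    unsigned long long processed, total;

    jobInfoToDiskProgress(&job, &processed, &total);
    printf("disk processed: %llu of %llu bytes\n", processed, total);
    return 0;
}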

On 29.08.2017 18:20, Jiri Denemark wrote:
On Thu, Aug 24, 2017 at 09:56:49 +0300, Nikolay Shirokovskiy wrote:
This patch shows incorrect info when a client request comes while the migration routine is stopping mirror jobs or after the mirror jobs have already stopped. This issue will be addressed in the next patch.

...

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 2a8a721..c7af1ac 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -5922,3 +5922,52 @@ qemuMigrationReset(virQEMUDriverPtr driver,
         virFreeError(err);
     }
 }
+
+
+int
+qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver,
+                              virDomainObjPtr vm,
+                              qemuDomainAsyncJob asyncJob,
+                              qemuMonitorMigrationStatsPtr stats)
+{
+    size_t i;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    bool nbd = false;
+    virHashTablePtr blockinfo = NULL;
+
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+        if (QEMU_DOMAIN_DISK_PRIVATE(disk)->migrating) {
+            nbd = true;
+            break;
+        }
+    }
+
+    if (!nbd)
+        return 0;
+
+    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
+        return -1;
+
+    blockinfo = qemuMonitorGetAllBlockJobInfo(priv->mon);
+
+    if (qemuDomainObjExitMonitor(driver, vm) < 0 || !blockinfo)
+        return -1;
+
+    for (i = 0; i < vm->def->ndisks; i++) {
+        virDomainDiskDefPtr disk = vm->def->disks[i];
+        qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
+        qemuMonitorBlockJobInfoPtr data;
+
+        if (!diskPriv->migrating ||
+            !(data = virHashLookup(blockinfo, disk->info.alias)))
+            continue;
+
+        stats->disk_transferred += data->cur;
+        stats->disk_total += data->end;
+        stats->disk_remaining += data->end - data->cur;
You should not touch qemuMonitorMigrationStatsPtr here. The structure is used to store raw data from the query-migrate QMP command. Just add another structure to jobInfo for storing NBD migration statistics. If you do this, you don't need the hack to conditionally clear qemuMonitorMigrationStats. Both structs can be combined in qemuDomainJobInfoTo*.
Yep, this simple change makes that quite ugly hack unnecessary. Great!
Jirka

Let's store disk stats for completed mirror jobs in the current job info. Then, on getting migration job stats through the API, we take records for completed jobs from the current job info and records for still active jobs by querying the qemu process.

As we need to keep the disk stats for completed mirror jobs in the current job, let's zero out migration stats conditionally in the fetching function.
---
 src/qemu/qemu_blockjob.c     |  1 +
 src/qemu/qemu_domain.h       |  1 +
 src/qemu/qemu_driver.c       |  3 +++
 src/qemu/qemu_migration.c    | 12 +++++++++++-
 src/qemu/qemu_monitor.c      |  5 +++--
 src/qemu/qemu_monitor.h      |  4 +++-
 src/qemu/qemu_monitor_json.c |  4 +---
 src/qemu/qemu_process.c      |  3 +++
 tests/qemumonitorjsontest.c  |  1 +
 9 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/qemu/qemu_blockjob.c b/src/qemu/qemu_blockjob.c
index 415768d..73e32b2 100644
--- a/src/qemu/qemu_blockjob.c
+++ b/src/qemu/qemu_blockjob.c
@@ -234,6 +234,7 @@ qemuBlockJobSyncBegin(virDomainDiskDefPtr disk)
     VIR_DEBUG("disk=%s", disk->dst);
     diskPriv->blockJobSync = true;
     diskPriv->blockJobStatus = -1;
+    diskPriv->blockJobLength = 0;
 }
 
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index ce0a080..d517719 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -332,6 +332,7 @@ struct _qemuDomainDiskPrivate {
     /* for some synchronous block jobs, we need to notify the owner */
     int blockJobType;   /* type of the block job from the event */
     int blockJobStatus; /* status of the finished block job */
+    unsigned long long blockJobLength; /* length of the finished block job */
     bool blockJobSync; /* the block job needs synchronized termination */
 
     bool migrating; /* the disk is being migrated */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d012fd0..ec5e0b1 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -12943,6 +12943,9 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver,
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) {
 
+        /* Disk stats accounting presumes that fetching migration
+         * stats will not touch the disk stats records if disks are migrated via nbd.
+         */
         if (events && jobInfo->status != QEMU_DOMAIN_JOB_STATUS_ACTIVE &&
             qemuMigrationFetchMigrationStats(driver, vm, QEMU_ASYNC_JOB_NONE,
                                              &jobInfo->stats, false) < 0)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index c7af1ac..afe1804 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -658,6 +658,7 @@ qemuMigrationDriveMirrorCancelled(virQEMUDriverPtr driver,
     size_t completed = 0;
     int status;
     bool failed = false;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
 
  retry:
     for (i = 0; i < vm->def->ndisks; i++) {
@@ -687,8 +688,14 @@ qemuMigrationDriveMirrorCancelled(virQEMUDriverPtr driver,
             active++;
         }
 
-        if (status == VIR_DOMAIN_BLOCK_JOB_COMPLETED)
+        if (status == VIR_DOMAIN_BLOCK_JOB_COMPLETED) {
+            qemuMonitorMigrationStatsPtr stats = &priv->job.current->stats;
+
+            stats->disk_transferred += diskPriv->blockJobLength;
+            stats->disk_total += diskPriv->blockJobLength;
+
             completed++;
+        }
     }
 
     /* Updating completed block job drops the lock thus we have to recheck
@@ -1389,6 +1396,9 @@ qemuMigrationFetchMigrationStats(virQEMUDriverPtr driver,
     if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
         return -1;
 
+    if (copy)
+        memset(&statsCopy, 0, sizeof(statsCopy));
+
     rv = qemuMonitorGetMigrationStats(priv->mon, copy ? &statsCopy : stats);
 
     if (qemuDomainObjExitMonitor(driver, vm) < 0 || rv < 0)
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 19082d8..a4d2dae 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -1514,13 +1514,14 @@ int
 qemuMonitorEmitBlockJob(qemuMonitorPtr mon,
                         const char *diskAlias,
                         int type,
-                        int status)
+                        int status,
+                        unsigned long long len)
 {
     int ret = -1;
     VIR_DEBUG("mon=%p", mon);
 
     QEMU_MONITOR_CALLBACK(mon, ret, domainBlockJob, mon->vm,
-                          diskAlias, type, status);
+                          diskAlias, type, status, len);
     return ret;
 }
 
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index 31f7e97..09d93ad 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -176,6 +176,7 @@ typedef int (*qemuMonitorDomainBlockJobCallback)(qemuMonitorPtr mon,
                                                  const char *diskAlias,
                                                  int type,
                                                  int status,
+                                                 unsigned long long len,
                                                  void *opaque);
 typedef int (*qemuMonitorDomainTrayChangeCallback)(qemuMonitorPtr mon,
                                                    virDomainObjPtr vm,
@@ -375,7 +376,8 @@ int qemuMonitorEmitPMSuspend(qemuMonitorPtr mon);
 int qemuMonitorEmitBlockJob(qemuMonitorPtr mon,
                             const char *diskAlias,
                             int type,
-                            int status);
+                            int status,
+                            unsigned long long len);
 int qemuMonitorEmitBalloonChange(qemuMonitorPtr mon,
                                  unsigned long long actual);
 int qemuMonitorEmitPMSuspendDisk(qemuMonitorPtr mon);
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index b8a6815..3fbb5fb 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -908,7 +908,7 @@ qemuMonitorJSONHandleBlockJobImpl(qemuMonitorPtr mon,
     }
 
  out:
-    qemuMonitorEmitBlockJob(mon, device, type, event);
+    qemuMonitorEmitBlockJob(mon, device, type, event, len);
 }
 
 static void
@@ -2990,8 +2990,6 @@ int qemuMonitorJSONGetMigrationStats(qemuMonitorPtr mon,
                                                      NULL);
     virJSONValuePtr reply = NULL;
 
-    memset(stats, 0, sizeof(*stats));
-
     if (!cmd)
         return -1;
 
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index ca075e7..2228298 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -979,6 +979,7 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                           const char *diskAlias,
                           int type,
                           int status,
+                          unsigned long long len,
                           void *opaque)
 {
     virQEMUDriverPtr driver = opaque;
@@ -1000,6 +1001,8 @@ qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
         /* We have a SYNC API waiting for this event, dispatch it back */
         diskPriv->blockJobType = type;
         diskPriv->blockJobStatus = status;
+        diskPriv->blockJobLength = len;
+
         virDomainObjBroadcast(vm);
     } else {
         /* there is no waiting SYNC API, dispatch the update to a thread */
diff --git a/tests/qemumonitorjsontest.c b/tests/qemumonitorjsontest.c
index df3ef0a..c514bf9 100644
--- a/tests/qemumonitorjsontest.c
+++ b/tests/qemumonitorjsontest.c
@@ -1934,6 +1934,7 @@ testQemuMonitorJSONqemuMonitorJSONGetMigrationStats(const void *data)
                                "}") < 0)
         goto cleanup;
 
+    memset(&stats, 0, sizeof(stats));
     if (qemuMonitorJSONGetMigrationStats(qemuMonitorTestGetMonitor(test),
                                          &stats) < 0)
         goto cleanup;
--
1.8.3.1

On Thu, Aug 24, 2017 at 09:56:50 +0300, Nikolay Shirokovskiy wrote:
Let's store disk stats for completed mirror jobs in the current job info. Then, on getting migration job stats through the API, we take records for completed jobs from the current job info and records for still active jobs by querying the qemu process.
As we need to keep the disk stats for completed mirror jobs in the current job, let's zero out migration stats conditionally in the fetching function.
This and the following hacks are not really needed. You can keep reading block job stats until they become ready (VIR_DOMAIN_DISK_MIRROR_STATE_READY), at which point the stats will report the disk as completely transferred and they will not be updated anymore. QEMU does not provide any statistics about data transferred during the mirroring phase. Thus you can just fetch and store all block job stats at the end of qemuMigrationDriveMirror, when the storage migration phase is finished. Asking QEMU after this point doesn't make sense.

Jirka
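[Editorial note] For illustration only, a minimal sketch of the approach suggested above: take one snapshot of the block job numbers per migrating disk once the jobs are ready, at the end of the drive-mirror phase, and never ask qemu again. The types and names (BlockJobInfo, DiskMirrorRecord, recordMirrorStatsAtEndOfDriveMirror) are simplified stand-ins, not libvirt's own.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for qemuMonitorBlockJobInfo and the per-disk
 * private data; names and fields are illustrative only. */
typedef struct {
    unsigned long long cur;   /* bytes written by the mirror job */
    unsigned long long end;   /* job length reported by qemu */
    bool ready;               /* mirror reached the READY state */
} BlockJobInfo;

typedef struct {
    bool migrating;                 /* disk is part of the NBD migration */
    unsigned long long transferred; /* recorded once, at end of drive mirror */
    unsigned long long total;
} DiskMirrorRecord;

/* Record final stats for every migrating disk once the storage migration
 * phase is over; after that point qemu no longer needs to be asked. */
static void
recordMirrorStatsAtEndOfDriveMirror(const BlockJobInfo *jobs,
                                    DiskMirrorRecord *disks,
                                    size_t ndisks)
{
    size_t i;

    for (i = 0; i < ndisks; i++) {
        if (!disks[i].migrating || !jobs[i].ready)
            continue;
        /* Once the job is ready, cur == end and the numbers stop changing,
         * so this single snapshot is the final answer. */
        disks[i].transferred = jobs[i].cur;
        disks[i].total = jobs[i].end;
    }
}

int main(void)
{
    BlockJobInfo jobs[1] = { { .cur = 64ULL << 30, .end = 64ULL << 30, .ready = true } };
    DiskMirrorRecord disks[1] = { { .migrating = true } };

    recordMirrorStatsAtEndOfDriveMirror(jobs, disks, 1);
    printf("vda: %llu of %llu bytes mirrored\n",
           disks[0].transferred, disks[0].total);
    return 0;
}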

On 30.08.2017 11:07, Jiri Denemark wrote:
On Thu, Aug 24, 2017 at 09:56:50 +0300, Nikolay Shirokovskiy wrote:
Let's store disk stats for completed mirror jobs in the current job info. Then, on getting migration job stats through the API, we take records for completed jobs from the current job info and records for still active jobs by querying the qemu process.
As we need to keep the disk stats for completed mirror jobs in the current job, let's zero out migration stats conditionally in the fetching function.
This and the following hacks are not really needed. You can keep reading block job stats until they become ready (VIR_DOMAIN_DISK_MIRROR_STATE_READY), at which point the stats will report the disk as completely transferred and they will not be updated anymore. QEMU does not provide any statistics about data transferred during the mirroring phase. Thus you can just fetch and store all block job stats at the end of qemuMigrationDriveMirror, when the storage migration phase is finished. Asking QEMU after this point doesn't make sense.
Jirka
I see. I evidently assumed we could fetch disk stats even after the ready state. Well, if not, then the code will be simpler :)

Nikolay

During stopping of mirror block jobs the vm lock is dropped while awaiting block job events, so the following scenario is possible:

1. a request to stop a mirror block job is sent
2. the migration routine waits for the block job event
3. the mirror job stops and the event is sent
4. the routine getting migration job info asks for block job info; as the qemu block job has stopped and disappeared, we get no block job info

As a result, block job info for that disk will be missing from the result. To handle this case, let's ask qemu for block job info and store it just before stopping the mirror jobs. The routine getting migration job info then detects this race and takes the stored block job info instead. The info will be only slightly outdated and at least will not go backwards.
---
 src/qemu/qemu_migration.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index afe1804..906f8fe 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1588,6 +1588,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
 
         ignore_value(qemuMigrationFetchMigrationStats(driver, vm, asyncJob,
                                                       &jobInfo->stats, true));
+        qemuMigrationFetchMirrorStats(driver, vm, asyncJob, NULL);
         qemuDomainJobInfoUpdateTime(jobInfo);
         qemuDomainJobInfoUpdateDowntime(jobInfo);
         VIR_FREE(priv->job.completed);
@@ -5934,6 +5935,9 @@ qemuMigrationReset(virQEMUDriverPtr driver,
 }
 
 
+/* Query qemu for block job stats. If @stats is not NULL then sum them up
+ * in @stats, otherwise store them in each disk's private area.
+ */
 int
 qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver,
                               virDomainObjPtr vm,
@@ -5968,14 +5972,33 @@ qemuMigrationFetchMirrorStats(virQEMUDriverPtr driver,
         virDomainDiskDefPtr disk = vm->def->disks[i];
         qemuDomainDiskPrivatePtr diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
         qemuMonitorBlockJobInfoPtr data;
+        unsigned long long transferred;
+        unsigned long long total;
 
-        if (!diskPriv->migrating ||
-            !(data = virHashLookup(blockinfo, disk->info.alias)))
+        if (!diskPriv->migrating)
             continue;
 
-        stats->disk_transferred += data->cur;
-        stats->disk_total += data->end;
-        stats->disk_remaining += data->end - data->cur;
+        data = virHashLookup(blockinfo, disk->info.alias);
+
+        if (stats) {
+            if (data) {
+                transferred = data->cur;
+                total = data->end;
+            } else {
+                /* Race condition. There is no blockjob for the disk anymore
+                 * but it has been migrating via nbd. Luckily we store the job
+                 * len value just before stopping mirror jobs, which can be a
+                 * good approximation at this point.
+                 */
+                total = transferred = diskPriv->blockJobLength;
+            }
+
+            stats->disk_transferred += transferred;
+            stats->disk_total += total;
+            stats->disk_remaining += total - transferred;
+        } else if (data) {
+            diskPriv->blockJobLength = data->end;
+        }
     }
 
     virHashFree(blockinfo);
--
1.8.3.1

---
 src/qemu/qemu_migration.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 906f8fe..54cfdd0 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3867,6 +3867,13 @@ qemuMigrationRun(virQEMUDriverPtr driver,
                                       QEMU_ASYNC_JOB_MIGRATION_OUT,
                                       dconn) < 0)
             ret = -1;
+
+        if (ret == 0 && priv->job.completed) {
+            priv->job.completed->stats.disk_transferred =
+                priv->job.current->stats.disk_transferred;
+            priv->job.completed->stats.disk_total =
+                priv->job.current->stats.disk_total;
+        }
     }
 
     VIR_FREE(tlsAlias);
--
1.8.3.1

In case of a real migration (not migrating to a file on save, dump etc.) the migration info is not complete at the time qemu finishes migration in normal (non-postcopy) mode. We still need to update disk stats, downtime info etc. Thus let's not expose this job status as completed.

To achieve this, let's set the status to 'qemu completed' after qemu reports that migration is finished. It is not visible to clients as a completed job. The cookie code in the confirm phase will finally turn the job into completed. As there is nothing more to do when migrating to a file, the status is set to 'completed' as before in that case.
---
 src/qemu/qemu_domain.c    |  1 +
 src/qemu/qemu_domain.h    |  1 +
 src/qemu/qemu_driver.c    |  1 +
 src/qemu/qemu_migration.c | 13 +++++++++----
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 18f91d2..a794673 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -421,6 +421,7 @@ qemuDomainJobStatusToType(qemuDomainJobStatus status)
 
     case QEMU_DOMAIN_JOB_STATUS_ACTIVE:
     case QEMU_DOMAIN_JOB_STATUS_MIGRATING:
+    case QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED:
     case QEMU_DOMAIN_JOB_STATUS_POSTCOPY:
         return VIR_DOMAIN_JOB_UNBOUNDED;
 
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index d517719..9c1f691 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -103,6 +103,7 @@ typedef enum {
     QEMU_DOMAIN_JOB_STATUS_NONE = 0,
     QEMU_DOMAIN_JOB_STATUS_ACTIVE,
     QEMU_DOMAIN_JOB_STATUS_MIGRATING,
+    QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED,
     QEMU_DOMAIN_JOB_STATUS_POSTCOPY,
     QEMU_DOMAIN_JOB_STATUS_COMPLETED,
     QEMU_DOMAIN_JOB_STATUS_FAILED,
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ec5e0b1..f67d33d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -12941,6 +12941,7 @@ qemuDomainGetJobStatsInternal(virQEMUDriverPtr driver,
 
     if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_ACTIVE ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING ||
+        jobInfo->status == QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) {
 
         /* Disk stats accounting presumes that fetching migration
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 54cfdd0..8f7825f 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1358,7 +1358,7 @@ qemuMigrationUpdateJobType(qemuDomainJobInfoPtr jobInfo)
         break;
 
     case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED:
-        jobInfo->status = QEMU_DOMAIN_JOB_STATUS_COMPLETED;
+        jobInfo->status = QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED;
         break;
 
     case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE:
@@ -1464,6 +1464,7 @@ qemuMigrationCheckJobStatus(virQEMUDriverPtr driver,
     case QEMU_DOMAIN_JOB_STATUS_COMPLETED:
     case QEMU_DOMAIN_JOB_STATUS_ACTIVE:
     case QEMU_DOMAIN_JOB_STATUS_MIGRATING:
+    case QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED:
     case QEMU_DOMAIN_JOB_STATUS_POSTCOPY:
         break;
     }
@@ -1526,19 +1527,19 @@ qemuMigrationCompleted(virQEMUDriverPtr driver,
         return 1;
     }
 
-    if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_COMPLETED)
+    if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED)
         return 1;
     else
         return 0;
 
  error:
-    /* state can not be active at this point */
+    /* state can not be active or completed at this point */
     if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_MIGRATING ||
         jobInfo->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY) {
         /* The migration was aborted by us rather than QEMU itself. */
         jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED;
         return -2;
-    } else if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_COMPLETED) {
+    } else if (jobInfo->status == QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED) {
         jobInfo->status = QEMU_DOMAIN_JOB_STATUS_FAILED;
         return -1;
     } else {
@@ -1595,6 +1596,10 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver,
     if (VIR_ALLOC(priv->job.completed) == 0)
         *priv->job.completed = *jobInfo;
 
+    if (asyncJob != QEMU_ASYNC_JOB_MIGRATION_OUT &&
+        jobInfo->status == QEMU_DOMAIN_JOB_STATUS_QEMU_COMPLETED)
+        jobInfo->status = QEMU_DOMAIN_JOB_STATUS_COMPLETED;
+
     return 0;
 }
--
1.8.3.1

On Thu, Aug 24, 2017 at 09:56:53 +0300, Nikolay Shirokovskiy wrote:
In case of a real migration (not migrating to a file on save, dump etc.) the migration info is not complete at the time qemu finishes migration in normal (non-postcopy) mode. We still need to update disk stats, downtime info etc. Thus let's not expose this job status as completed.
To achieve this, let's set the status to 'qemu completed' after qemu reports that migration is finished. It is not visible to clients as a completed job. The cookie code in the confirm phase will finally turn the job into completed. As there is nothing more to do when migrating to a file, the status is set to 'completed' as before in that case.
---
 src/qemu/qemu_domain.c    |  1 +
 src/qemu/qemu_domain.h    |  1 +
 src/qemu/qemu_driver.c    |  1 +
 src/qemu/qemu_migration.c | 13 +++++++++----
 4 files changed, 12 insertions(+), 4 deletions(-)
ACK

Jirka