From: Federico Simoncelli <fsimonce(a)redhat.com>
Originally, most of the libvirt domain-specific calls were blocking
during a migration.
A new mechanism to allow specific calls (blkstat/blkinfo) to be
executed under such conditions has been implemented.
In the long term it'd be desirable to get a more general
solution to mark further APIs as migration safe, without needing
special case code.
* src/qemu/qemu_migration.c: add some additional job signal
flags for doing blkstat/blkinfo during a migration
* src/qemu/qemu_domain.c: add a condition variable that can be
used to efficiently wait for the migration code to clear the
signal flag
* src/qemu/qemu_driver.c: execute blkstat/blkinfo using the
job signal flags during migration
---
My changes in v4:
Fix the mailmap (elided from this mail) for Federico's preferred address.
Add cleanup for jobCond.
Defer changes to jobSignals until after jobSignalData is stable.
Alter qemuMigrationProcessJobSignals to have a mode of operation
that guarantees that a jobSignals bit is cleared, regardless of
other errors.
.mailmap | 1 +
AUTHORS | 2 +-
src/qemu/qemu_domain.c | 14 ++++++
src/qemu/qemu_domain.h | 9 ++++
src/qemu/qemu_driver.c | 107 +++++++++++++++++++++++++++++++-------------
src/qemu/qemu_migration.c | 76 +++++++++++++++++++++++++-------
6 files changed, 159 insertions(+), 50 deletions(-)
diff --git a/.mailmap b/.mailmap
index f73e26b..2b98322 100644
--- a/.mailmap
+++ b/.mailmap
diff --git a/AUTHORS b/AUTHORS
index 1bb1f0f..47860a9 100644
--- a/AUTHORS
+++ b/AUTHORS
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index bcacb18..8f4915c 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -89,7 +89,19 @@ static void *qemuDomainObjPrivateAlloc(void)
if (VIR_ALLOC(priv) < 0)
return NULL;
+ if (virCondInit(&priv->jobCond) < 0)
+ goto initfail;
+
+ if (virCondInit(&priv->signalCond) < 0) {
+ ignore_value(virCondDestroy(&priv->jobCond));
+ goto initfail;
+ }
+
return priv;
+
+initfail:
+ VIR_FREE(priv);
+ return NULL;
}
static void qemuDomainObjPrivateFree(void *data)
@@ -101,6 +113,8 @@ static void qemuDomainObjPrivateFree(void *data)
qemuDomainPCIAddressSetFree(priv->pciaddrs);
virDomainChrSourceDefFree(priv->monConfig);
VIR_FREE(priv->vcpupids);
+ ignore_value(virCondDestroy(&priv->jobCond));
+ ignore_value(virCondDestroy(&priv->signalCond));
/* This should never be non-NULL if we get here, but just in case... */
if (priv->mon) {
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 6d24f53..af513e7 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -47,11 +47,19 @@ enum qemuDomainJobSignals {
QEMU_JOB_SIGNAL_SUSPEND = 1 << 1, /* Request VM suspend to finish live
migration offline */
QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME = 1 << 2, /* Request migration downtime change
*/
QEMU_JOB_SIGNAL_MIGRATE_SPEED = 1 << 3, /* Request migration speed change */
+ QEMU_JOB_SIGNAL_BLKSTAT = 1 << 4, /* Request blkstat during migration */
+ QEMU_JOB_SIGNAL_BLKINFO = 1 << 5, /* Request blkinfo during migration */
};
struct qemuDomainJobSignalsData {
unsigned long long migrateDowntime; /* Data for QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME */
unsigned long migrateBandwidth; /* Data for QEMU_JOB_SIGNAL_MIGRATE_SPEED */
+ char *statDevName; /* Device name used by blkstat calls */
+ virDomainBlockStatsPtr blockStat; /* Block statistics for QEMU_JOB_SIGNAL_BLKSTAT */
+ int statRetCode; /* Return code for the blkstat calls */
+ char *infoDevName; /* Device name used by blkinfo calls */
+ virDomainBlockInfoPtr blockInfo; /* Block information for QEMU_JOB_SIGNAL_BLKINFO */
+ int infoRetCode; /* Return code for the blkinfo calls */
};
typedef struct _qemuDomainPCIAddressSet qemuDomainPCIAddressSet;
@@ -61,6 +69,7 @@ typedef struct _qemuDomainObjPrivate qemuDomainObjPrivate;
typedef qemuDomainObjPrivate *qemuDomainObjPrivatePtr;
struct _qemuDomainObjPrivate {
virCond jobCond; /* Use in conjunction with main virDomainObjPtr lock */
+ virCond signalCond; /* Use to coordinate the safe queries during migration */
enum qemuDomainJob jobActive; /* Currently running job */
unsigned int jobSignals; /* Signals for running job */
struct qemuDomainJobSignalsData jobSignalsData; /* Signal specific data */
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index ccaae66..2bd4d0b 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -5205,15 +5205,6 @@ qemudDomainBlockStats (virDomainPtr dom,
goto cleanup;
}
- if (qemuDomainObjBeginJob(vm) < 0)
- goto cleanup;
-
- if (!virDomainObjIsActive(vm)) {
- qemuReportError(VIR_ERR_OPERATION_INVALID,
- "%s", _("domain is not running"));
- goto endjob;
- }
-
for (i = 0 ; i < vm->def->ndisks ; i++) {
if (STREQ(path, vm->def->disks[i]->dst)) {
disk = vm->def->disks[i];
@@ -5224,29 +5215,57 @@ qemudDomainBlockStats (virDomainPtr dom,
if (!disk) {
qemuReportError(VIR_ERR_INVALID_ARG,
_("invalid path: %s"), path);
- goto endjob;
+ goto cleanup;
}
if (!disk->info.alias) {
qemuReportError(VIR_ERR_INTERNAL_ERROR,
_("missing disk device alias name for %s"),
disk->dst);
- goto endjob;
+ goto cleanup;
}
priv = vm->privateData;
- qemuDomainObjEnterMonitor(vm);
- ret = qemuMonitorGetBlockStatsInfo(priv->mon,
- disk->info.alias,
- &stats->rd_req,
- &stats->rd_bytes,
- &stats->wr_req,
- &stats->wr_bytes,
- &stats->errs);
- qemuDomainObjExitMonitor(vm);
+ if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT)
+ || (priv->jobActive == QEMU_JOB_SAVE)) {
+ virDomainObjRef(vm);
+ while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT)
+ ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+ priv->jobSignalsData.statDevName = disk->info.alias;
+ priv->jobSignalsData.blockStat = stats;
+ priv->jobSignalsData.statRetCode = -1;
+ priv->jobSignals |= QEMU_JOB_SIGNAL_BLKSTAT;
+
+ while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT)
+ ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+ ret = priv->jobSignalsData.statRetCode;
+ if (virDomainObjUnref(vm) == 0)
+ vm = NULL;
+ } else {
+ if (qemuDomainObjBeginJob(vm) < 0)
+ goto cleanup;
+
+ if (!virDomainObjIsActive(vm)) {
+ qemuReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto endjob;
+ }
+
+ qemuDomainObjEnterMonitor(vm);
+ ret = qemuMonitorGetBlockStatsInfo(priv->mon,
+ disk->info.alias,
+ &stats->rd_req,
+ &stats->rd_bytes,
+ &stats->wr_req,
+ &stats->wr_bytes,
+ &stats->errs);
+ qemuDomainObjExitMonitor(vm);
endjob:
- if (qemuDomainObjEndJob(vm) == 0)
- vm = NULL;
+ if (qemuDomainObjEndJob(vm) == 0)
+ vm = NULL;
+ }
cleanup:
if (vm)
@@ -5650,20 +5669,44 @@ static int qemuDomainGetBlockInfo(virDomainPtr dom,
format != VIR_STORAGE_FILE_RAW &&
S_ISBLK(sb.st_mode)) {
qemuDomainObjPrivatePtr priv = vm->privateData;
- if (qemuDomainObjBeginJob(vm) < 0)
- goto cleanup;
- if (!virDomainObjIsActive(vm))
- ret = 0;
- else {
+
+ if ((priv->jobActive == QEMU_JOB_MIGRATION_OUT)
+ || (priv->jobActive == QEMU_JOB_SAVE)) {
+ virDomainObjRef(vm);
+ while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO)
+ ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+ priv->jobSignalsData.infoDevName = disk->info.alias;
+ priv->jobSignalsData.blockInfo = info;
+ priv->jobSignalsData.infoRetCode = -1;
+ priv->jobSignals |= QEMU_JOB_SIGNAL_BLKINFO;
+
+ while (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO)
+ ignore_value(virCondWait(&priv->signalCond, &vm->lock));
+
+ ret = priv->jobSignalsData.infoRetCode;
+ if (virDomainObjUnref(vm) == 0)
+ vm = NULL;
+ } else {
+ if (qemuDomainObjBeginJob(vm) < 0)
+ goto cleanup;
+
+ if (!virDomainObjIsActive(vm)) {
+ qemuReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto endjob;
+ }
+
qemuDomainObjEnterMonitor(vm);
ret = qemuMonitorGetBlockExtent(priv->mon,
disk->info.alias,
&info->allocation);
qemuDomainObjExitMonitor(vm);
- }
- if (qemuDomainObjEndJob(vm) == 0)
- vm = NULL;
+endjob:
+ if (qemuDomainObjEndJob(vm) == 0)
+ vm = NULL;
+ }
} else {
ret = 0;
}
@@ -6543,8 +6586,8 @@ qemuDomainMigrateSetMaxDowntime(virDomainPtr dom,
}
VIR_DEBUG("Requesting migration downtime change to %llums", downtime);
- priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME;
priv->jobSignalsData.migrateDowntime = downtime;
+ priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME;
ret = 0;
cleanup:
@@ -6592,8 +6635,8 @@ qemuDomainMigrateSetMaxSpeed(virDomainPtr dom,
}
VIR_DEBUG("Requesting migration speed change to %luMbs", bandwidth);
- priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_SPEED;
priv->jobSignalsData.migrateBandwidth = bandwidth;
+ priv->jobSignals |= QEMU_JOB_SIGNAL_MIGRATE_SPEED;
ret = 0;
cleanup:
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index cb408ac..a091e2a 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -586,7 +586,8 @@ qemuMigrationSetOffline(struct qemud_driver *driver,
static int
qemuMigrationProcessJobSignals(struct qemud_driver *driver,
virDomainObjPtr vm,
- const char *job)
+ const char *job,
+ bool cleanup)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
int ret = -1;
@@ -594,6 +595,11 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver,
if (!virDomainObjIsActive(vm)) {
qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"),
job, _("guest unexpectedly quit"));
+ if (cleanup) {
+ priv->jobSignals = 0;
+ priv->jobSignalsData.statRetCode = -1;
+ priv->jobSignalsData.infoRetCode = -1;
+ }
return -1;
}
@@ -633,6 +639,34 @@ qemuMigrationProcessJobSignals(struct qemud_driver *driver,
qemuDomainObjExitMonitorWithDriver(driver, vm);
if (ret < 0)
VIR_WARN("Unable to set migration speed");
+ } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKSTAT) {
+ qemuDomainObjEnterMonitorWithDriver(driver, vm);
+ ret = qemuMonitorGetBlockStatsInfo(priv->mon,
+ priv->jobSignalsData.statDevName,
+ &priv->jobSignalsData.blockStat->rd_req,
+ &priv->jobSignalsData.blockStat->rd_bytes,
+ &priv->jobSignalsData.blockStat->wr_req,
+ &priv->jobSignalsData.blockStat->wr_bytes,
+ &priv->jobSignalsData.blockStat->errs);
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+ priv->jobSignalsData.statRetCode = ret;
+ priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKSTAT;
+
+ if (ret < 0)
+ VIR_WARN("Unable to get block statistics");
+ } else if (priv->jobSignals & QEMU_JOB_SIGNAL_BLKINFO) {
+ qemuDomainObjEnterMonitorWithDriver(driver, vm);
+ ret = qemuMonitorGetBlockExtent(priv->mon,
+ priv->jobSignalsData.infoDevName,
+ &priv->jobSignalsData.blockInfo->allocation);
+ qemuDomainObjExitMonitorWithDriver(driver, vm);
+
+ priv->jobSignalsData.infoRetCode = ret;
+ priv->jobSignals ^= QEMU_JOB_SIGNAL_BLKINFO;
+
+ if (ret < 0)
+ VIR_WARN("Unable to get block information");
} else {
ret = 0;
}
@@ -726,30 +760,33 @@ int
qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
+ const char *job;
+
+ switch (priv->jobActive) {
+ case QEMU_JOB_MIGRATION_OUT:
+ job = _("migration job");
+ break;
+ case QEMU_JOB_SAVE:
+ job = _("domain save job");
+ break;
+ case QEMU_JOB_DUMP:
+ job = _("domain core dump job");
+ break;
+ default:
+ job = _("job");
+ }
priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED;
while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) {
/* Poll every 50ms for progress & to allow cancellation */
struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull };
- const char *job;
-
- switch (priv->jobActive) {
- case QEMU_JOB_MIGRATION_OUT:
- job = _("migration job");
- break;
- case QEMU_JOB_SAVE:
- job = _("domain save job");
- break;
- case QEMU_JOB_DUMP:
- job = _("domain core dump job");
- break;
- default:
- job = _("job");
+ while (priv->jobSignals) {
+ if (qemuMigrationProcessJobSignals(driver, vm, job, false) < 0)
+ goto cleanup;
}
- if (qemuMigrationProcessJobSignals(driver, vm, job) < 0)
- goto cleanup;
+ virCondSignal(&priv->signalCond);
if (qemuMigrationUpdateJobStatus(driver, vm, job) < 0)
goto cleanup;
@@ -765,6 +802,11 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver,
virDomainObjPtr vm)
}
cleanup:
+ while (priv->jobSignals) {
+ qemuMigrationProcessJobSignals(driver, vm, job, true);
+ }
+ virCondBroadcast(&priv->signalCond);
+
if (priv->jobInfo.type == VIR_DOMAIN_JOB_COMPLETED)
return 0;
else
--
1.7.4.4