This patch introduce a lock for protecting the long-running
api (save, dump, migration and so on) from the other api
which may update the status of the virtual machine.
Signed-off-by: Bamvor Jian Zhang <bjzhang(a)suse.com>
---
src/libxl/libxl_conf.h | 58 ++++++
src/libxl/libxl_driver.c | 446 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 504 insertions(+)
diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
index 56bf85c..dab1466 100644
--- a/src/libxl/libxl_conf.h
+++ b/src/libxl/libxl_conf.h
@@ -73,6 +73,62 @@ struct _libxlDriverPrivate {
char *saveDir;
};
+# define JOB_MASK(job) (1 << (job - 1))
+# define DEFAULT_JOB_MASK \
+ (JOB_MASK(LIBXL_JOB_DESTROY) | \
+ JOB_MASK(LIBXL_JOB_ABORT))
+
+/* Jobs which have to be tracked in domain state XML. */
+# define LIBXL_DOMAIN_TRACK_JOBS \
+ (JOB_MASK(LIBXL_JOB_DESTROY) | \
+ JOB_MASK(LIBXL_JOB_ASYNC))
+
+/* Only 1 job is allowed at any time
+ * A job includes *all* libxl.so api, even those just querying
+ * information, not merely actions */
+enum libxlDomainJob {
+ LIBXL_JOB_NONE = 0, /* Always set to 0 for easy if (jobActive) conditions */
+ LIBXL_JOB_DESTROY, /* Destroys the domain (cannot be masked out) */
+ LIBXL_JOB_MODIFY, /* May change state */
+ LIBXL_JOB_ABORT, /* Abort current async job */
+ LIBXL_JOB_MIGRATION_OP, /* Operation influencing outgoing migration */
+
+ /* The following two items must always be the last items before JOB_LAST */
+ LIBXL_JOB_ASYNC, /* Asynchronous job */
+
+ LIBXL_JOB_LAST
+};
+VIR_ENUM_DECL(libxlDomainJob)
+
+/* Async job consists of a series of jobs that may change state. Independent
+ * jobs that do not change state (and possibly others if explicitly allowed by
+ * current async job) are allowed to be run even if async job is active.
+ */
+enum libxlDomainAsyncJob {
+ LIBXL_ASYNC_JOB_NONE = 0,
+ LIBXL_ASYNC_JOB_MIGRATION_OUT,
+ LIBXL_ASYNC_JOB_MIGRATION_IN,
+ LIBXL_ASYNC_JOB_SAVE,
+ LIBXL_ASYNC_JOB_DUMP,
+
+ LIBXL_ASYNC_JOB_LAST
+};
+VIR_ENUM_DECL(libxlDomainAsyncJob)
+
+struct libxlDomainJobObj {
+ virCond cond; /* Use to coordinate jobs */
+ enum libxlDomainJob active; /* Currently running job */
+ int owner; /* Thread which set current job */
+
+ virCond asyncCond; /* Use to coordinate with async jobs */
+ enum libxlDomainAsyncJob asyncJob; /* Currently active async job */
+ int asyncOwner; /* Thread which set current async job */
+ int phase; /* Job phase (mainly for migrations) */
+ unsigned long long mask; /* Jobs allowed during async job */
+ unsigned long long start; /* When the async job started */
+ virDomainJobInfo info; /* Async job progress data */
+};
+
typedef struct _libxlDomainObjPrivate libxlDomainObjPrivate;
typedef libxlDomainObjPrivate *libxlDomainObjPrivatePtr;
struct _libxlDomainObjPrivate {
@@ -81,6 +137,8 @@ struct _libxlDomainObjPrivate {
libxl_waiter *dWaiter;
int waiterFD;
int eventHdl;
+
+ struct libxlDomainJobObj job;
};
# define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
index 6fe284f..d01d915 100644
--- a/src/libxl/libxl_driver.c
+++ b/src/libxl/libxl_driver.c
@@ -45,6 +45,7 @@
#include "xen_xm.h"
#include "virtypedparam.h"
#include "viruri.h"
+#include "virtime.h"
#define VIR_FROM_THIS VIR_FROM_LIBXL
@@ -84,6 +85,336 @@ libxlDriverUnlock(libxlDriverPrivatePtr driver)
virMutexUnlock(&driver->lock);
}
+/* job */
+VIR_ENUM_IMPL(libxlDomainJob, LIBXL_JOB_LAST,
+ "none",
+ "destroy",
+ "modify",
+ "abort",
+ "migration operation",
+ "none", /* async job is never stored in job.active */
+);
+
+VIR_ENUM_IMPL(libxlDomainAsyncJob, LIBXL_ASYNC_JOB_LAST,
+ "none",
+ "migration out",
+ "migration in",
+ "save",
+ "dump",
+);
+
+static int
+libxlDomainObjInitJob(libxlDomainObjPrivatePtr priv)
+{
+ memset(&priv->job, 0, sizeof(priv->job));
+
+ if (virCondInit(&priv->job.cond) < 0)
+ return -1;
+
+ if (virCondInit(&priv->job.asyncCond) < 0) {
+ ignore_value(virCondDestroy(&priv->job.cond));
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+libxlDomainObjResetJob(libxlDomainObjPrivatePtr priv)
+{
+ struct libxlDomainJobObj *job = &priv->job;
+
+ job->active = LIBXL_JOB_NONE;
+ job->owner = 0;
+}
+
+static void
+libxlDomainObjResetAsyncJob(libxlDomainObjPrivatePtr priv)
+{
+ struct libxlDomainJobObj *job = &priv->job;
+
+ job->asyncJob = LIBXL_ASYNC_JOB_NONE;
+ job->asyncOwner = 0;
+ job->phase = 0;
+ job->mask = DEFAULT_JOB_MASK;
+ job->start = 0;
+ memset(&job->info, 0, sizeof(job->info));
+}
+
+static void
+libxlDomainObjFreeJob(libxlDomainObjPrivatePtr priv)
+{
+ ignore_value(virCondDestroy(&priv->job.cond));
+ ignore_value(virCondDestroy(&priv->job.asyncCond));
+}
+
+static bool
+libxlDomainTrackJob(enum libxlDomainJob job)
+{
+ return (LIBXL_DOMAIN_TRACK_JOBS & JOB_MASK(job)) != 0;
+}
+
+static void
+libxlDomainObjSaveJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+ if (!virDomainObjIsActive(obj)) {
+ /* don't write the state file yet, it will be written once the domain
+ * gets activated */
+ return;
+ }
+
+ if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
+ VIR_WARN("Failed to save status on vm %s", obj->def->name);
+}
+
+static bool
+libxlDomainNestedJobAllowed(libxlDomainObjPrivatePtr priv, enum libxlDomainJob job)
+{
+ return !priv->job.asyncJob || (priv->job.mask & JOB_MASK(job)) != 0;
+}
+
+static void
+libxlDomainObjSetAsyncJobMask(virDomainObjPtr obj,
+ unsigned long long allowedJobs)
+{
+ libxlDomainObjPrivatePtr priv = obj->privateData;
+
+ if (!priv->job.asyncJob)
+ return;
+
+ priv->job.mask = allowedJobs | JOB_MASK(LIBXL_JOB_DESTROY);
+}
+
+/* Give up waiting for mutex after 30 seconds */
+#define LIBXL_JOB_WAIT_TIME (1000ull * 30)
+
+/*
+ * obj must be locked before calling; driver_locked says if libxlDriverPrivatePtr
+ * is locked or not.
+ */
+static int ATTRIBUTE_NONNULL(1)
+libxlDomainObjBeginJobInternal(libxlDriverPrivatePtr driver,
+ bool driver_locked,
+ virDomainObjPtr obj,
+ enum libxlDomainJob job,
+ enum libxlDomainAsyncJob asyncJob)
+{
+ libxlDomainObjPrivatePtr priv = obj->privateData;
+ unsigned long long now;
+ unsigned long long then;
+
+ if (virTimeMillisNow(&now) < 0)
+ return -1;
+ then = now + LIBXL_JOB_WAIT_TIME;
+
+ virObjectRef(obj);
+ if (driver_locked) {
+ libxlDriverUnlock(driver);
+ }
+
+retry:
+ while (!libxlDomainNestedJobAllowed(priv, job)) {
+ VIR_DEBUG("Wait async job condition for starting job: %s (async=%s)",
+ libxlDomainJobTypeToString(job),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+ if (virCondWaitUntil(&priv->job.asyncCond, &obj->lock, then) <
0)
+ goto error;
+ }
+
+ while (priv->job.active) {
+ VIR_DEBUG("Wait normal job condition for starting job: %s (async=%s)",
+ libxlDomainJobTypeToString(job),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+ if (virCondWaitUntil(&priv->job.cond, &obj->lock, then) < 0)
+ goto error;
+ }
+
+ /* No job is active but a new async job could have been started while obj
+ * was unlocked, so we need to recheck it. */
+ if (!libxlDomainNestedJobAllowed(priv, job))
+ goto retry;
+
+ libxlDomainObjResetJob(priv);
+
+ if (job != LIBXL_JOB_ASYNC) {
+ VIR_DEBUG("Starting job: %s (async=%s)",
+ libxlDomainJobTypeToString(job),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+ priv->job.active = job;
+ priv->job.owner = virThreadSelfID();
+ } else {
+ VIR_DEBUG("Starting async job: %s",
+ libxlDomainAsyncJobTypeToString(asyncJob));
+ libxlDomainObjResetAsyncJob(priv);
+ priv->job.asyncJob = asyncJob;
+ priv->job.asyncOwner = virThreadSelfID();
+ priv->job.start = now;
+ }
+
+ if (driver_locked) {
+ virDomainObjUnlock(obj);
+ libxlDriverLock(driver);
+ virDomainObjLock(obj);
+ }
+
+ if (libxlDomainTrackJob(job))
+ libxlDomainObjSaveJob(driver, obj);
+
+ return 0;
+
+error:
+ VIR_WARN("Cannot start job (%s, %s) for domain %s;"
+ " current job is (%s, %s) owned by (%d, %d)",
+ libxlDomainJobTypeToString(job),
+ libxlDomainAsyncJobTypeToString(asyncJob),
+ obj->def->name,
+ libxlDomainJobTypeToString(priv->job.active),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob),
+ priv->job.owner, priv->job.asyncOwner);
+
+ if (errno == ETIMEDOUT)
+ virReportError(VIR_ERR_OPERATION_TIMEOUT,
+ "%s", _("cannot acquire state change lock"));
+ else
+ virReportSystemError(errno,
+ "%s", _("cannot acquire job mutex"));
+ if (driver_locked) {
+ virDomainObjUnlock(obj);
+ libxlDriverLock(driver);
+ virDomainObjLock(obj);
+ }
+ virObjectUnref(obj);
+ return -1;
+}
+
+/*
+ * obj must be locked before calling, libxlDriverPrivatePtr must NOT be locked
+ *
+ * This must be called by anything that will change the VM state
+ * in any way
+ *
+ * Upon successful return, the object will have its ref count increased,
+ * successful calls must be followed by EndJob eventually
+ */
+static int
+libxlDomainObjBeginJob(libxlDriverPrivatePtr driver,
+ virDomainObjPtr obj,
+ enum libxlDomainJob job)
+{
+ return libxlDomainObjBeginJobInternal(driver, false, obj, job,
+ LIBXL_ASYNC_JOB_NONE);
+}
+
+static int ATTRIBUTE_UNUSED
+libxlDomainObjBeginAsyncJob(libxlDriverPrivatePtr driver,
+ virDomainObjPtr obj,
+ enum libxlDomainAsyncJob asyncJob)
+{
+ return libxlDomainObjBeginJobInternal(driver, false, obj, LIBXL_JOB_ASYNC,
+ asyncJob);
+}
+
+/*
+ * obj must be locked before calling. If libxlDriverPrivatePtr is passed, it
+ * MUST be locked; otherwise it MUST NOT be locked.
+ *
+ * This must be called by anything that will change the VM state
+ * in any way
+ *
+ * Upon successful return, the object will have its ref count increased,
+ * successful calls must be followed by EndJob eventually
+ */
+static int
+libxlDomainObjBeginJobWithDriver(libxlDriverPrivatePtr driver,
+ virDomainObjPtr obj,
+ enum libxlDomainJob job)
+{
+ if (job <= LIBXL_JOB_NONE || job >= LIBXL_JOB_ASYNC) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Attempt to start invalid job"));
+ return -1;
+ }
+
+ return libxlDomainObjBeginJobInternal(driver, true, obj, job,
+ LIBXL_ASYNC_JOB_NONE);
+}
+
+static int
+libxlDomainObjBeginAsyncJobWithDriver(libxlDriverPrivatePtr driver,
+ virDomainObjPtr obj,
+ enum libxlDomainAsyncJob asyncJob)
+{
+ return libxlDomainObjBeginJobInternal(driver, true, obj, LIBXL_JOB_ASYNC,
+ asyncJob);
+}
+
+/*
+ * obj must be locked before calling, libxlDriverPrivatePtr does not matter
+ *
+ * To be called after completing the work associated with the
+ * earlier libxlDomainBeginJob() call
+ *
+ * Returns remaining refcount on 'obj', maybe 0 to indicated it
+ * was deleted
+ */
+static bool
+libxlDomainObjEndJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+ libxlDomainObjPrivatePtr priv = obj->privateData;
+ enum libxlDomainJob job = priv->job.active;
+
+ VIR_DEBUG("Stopping job: %s (async=%s)",
+ libxlDomainJobTypeToString(job),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+ libxlDomainObjResetJob(priv);
+ if (libxlDomainTrackJob(job))
+ libxlDomainObjSaveJob(driver, obj);
+ virCondSignal(&priv->job.cond);
+
+ return virObjectUnref(obj);
+}
+
+static bool
+libxlDomainObjEndAsyncJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
+{
+ libxlDomainObjPrivatePtr priv = obj->privateData;
+
+ VIR_DEBUG("Stopping async job: %s",
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+
+ libxlDomainObjResetAsyncJob(priv);
+ libxlDomainObjSaveJob(driver, obj);
+ virCondBroadcast(&priv->job.asyncCond);
+
+ return virObjectUnref(obj);
+}
+
+static int ATTRIBUTE_UNUSED
+libxlMigrationJobStart(libxlDriverPrivatePtr driver,
+ virDomainObjPtr vm,
+ enum libxlDomainAsyncJob job)
+{
+ libxlDomainObjPrivatePtr priv = vm->privateData;
+
+ if (libxlDomainObjBeginAsyncJobWithDriver(driver, vm, job) < 0)
+ return -1;
+
+ libxlDomainObjSetAsyncJobMask(vm, DEFAULT_JOB_MASK |
+ JOB_MASK(LIBXL_JOB_MIGRATION_OP));
+
+ priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED;
+
+ return 0;
+}
+
+static bool ATTRIBUTE_UNUSED
+libxlMigrationJobFinish(libxlDriverPrivatePtr driver, virDomainObjPtr vm)
+{
+ return libxlDomainObjEndAsyncJob(driver, vm);
+}
+/* job function finish */
+
static void *
libxlDomainObjPrivateAlloc(void)
{
@@ -92,11 +423,18 @@ libxlDomainObjPrivateAlloc(void)
if (VIR_ALLOC(priv) < 0)
return NULL;
+ if (libxlDomainObjInitJob(priv) < 0)
+ goto error;
+
libxl_ctx_init(&priv->ctx, LIBXL_VERSION, libxl_driver->logger);
priv->waiterFD = -1;
priv->eventHdl = -1;
return priv;
+
+error:
+ VIR_FREE(priv);
+ return NULL;
}
static void
@@ -114,6 +452,7 @@ libxlDomainObjPrivateFree(void *data)
}
libxl_ctx_free(&priv->ctx);
+ libxlDomainObjFreeJob(priv);
VIR_FREE(priv);
}
@@ -3830,6 +4169,111 @@ cleanup:
}
static int
+libxlDomainGetJobInfo(virDomainPtr dom,
+ virDomainJobInfoPtr info)
+{
+ libxlDriverPrivatePtr driver = dom->conn->privateData;
+ virDomainObjPtr vm;
+ int ret = -1;
+ libxlDomainObjPrivatePtr priv;
+
+ libxlDriverLock(driver);
+ vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+ libxlDriverUnlock(driver);
+ if (!vm) {
+ char uuidstr[VIR_UUID_STRING_BUFLEN];
+ virUUIDFormat(dom->uuid, uuidstr);
+ virReportError(VIR_ERR_NO_DOMAIN,
+ _("no domain with matching uuid '%s'"),
uuidstr);
+ goto cleanup;
+ }
+
+ priv = vm->privateData;
+
+ if (virDomainObjIsActive(vm)) {
+ if (priv->job.asyncJob) {
+ memcpy(info, &priv->job.info, sizeof(*info));
+
+ /* Refresh elapsed time again just to ensure it
+ * is fully updated. This is primarily for benefit
+ * of incoming migration which we don't currently
+ * monitor actively in the background thread
+ */
+ if (virTimeMillisNow(&info->timeElapsed) < 0)
+ goto cleanup;
+ info->timeElapsed -= priv->job.start;
+ } else {
+ memset(info, 0, sizeof(*info));
+ info->type = VIR_DOMAIN_JOB_NONE;
+ }
+ } else {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto cleanup;
+ }
+
+ ret = 0;
+
+cleanup:
+ if (vm)
+ virDomainObjUnlock(vm);
+ return ret;
+}
+
+static int
+libxlDomainAbortJob(virDomainPtr dom)
+{
+ libxlDriverPrivatePtr driver = dom->conn->privateData;
+ virDomainObjPtr vm;
+ int ret = -1;
+ libxlDomainObjPrivatePtr priv;
+
+ libxlDriverLock(driver);
+ vm = virDomainFindByUUID(&driver->domains, dom->uuid);
+ libxlDriverUnlock(driver);
+ if (!vm) {
+ char uuidstr[VIR_UUID_STRING_BUFLEN];
+ virUUIDFormat(dom->uuid, uuidstr);
+ virReportError(VIR_ERR_NO_DOMAIN,
+ _("no domain with matching uuid '%s'"),
uuidstr);
+ goto cleanup;
+ }
+
+ if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_ABORT) < 0)
+ goto cleanup;
+
+ if (!virDomainObjIsActive(vm)) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("domain is not running"));
+ goto endjob;
+ }
+
+ priv = vm->privateData;
+
+ if (!priv->job.asyncJob) {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ "%s", _("no job is active on the domain"));
+ goto endjob;
+ } else {
+ virReportError(VIR_ERR_OPERATION_INVALID,
+ _("cannot abort %s; use virDomainDestroy instead"),
+ libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
+ goto endjob;
+ }
+
+ VIR_DEBUG("Not job could be cancelled at current version");
+
+endjob:
+ if (libxlDomainObjEndJob(driver, vm) == 0)
+ vm = NULL;
+
+cleanup:
+ if (vm)
+ virDomainObjUnlock(vm);
+ return ret;
+}
+
+static int
libxlDomainEventRegisterAny(virConnectPtr conn, virDomainPtr dom, int eventID,
virConnectDomainEventGenericCallback callback,
void *opaque, virFreeCallback freecb)
@@ -3963,6 +4407,8 @@ static virDriver libxlDriver = {
.domainIsActive = libxlDomainIsActive, /* 0.9.0 */
.domainIsPersistent = libxlDomainIsPersistent, /* 0.9.0 */
.domainIsUpdated = libxlDomainIsUpdated, /* 0.9.0 */
+ .domainGetJobInfo = libxlDomainGetJobInfo, /* 0.10.0 */
+ .domainAbortJob = libxlDomainAbortJob, /* 0.10.0 */
.domainEventRegisterAny = libxlDomainEventRegisterAny, /* 0.9.0 */
.domainEventDeregisterAny = libxlDomainEventDeregisterAny, /* 0.9.0 */
.isAlive = libxlIsAlive, /* 0.9.8 */
--
1.7.12