Hi, Jim
thanks your reply.
i have update my patch and resend to the list.
besides, there are two comments from you not modify in my v2 patch.
see my comments below.
>>Jim Fehlig <jfehlig(a)suse.com> wrote:
Bamvor Jian Zhang wrote:
> This patch introduce a lock for protecting the long-running
> api (save, dump, migration and so on) from the other api
> which may update the status of the virtual machine.
>
Hi Bamvor,
Thanks for the patches and sorry for the delayed response. I've been
traveling quite a bit lately and just got around to reviewing and
testing your work.
Testing so far looks good. I can save and dump vm's while at the same
time list and retrieve info.
See my comments inline, but did want to raise a more general comment
first. There is a quite a bit of code here borrowed from the qemu
driver, which in general is fine since the libxl driver does not need
the same locking features as the qemu one. I'd like to hear the opinion
of other libvirt maintainers wrt the duplicated code.
> Signed-off-by: Bamvor Jian Zhang <bjzhang(a)suse.com>
> ---
> src/libxl/libxl_conf.h | 58 ++++++
> src/libxl/libxl_driver.c | 446
+++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 504 insertions(+)
>
> diff --git a/src/libxl/libxl_conf.h b/src/libxl/libxl_conf.h
> index 56bf85c..dab1466 100644
> --- a/src/libxl/libxl_conf.h
> +++ b/src/libxl/libxl_conf.h
> @@ -73,6 +73,62 @@ struct _libxlDriverPrivate {
> char *saveDir;
> };
>
> +# define JOB_MASK(job) (1 << (job - 1))
> +# define DEFAULT_JOB_MASK \
> + (JOB_MASK(LIBXL_JOB_DESTROY) | \
> + JOB_MASK(LIBXL_JOB_ABORT))
> +
> +/* Jobs which have to be tracked in domain state XML. */
> +# define LIBXL_DOMAIN_TRACK_JOBS \
> + (JOB_MASK(LIBXL_JOB_DESTROY) | \
> + JOB_MASK(LIBXL_JOB_ASYNC))
> +
> +/* Only 1 job is allowed at any time
> + * A job includes *all* libxl.so api, even those just querying
> + * information, not merely actions */
> +enum libxlDomainJob {
> + LIBXL_JOB_NONE = 0, /* Always set to 0 for easy if (jobActive)
conditions */
> + LIBXL_JOB_DESTROY, /* Destroys the domain (cannot be masked out)
*/
> + LIBXL_JOB_MODIFY, /* May change state */
> + LIBXL_JOB_ABORT, /* Abort current async job */
> + LIBXL_JOB_MIGRATION_OP, /* Operation influencing outgoing migration
*/
> +
> + /* The following two items must always be the last items before
JOB_LAST */
> + LIBXL_JOB_ASYNC, /* Asynchronous job */
>
There is only one item, so the comment needs updated.
> +
> + LIBXL_JOB_LAST
> +};
> +VIR_ENUM_DECL(libxlDomainJob)
> +
> +/* Async job consists of a series of jobs that may change state.
Independent
> + * jobs that do not change state (and possibly others if explicitly
allowed by
> + * current async job) are allowed to be run even if async job is active.
> + */
> +enum libxlDomainAsyncJob {
> + LIBXL_ASYNC_JOB_NONE = 0,
> + LIBXL_ASYNC_JOB_MIGRATION_OUT,
> + LIBXL_ASYNC_JOB_MIGRATION_IN,
>
The libxl driver doesn't currently support migration. These could be
added in a patch set implementing migration.
> + LIBXL_ASYNC_JOB_SAVE,
> + LIBXL_ASYNC_JOB_DUMP,
>
IMO, LIBXL_ASYNC_JOB_RESTORE should be added to this list. I see It is
omitted in the qemu driver as well, so perhaps there is a good reason
for it?
i do not know why it is omitted by qemu driver. but for libxl driver, it
is
hard to let other operation execution even if it is a async job. because
virDomainObjPtr is used by libxlVmStart, especially by
libxl_domain_create_restore.
> +
> + LIBXL_ASYNC_JOB_LAST
> +};
> +VIR_ENUM_DECL(libxlDomainAsyncJob)
> +
> +struct libxlDomainJobObj {
> + virCond cond; /* Use to coordinate jobs */
> + enum libxlDomainJob active; /* Currently running job */
> + int owner; /* Thread which set current job */
> +
> + virCond asyncCond; /* Use to coordinate with async
jobs */
> + enum libxlDomainAsyncJob asyncJob; /* Currently active async job */
> + int asyncOwner; /* Thread which set current async
job */
> + int phase; /* Job phase (mainly for
migrations) */
> + unsigned long long mask; /* Jobs allowed during async job
*/
> + unsigned long long start; /* When the async job started */
> + virDomainJobInfo info; /* Async job progress data */
> +};
> +
> typedef struct _libxlDomainObjPrivate libxlDomainObjPrivate;
> typedef libxlDomainObjPrivate *libxlDomainObjPrivatePtr;
> struct _libxlDomainObjPrivate {
> @@ -81,6 +137,8 @@ struct _libxlDomainObjPrivate {
> libxl_waiter *dWaiter;
> int waiterFD;
> int eventHdl;
> +
> + struct libxlDomainJobObj job;
> };
>
> # define LIBXL_SAVE_MAGIC "libvirt-xml\n \0 \r"
> diff --git a/src/libxl/libxl_driver.c b/src/libxl/libxl_driver.c
> index 6fe284f..d01d915 100644
> --- a/src/libxl/libxl_driver.c
> +++ b/src/libxl/libxl_driver.c
> @@ -45,6 +45,7 @@
> #include "xen_xm.h"
> #include "virtypedparam.h"
> #include "viruri.h"
> +#include "virtime.h"
>
> #define VIR_FROM_THIS VIR_FROM_LIBXL
>
> @@ -84,6 +85,336 @@ libxlDriverUnlock(libxlDriverPrivatePtr driver)
> virMutexUnlock(&driver->lock);
> }
>
> +/* job */
> +VIR_ENUM_IMPL(libxlDomainJob, LIBXL_JOB_LAST,
> + "none",
> + "destroy",
> + "modify",
> + "abort",
> + "migration operation",
> + "none", /* async job is never stored in job.active */
> +);
> +
> +VIR_ENUM_IMPL(libxlDomainAsyncJob, LIBXL_ASYNC_JOB_LAST,
> + "none",
> + "migration out",
> + "migration in",
>
Can be added with a migration implementation.
> + "save",
> + "dump",
>
"restore"?
> +);
> +
> +static int
> +libxlDomainObjInitJob(libxlDomainObjPrivatePtr priv)
> +{
> + memset(&priv->job, 0, sizeof(priv->job));
> +
> + if (virCondInit(&priv->job.cond) < 0)
> + return -1;
> +
> + if (virCondInit(&priv->job.asyncCond) < 0) {
> + ignore_value(virCondDestroy(&priv->job.cond));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static void
> +libxlDomainObjResetJob(libxlDomainObjPrivatePtr priv)
> +{
> + struct libxlDomainJobObj *job = &priv->job;
> +
> + job->active = LIBXL_JOB_NONE;
> + job->owner = 0;
> +}
> +
> +static void
> +libxlDomainObjResetAsyncJob(libxlDomainObjPrivatePtr priv)
> +{
> + struct libxlDomainJobObj *job = &priv->job;
> +
> + job->asyncJob = LIBXL_ASYNC_JOB_NONE;
> + job->asyncOwner = 0;
> + job->phase = 0;
> + job->mask = DEFAULT_JOB_MASK;
> + job->start = 0;
> + memset(&job->info, 0, sizeof(job->info));
> +}
> +
> +static void
> +libxlDomainObjFreeJob(libxlDomainObjPrivatePtr priv)
> +{
> + ignore_value(virCondDestroy(&priv->job.cond));
> + ignore_value(virCondDestroy(&priv->job.asyncCond));
> +}
> +
> +static bool
> +libxlDomainTrackJob(enum libxlDomainJob job)
> +{
> + return (LIBXL_DOMAIN_TRACK_JOBS & JOB_MASK(job)) != 0;
> +}
> +
> +static void
> +libxlDomainObjSaveJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
> +{
> + if (!virDomainObjIsActive(obj)) {
> + /* don't write the state file yet, it will be written once the
domain
> + * gets activated */
> + return;
> + }
> +
> + if (virDomainSaveStatus(driver->caps, driver->stateDir, obj) < 0)
> + VIR_WARN("Failed to save status on vm %s", obj->def->name);
> +}
> +
> +static bool
> +libxlDomainNestedJobAllowed(libxlDomainObjPrivatePtr priv, enum
libxlDomainJob job)
> +{
> + return !priv->job.asyncJob || (priv->job.mask & JOB_MASK(job)) != 0;
> +}
> +
> +static void
> +libxlDomainObjSetAsyncJobMask(virDomainObjPtr obj,
> + unsigned long long allowedJobs)
> +{
> + libxlDomainObjPrivatePtr priv = obj->privateData;
> +
> + if (!priv->job.asyncJob)
> + return;
> +
> + priv->job.mask = allowedJobs | JOB_MASK(LIBXL_JOB_DESTROY);
> +}
> +
> +/* Give up waiting for mutex after 30 seconds */
> +#define LIBXL_JOB_WAIT_TIME (1000ull * 30)
> +
> +/*
> + * obj must be locked before calling; driver_locked says if
libxlDriverPrivatePtr
> + * is locked or not.
> + */
> +static int ATTRIBUTE_NONNULL(1)
> +libxlDomainObjBeginJobInternal(libxlDriverPrivatePtr driver,
> + bool driver_locked,
> + virDomainObjPtr obj,
> + enum libxlDomainJob job,
> + enum libxlDomainAsyncJob asyncJob)
> +{
> + libxlDomainObjPrivatePtr priv = obj->privateData;
> + unsigned long long now;
> + unsigned long long then;
> +
> + if (virTimeMillisNow(&now) < 0)
> + return -1;
> + then = now + LIBXL_JOB_WAIT_TIME;
> +
> + virObjectRef(obj);
> + if (driver_locked) {
> + libxlDriverUnlock(driver);
> + }
> +
> +retry:
> + while (!libxlDomainNestedJobAllowed(priv, job)) {
> + VIR_DEBUG("Wait async job condition for starting job: %s
(async=%s)",
> + libxlDomainJobTypeToString(job),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> + if (virCondWaitUntil(&priv->job.asyncCond, &obj->lock, then)
< 0)
> + goto error;
> + }
> +
> + while (priv->job.active) {
> + VIR_DEBUG("Wait normal job condition for starting job: %s
(async=%s)",
> + libxlDomainJobTypeToString(job),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> + if (virCondWaitUntil(&priv->job.cond, &obj->lock, then) <
0)
> + goto error;
> + }
> +
> + /* No job is active but a new async job could have been started while
obj
> + * was unlocked, so we need to recheck it. */
> + if (!libxlDomainNestedJobAllowed(priv, job))
> + goto retry;
> +
> + libxlDomainObjResetJob(priv);
> +
> + if (job != LIBXL_JOB_ASYNC) {
> + VIR_DEBUG("Starting job: %s (async=%s)",
> + libxlDomainJobTypeToString(job),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> + priv->job.active = job;
> + priv->job.owner = virThreadSelfID();
> + } else {
> + VIR_DEBUG("Starting async job: %s",
> + libxlDomainAsyncJobTypeToString(asyncJob));
> + libxlDomainObjResetAsyncJob(priv);
> + priv->job.asyncJob = asyncJob;
> + priv->job.asyncOwner = virThreadSelfID();
> + priv->job.start = now;
> + }
> +
> + if (driver_locked) {
> + virDomainObjUnlock(obj);
> + libxlDriverLock(driver);
> + virDomainObjLock(obj);
> + }
> +
> + if (libxlDomainTrackJob(job))
> + libxlDomainObjSaveJob(driver, obj);
> +
> + return 0;
> +
> +error:
> + VIR_WARN("Cannot start job (%s, %s) for domain %s;"
> + " current job is (%s, %s) owned by (%d, %d)",
> + libxlDomainJobTypeToString(job),
> + libxlDomainAsyncJobTypeToString(asyncJob),
> + obj->def->name,
> + libxlDomainJobTypeToString(priv->job.active),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob),
> + priv->job.owner, priv->job.asyncOwner);
> +
> + if (errno == ETIMEDOUT)
> + virReportError(VIR_ERR_OPERATION_TIMEOUT,
> + "%s", _("cannot acquire state change
lock"));
> + else
> + virReportSystemError(errno,
> + "%s", _("cannot acquire job
mutex"));
> + if (driver_locked) {
> + virDomainObjUnlock(obj);
> + libxlDriverLock(driver);
> + virDomainObjLock(obj);
> + }
> + virObjectUnref(obj);
> + return -1;
> +}
> +
> +/*
> + * obj must be locked before calling, libxlDriverPrivatePtr must NOT be
locked
> + *
> + * This must be called by anything that will change the VM state
> + * in any way
> + *
> + * Upon successful return, the object will have its ref count increased,
> + * successful calls must be followed by EndJob eventually
> + */
> +static int
> +libxlDomainObjBeginJob(libxlDriverPrivatePtr driver,
> + virDomainObjPtr obj,
> + enum libxlDomainJob job)
> +{
> + return libxlDomainObjBeginJobInternal(driver, false, obj, job,
> + LIBXL_ASYNC_JOB_NONE);
> +}
> +
> +static int ATTRIBUTE_UNUSED
> +libxlDomainObjBeginAsyncJob(libxlDriverPrivatePtr driver,
> + virDomainObjPtr obj,
> + enum libxlDomainAsyncJob asyncJob)
> +{
> + return libxlDomainObjBeginJobInternal(driver, false, obj,
LIBXL_JOB_ASYNC,
> + asyncJob);
> +}
> +
> +/*
> + * obj must be locked before calling. If libxlDriverPrivatePtr is passed,
it
> + * MUST be locked; otherwise it MUST NOT be locked.
>
IMO, libxlDriverPrivatePtr should always be passed, in which case it
MUST be locked.
> + *
> + * This must be called by anything that will change the VM state
> + * in any way
> + *
> + * Upon successful return, the object will have its ref count increased,
> + * successful calls must be followed by EndJob eventually
> + */
> +static int
> +libxlDomainObjBeginJobWithDriver(libxlDriverPrivatePtr driver,
> + virDomainObjPtr obj,
> + enum libxlDomainJob job)
> +{
> + if (job <= LIBXL_JOB_NONE || job >= LIBXL_JOB_ASYNC) {
> + virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Attempt to start invalid job"));
> + return -1;
> + }
> +
> + return libxlDomainObjBeginJobInternal(driver, true, obj, job,
> + LIBXL_ASYNC_JOB_NONE);
> +}
> +
> +static int
> +libxlDomainObjBeginAsyncJobWithDriver(libxlDriverPrivatePtr driver,
> + virDomainObjPtr obj,
> + enum libxlDomainAsyncJob asyncJob)
> +{
> + return libxlDomainObjBeginJobInternal(driver, true, obj,
LIBXL_JOB_ASYNC,
> + asyncJob);
> +}
> +
> +/*
> + * obj must be locked before calling, libxlDriverPrivatePtr does not
matter
> + *
> + * To be called after completing the work associated with the
> + * earlier libxlDomainBeginJob() call
> + *
> + * Returns remaining refcount on 'obj', maybe 0 to indicated it
> + * was deleted
> + */
> +static bool
> +libxlDomainObjEndJob(libxlDriverPrivatePtr driver, virDomainObjPtr obj)
> +{
> + libxlDomainObjPrivatePtr priv = obj->privateData;
> + enum libxlDomainJob job = priv->job.active;
> +
> + VIR_DEBUG("Stopping job: %s (async=%s)",
> + libxlDomainJobTypeToString(job),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> +
> + libxlDomainObjResetJob(priv);
> + if (libxlDomainTrackJob(job))
> + libxlDomainObjSaveJob(driver, obj);
> + virCondSignal(&priv->job.cond);
> +
> + return virObjectUnref(obj);
> +}
> +
> +static bool
> +libxlDomainObjEndAsyncJob(libxlDriverPrivatePtr driver, virDomainObjPtr
obj)
> +{
> + libxlDomainObjPrivatePtr priv = obj->privateData;
> +
> + VIR_DEBUG("Stopping async job: %s",
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> +
> + libxlDomainObjResetAsyncJob(priv);
> + libxlDomainObjSaveJob(driver, obj);
> + virCondBroadcast(&priv->job.asyncCond);
> +
> + return virObjectUnref(obj);
> +}
> +
> +static int ATTRIBUTE_UNUSED
> +libxlMigrationJobStart(libxlDriverPrivatePtr driver,
> + virDomainObjPtr vm,
> + enum libxlDomainAsyncJob job)
>
This function is not used in the patch series and should be removed.
> +{
> + libxlDomainObjPrivatePtr priv = vm->privateData;
> +
> + if (libxlDomainObjBeginAsyncJobWithDriver(driver, vm, job) < 0)
> + return -1;
> +
> + libxlDomainObjSetAsyncJobMask(vm, DEFAULT_JOB_MASK |
> + JOB_MASK(LIBXL_JOB_MIGRATION_OP));
> +
> + priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED;
> +
> + return 0;
> +}
> +
> +static bool ATTRIBUTE_UNUSED
> +libxlMigrationJobFinish(libxlDriverPrivatePtr driver, virDomainObjPtr vm)
>
Same here, not used so should be removed.
> +{
> + return libxlDomainObjEndAsyncJob(driver, vm);
> +}
> +/* job function finish */
> +
> static void *
> libxlDomainObjPrivateAlloc(void)
> {
> @@ -92,11 +423,18 @@ libxlDomainObjPrivateAlloc(void)
> if (VIR_ALLOC(priv) < 0)
> return NULL;
>
> + if (libxlDomainObjInitJob(priv) < 0)
> + goto error;
> +
> libxl_ctx_init(&priv->ctx, LIBXL_VERSION, libxl_driver->logger);
> priv->waiterFD = -1;
> priv->eventHdl = -1;
>
> return priv;
> +
> +error:
> + VIR_FREE(priv);
> + return NULL;
> }
>
> static void
> @@ -114,6 +452,7 @@ libxlDomainObjPrivateFree(void *data)
> }
>
> libxl_ctx_free(&priv->ctx);
> + libxlDomainObjFreeJob(priv);
> VIR_FREE(priv);
> }
>
> @@ -3830,6 +4169,111 @@ cleanup:
> }
>
> static int
> +libxlDomainGetJobInfo(virDomainPtr dom,
> + virDomainJobInfoPtr info)
> +{
> + libxlDriverPrivatePtr driver = dom->conn->privateData;
> + virDomainObjPtr vm;
> + int ret = -1;
> + libxlDomainObjPrivatePtr priv;
> +
> + libxlDriverLock(driver);
> + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> + libxlDriverUnlock(driver);
> + if (!vm) {
> + char uuidstr[VIR_UUID_STRING_BUFLEN];
> + virUUIDFormat(dom->uuid, uuidstr);
> + virReportError(VIR_ERR_NO_DOMAIN,
> + _("no domain with matching uuid '%s'"),
uuidstr);
> + goto cleanup;
> + }
> +
> + priv = vm->privateData;
> +
> + if (virDomainObjIsActive(vm)) {
> + if (priv->job.asyncJob) {
> + memcpy(info, &priv->job.info, sizeof(*info));
> +
> + /* Refresh elapsed time again just to ensure it
> + * is fully updated. This is primarily for benefit
> + * of incoming migration which we don't currently
> + * monitor actively in the background thread
> + */
> + if (virTimeMillisNow(&info->timeElapsed) < 0)
> + goto cleanup;
> + info->timeElapsed -= priv->job.start;
> + } else {
> + memset(info, 0, sizeof(*info));
> + info->type = VIR_DOMAIN_JOB_NONE;
> + }
> + } else {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + "%s", _("domain is not running"));
> + goto cleanup;
> + }
> +
> + ret = 0;
> +
> +cleanup:
> + if (vm)
> + virDomainObjUnlock(vm);
> + return ret;
> +}
> +
> +static int
> +libxlDomainAbortJob(virDomainPtr dom)
> +{
> + libxlDriverPrivatePtr driver = dom->conn->privateData;
> + virDomainObjPtr vm;
> + int ret = -1;
> + libxlDomainObjPrivatePtr priv;
> +
> + libxlDriverLock(driver);
> + vm = virDomainFindByUUID(&driver->domains, dom->uuid);
> + libxlDriverUnlock(driver);
> + if (!vm) {
> + char uuidstr[VIR_UUID_STRING_BUFLEN];
> + virUUIDFormat(dom->uuid, uuidstr);
> + virReportError(VIR_ERR_NO_DOMAIN,
> + _("no domain with matching uuid '%s'"),
uuidstr);
> + goto cleanup;
> + }
> +
> + if (libxlDomainObjBeginJob(driver, vm, LIBXL_JOB_ABORT) < 0)
> + goto cleanup;
> +
> + if (!virDomainObjIsActive(vm)) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + "%s", _("domain is not running"));
> + goto endjob;
> + }
> +
> + priv = vm->privateData;
> +
> + if (!priv->job.asyncJob) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + "%s", _("no job is active on the
domain"));
> + goto endjob;
> + } else {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + _("cannot abort %s; use virDomainDestroy
instead"),
> + libxlDomainAsyncJobTypeToString(priv->job.asyncJob));
> + goto endjob;
> + }
>
This function will always fail with the above logic. ret is initialized
to -1 and is never changed.
Is it even possible to safely abort a libxl operation? If not, this
function should probably remain unimplemented. Maybe it will be useful
when the libxl driver supports migration.
return error because of the there is no
cancelation opeartion in libvirt libxl
driver with xen 4.1. according to xen4.2 release document, maybe the
cancelation of long-running jobs is supported.
but it is still useful for save, dump and migration(in future), because libvirt
should block the user abort operation othervise xenlight might crash
Regards,
Jim
> +
> + VIR_DEBUG("Not job could be cancelled at current version");
> +
> +endjob:
> + if (libxlDomainObjEndJob(driver, vm) == 0)
> + vm = NULL;
> +
> +cleanup:
> + if (vm)
> + virDomainObjUnlock(vm);
> + return ret;
> +}
> +
> +static int
> libxlDomainEventRegisterAny(virConnectPtr conn, virDomainPtr dom, int
eventID,
> virConnectDomainEventGenericCallback callback,
> void *opaque, virFreeCallback freecb)
> @@ -3963,6 +4407,8 @@ static virDriver libxlDriver = {
> .domainIsActive = libxlDomainIsActive, /* 0.9.0 */
> .domainIsPersistent = libxlDomainIsPersistent, /* 0.9.0 */
> .domainIsUpdated = libxlDomainIsUpdated, /* 0.9.0 */
> + .domainGetJobInfo = libxlDomainGetJobInfo, /* 0.10.0 */
> + .domainAbortJob = libxlDomainAbortJob, /* 0.10.0 */
> .domainEventRegisterAny = libxlDomainEventRegisterAny, /* 0.9.0 */
> .domainEventDeregisterAny = libxlDomainEventDeregisterAny, /* 0.9.0 */
> .isAlive = libxlIsAlive, /* 0.9.8 */
>