On 06/08/2018 09:45 AM, Michal Privoznik wrote:
The point is to break QEMU_JOB_* into smaller pieces which
enables us to achieve higher throughput. For instance, if there
are two threads, one is trying to query something on qemu
monitor while the other is trying to query something on agent
monitor these two threads would serialize. There is not much
reason for that.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/qemu/THREADS.txt | 62 +++++++++++++++++--
src/qemu/qemu_domain.c | 163 +++++++++++++++++++++++++++++++++++++++----------
src/qemu/qemu_domain.h | 12 ++++
3 files changed, 200 insertions(+), 37 deletions(-)
Looked at THREADS.txt before reading any code in order to get a sense
for what's being done... I won't go back after reading the code so that
you get a sense of how someone not knowing what's going on views things.
So when reading comments here - just take them with that in mind. I have
a feeling my mind/thoughts will change later on ;-).
diff --git a/src/qemu/THREADS.txt b/src/qemu/THREADS.txt
index 7243161fe3..6219f46a81 100644
--- a/src/qemu/THREADS.txt
+++ b/src/qemu/THREADS.txt
@@ -51,8 +51,8 @@ There are a number of locks on various objects
Since virDomainObjPtr lock must not be held during sleeps, the job
conditions provide additional protection for code making updates.
- QEMU driver uses two kinds of job conditions: asynchronous and
- normal.
+ QEMU driver uses three kinds of job conditions: asynchronous, agent
+ and normal.
Asynchronous job condition is used for long running jobs (such as
migration) that consist of several monitor commands and it is
@@ -69,9 +69,15 @@ There are a number of locks on various objects
it needs to wait until the asynchronous job ends and try to acquire
the job again.
+ Agent job condition is then used when thread wishes to talk to qemu
s/then//
s/when thread/when the thread/
s/to qemu/to the qemu/
+ agent monitor. It is possible to acquire just agent job
+ (qemuDomainObjBeginAgentJob), or only normal job
+ (qemuDomainObjBeginJob) or both at the same time
+ (qemuDomainObjBeginJobWithAgent).
+
Oh this seems like it's going to be tricky... How does one choose which
to use. At least w/ async you know it's a long running job and normal is
everything else. Now you have category 3 agent specific - at this point
I'm not sure you really want to mix normal and agent. If the purpose of
the series is to extricate agent job from normal job, then mixing when
it's convenient leads to speculation of what misunderstandings will
occur for future consumers.
Because of the text for "Normal job condition is used by all other
jobs", I wonder if the agent job description should go above Normal job;
otherwise, perhaps it'd be best to distinguish "qemu monitor" jobs in
the normal description.
Immediately after acquiring the virDomainObjPtr lock, any
method
- which intends to update state must acquire either asynchronous or
- normal job condition. The virDomainObjPtr lock is released while
+ which intends to update state must acquire asynchronous, normal or
+ agent job condition. The virDomainObjPtr lock is released while
There is no agent job condition, yet. And how on earth do you do both
agent and normal?
blocking on these condition variables. Once the job condition
is
acquired, a method can safely release the virDomainObjPtr lock
whenever it hits a piece of code which may sleep/wait, and
@@ -132,6 +138,30 @@ To acquire the normal job condition
+To acquire the agent job condition
+
+ qemuDomainObjBeginAgentJob()
+ - Waits until there is no other agent job set
So no agent job before starting, but...
+ - Sets job.agentActive tp the job type
+
+ qemuDomainObjEndAgentJob()
+ - Sets job.agentActive to 0
+ - Signals on job.cond condition
... we signal on job.cond even though we didn't necessarily obtain?
+
+
+
+To acquire both normal and agent job condition
+
+ qemuDomainObjBeginJobWithAgent()
+ - Waits until there is no normal and no agent job set
+ - Sets both job.active and job.agentActive with required job types
+
+ qemuDomainObjEndJobWithAgent()
+ - Sets both job.active and job.agentActive to 0
+ - Signals on job.cond condition
+
+
+
To acquire the asynchronous job condition
qemuDomainObjBeginAsyncJob()
@@ -247,6 +277,30 @@ Design patterns
virDomainObjEndAPI(&obj);
+ * Invoking an agent command on a virDomainObjPtr
+
+ virDomainObjPtr obj;
+ qemuAgentPtr agent;
+
+ obj = qemuDomObjFromDomain(dom);
+
+ qemuDomainObjBeginAgentJob(obj, QEMU_JOB_TYPE);
+
+ ...do prep work...
+
+ if (!qemuDomainAgentAvailable(obj, true))
+ goto cleanup;
+
+ agent = qemuDomainObjEnterAgent(obj);
+ qemuAgentXXXX(agent, ..);
+ qemuDomainObjExitAgent(obj, agent);
+
+ ...do final work...
+
+ qemuDomainObjEndAgentJob(obj);
+ virDomainObjEndAPI(&obj);
+
+
Can you add a using a JobWithAgent example?
* Running asynchronous job
virDomainObjPtr obj;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index b8e34c1c2c..09404f6569 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -319,6 +319,17 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
job->started = 0;
}
Two empty lines (multiple occurrences w/ new functions).
+static void
+qemuDomainObjResetAgentJob(qemuDomainObjPrivatePtr priv)
+{
+ qemuDomainJobObjPtr job = &priv->job;
+
+ job->agentActive = QEMU_AGENT_JOB_NONE;
+ job->agentOwner = 0;
+ job->agentOwnerAPI = NULL;
+ job->agentStarted = 0;
+}
+
static void
qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
{
@@ -6361,6 +6372,15 @@ qemuDomainJobAllowed(qemuDomainObjPrivatePtr priv, qemuDomainJob
job)
return !priv->job.active && qemuDomainNestedJobAllowed(priv, job);
}
+static bool
+qemuDomainObjCanSetJob(qemuDomainObjPrivatePtr priv,
+ qemuDomainJob job,
+ qemuDomainAgentJob agentJob)
+{
+ return (job == QEMU_JOB_NONE || !priv->job.active) &&
+ (agentJob == QEMU_AGENT_JOB_NONE || !priv->job.agentActive);
+}
+
/* Give up waiting for mutex after 30 seconds */
#define QEMU_JOB_WAIT_TIME (1000ull * 30)
@@ -6371,6 +6391,7 @@ static int ATTRIBUTE_NONNULL(1)
So you're competing with yourself with changes to this code from your
other series. Who merges first ;-)
qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
virDomainObjPtr obj,
qemuDomainJob job,
+ qemuDomainAgentJob agentJob,
qemuDomainAsyncJob asyncJob)
{
qemuDomainObjPrivatePtr priv = obj->privateData;
@@ -6383,16 +6404,15 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
int ret = -1;
unsigned long long duration = 0;
unsigned long long asyncDuration = 0;
- const char *jobStr;
- if (async)
- jobStr = qemuDomainAsyncJobTypeToString(asyncJob);
- else
- jobStr = qemuDomainJobTypeToString(job);
-
- VIR_DEBUG("Starting %s: %s (vm=%p name=%s, current job=%s async=%s)",
- async ? "async job" : "job", jobStr, obj,
obj->def->name,
+ VIR_DEBUG("Starting job: job=%s agentJob=%s asyncJob=%s "
+ "(vm=%p name=%s, current job=%s agentJob=%s async=%s)",
+ qemuDomainJobTypeToString(job),
+ qemuDomainAgentJobTypeToString(agentJob),
+ qemuDomainAsyncJobTypeToString(asyncJob),
+ obj, obj->def->name,
qemuDomainJobTypeToString(priv->job.active),
+ qemuDomainAgentJobTypeToString(priv->job.agentActive),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
Some random grumbling about splitting patches ;-)
if (virTimeMillisNow(&now) < 0) {
@@ -6416,7 +6436,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
goto error;
}
- while (priv->job.active) {
+ while (!qemuDomainObjCanSetJob(priv, job, agentJob)) {
VIR_DEBUG("Waiting for job (vm=%p name=%s)", obj,
obj->def->name);
if (virCondWaitUntil(&priv->job.cond, &obj->parent.lock, then)
< 0)
goto error;
Part of me says this check is ripe for some future coding error, but
logically AFAICT it works.
@@ -6427,32 +6447,48 @@
qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
if (!nested && !qemuDomainNestedJobAllowed(priv, job))
goto retry;
- qemuDomainObjResetJob(priv);
-
ignore_value(virTimeMillisNow(&now));
- if (job != QEMU_JOB_ASYNC) {
- VIR_DEBUG("Started job: %s (async=%s vm=%p name=%s)",
- qemuDomainJobTypeToString(job),
- qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
- obj, obj->def->name);
- priv->job.active = job;
- priv->job.owner = virThreadSelfID();
- priv->job.ownerAPI = virThreadJobGet();
+ if (job) {
+ qemuDomainObjResetJob(priv);
+
+ if (job != QEMU_JOB_ASYNC) {
+ VIR_DEBUG("Started job: %s (async=%s vm=%p name=%s)",
+ qemuDomainJobTypeToString(job),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+ obj, obj->def->name);
+ priv->job.active = job;
+ priv->job.owner = virThreadSelfID();
+ priv->job.ownerAPI = virThreadJobGet();
+ priv->job.started = now;
+ } else {
+ VIR_DEBUG("Started async job: %s (vm=%p name=%s)",
+ qemuDomainAsyncJobTypeToString(asyncJob),
+ obj, obj->def->name);
+ qemuDomainObjResetAsyncJob(priv);
+ if (VIR_ALLOC(priv->job.current) < 0)
+ goto cleanup;
+ priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE;
+ priv->job.asyncJob = asyncJob;
+ priv->job.asyncOwner = virThreadSelfID();
+ priv->job.asyncOwnerAPI = virThreadJobGet();
+ priv->job.asyncStarted = now;
+ priv->job.current->started = now;
+ }
+ }
+
+ if (agentJob) {
+ qemuDomainObjResetAgentJob(priv);
+
+ VIR_DEBUG("Started agent job: %s (vm=%p name=%s job=%s async=%s)",
+ qemuDomainAgentJobTypeToString(agentJob),
+ obj, obj->def->name,
+ qemuDomainJobTypeToString(priv->job.active),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob));
+ priv->job.agentActive = agentJob;
+ priv->job.agentOwner = virThreadSelfID();
+ priv->job.agentOwnerAPI = virThreadJobGet();
priv->job.started = now;
- } else {
- VIR_DEBUG("Started async job: %s (vm=%p name=%s)",
- qemuDomainAsyncJobTypeToString(asyncJob),
- obj, obj->def->name);
- qemuDomainObjResetAsyncJob(priv);
- if (VIR_ALLOC(priv->job.current) < 0)
- goto cleanup;
- priv->job.current->status = QEMU_DOMAIN_JOB_STATUS_ACTIVE;
- priv->job.asyncJob = asyncJob;
- priv->job.asyncOwner = virThreadSelfID();
- priv->job.asyncOwnerAPI = virThreadJobGet();
- priv->job.asyncStarted = now;
- priv->job.current->started = now;
So after reading the code and thinking about the documentation, IMO,
having agentJob use job.cond is confusing. Having JobWithAgent is even
more confusing. But it seems the code would work
I think what concerns me is there are 3 paths (I read patch 4) that
would seemingly indicate that by starting an agentJob we block a normal
domain job. In order to start one, IIUC both agentJob and normal job
must be 0. That seems reasonable until one starts thinking about races
when there's a thread waiting for a JobWithAgent and a thread waiting
for one or the other. Combine that with the check "if (!nested &&
!qemuDomainNestedJobAllowed(priv, job))" which may now pass NONE for
@job if one of those threads was an agentJob.
}
if (qemuDomainTrackJob(job))
@@ -6534,12 +6570,31 @@ int qemuDomainObjBeginJob(virQEMUDriverPtr driver,
qemuDomainJob job)
{
if (qemuDomainObjBeginJobInternal(driver, obj, job,
+ QEMU_AGENT_JOB_NONE,
QEMU_ASYNC_JOB_NONE) < 0)
return -1;
else
return 0;
}
This could use an intro comment regarding usage.
+int qemuDomainObjBeginAgentJob(virQEMUDriverPtr driver,
+ virDomainObjPtr obj,
+ qemuDomainAgentJob agentJob)
+{
Read patch 3 and the following will make sense...
if (agentJob == QEMU_AGENT_JOB_MODIFY_BLOCK)
return qemuDomainObjBeginJobInternal(driver, obj,
QEMU_JOB_MODIFY,
agentJob,
QEMU_ASYNC_JOB_NONE);
else
return qemuDomainObjBeginJobInternal(driver, obj,
QEMU_JOB_NONE,
agentJob,
QEMU_ASYNC_JOB_NONE);
+ return qemuDomainObjBeginJobInternal(driver, obj,
QEMU_JOB_NONE,
+ agentJob,
+ QEMU_ASYNC_JOB_NONE);
+}
+
+int qemuDomainObjBeginJobWithAgent(virQEMUDriverPtr driver,
+ virDomainObjPtr obj,
+ qemuDomainJob job,
+ qemuDomainAgentJob agentJob)
+{
+ return qemuDomainObjBeginJobInternal(driver, obj, job,
+ agentJob, QEMU_ASYNC_JOB_NONE);
+}
+
if you got with the above, then this one's not necessary.
int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
qemuDomainAsyncJob asyncJob,
@@ -6549,6 +6604,7 @@ int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver,
qemuDomainObjPrivatePtr priv;
if (qemuDomainObjBeginJobInternal(driver, obj, QEMU_JOB_ASYNC,
+ QEMU_AGENT_JOB_NONE,
asyncJob) < 0)
return -1;
@@ -6578,6 +6634,7 @@ qemuDomainObjBeginNestedJob(virQEMUDriverPtr driver,
return qemuDomainObjBeginJobInternal(driver, obj,
QEMU_JOB_ASYNC_NESTED,
+ QEMU_AGENT_JOB_NONE,
QEMU_ASYNC_JOB_NONE);
}
@@ -6604,7 +6661,47 @@ qemuDomainObjEndJob(virQEMUDriverPtr driver, virDomainObjPtr obj)
qemuDomainObjResetJob(priv);
if (qemuDomainTrackJob(job))
qemuDomainObjSaveJob(driver, obj);
- virCondSignal(&priv->job.cond);
+ virCondBroadcast(&priv->job.cond);
+}
+
+void
+qemuDomainObjEndAgentJob(virDomainObjPtr obj)
+{
+ qemuDomainObjPrivatePtr priv = obj->privateData;
+ qemuDomainAgentJob agentJob = priv->job.agentActive;
+
+ priv->jobs_queued--;
+
+ VIR_DEBUG("Stopping agent job: %s (async=%s vm=%p name=%s)",
+ qemuDomainAgentJobTypeToString(agentJob),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+ obj, obj->def->name);
+
Read patch 3 and this will make sense:
if (priv->job.agentActive == QEMU_AGENT_JOB_MODIFY_BLOCK) {
qemuDomainObjResetJob(priv);
if (qemuDomainTrackJob(job))
qemuDomainObjSaveJob
}
Although the VIR_DEBUG above gets complicated...
+ qemuDomainObjResetAgentJob(priv);
+ virCondBroadcast(&priv->job.cond);
+}
+
+void
+qemuDomainObjEndJobWithAgent(virQEMUDriverPtr driver,
+ virDomainObjPtr obj)
+{
+ qemuDomainObjPrivatePtr priv = obj->privateData;
+ qemuDomainJob job = priv->job.active;
+ qemuDomainAgentJob agentJob = priv->job.agentActive;
+
+ priv->jobs_queued--;
+
+ VIR_DEBUG("Stopping both jobs: %s %s (async=%s vm=%p name=%s)",
+ qemuDomainJobTypeToString(job),
+ qemuDomainAgentJobTypeToString(agentJob),
+ qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
+ obj, obj->def->name);
+
+ qemuDomainObjResetJob(priv);
+ qemuDomainObjResetAgentJob(priv);
+ if (qemuDomainTrackJob(job))
+ qemuDomainObjSaveJob(driver, obj);
+ virCondBroadcast(&priv->job.cond);
and this one goes away.
John
}
void
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 709b42e6fd..6ada26ca99 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -517,6 +517,15 @@ int qemuDomainObjBeginJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
qemuDomainJob job)
ATTRIBUTE_RETURN_CHECK;
+int qemuDomainObjBeginAgentJob(virQEMUDriverPtr driver,
+ virDomainObjPtr obj,
+ qemuDomainAgentJob agentJob)
+ ATTRIBUTE_RETURN_CHECK;
+int qemuDomainObjBeginJobWithAgent(virQEMUDriverPtr driver,
+ virDomainObjPtr obj,
+ qemuDomainJob job,
+ qemuDomainAgentJob agentJob)
+ ATTRIBUTE_RETURN_CHECK;
int qemuDomainObjBeginAsyncJob(virQEMUDriverPtr driver,
virDomainObjPtr obj,
qemuDomainAsyncJob asyncJob,
@@ -530,6 +539,9 @@ int qemuDomainObjBeginNestedJob(virQEMUDriverPtr driver,
void qemuDomainObjEndJob(virQEMUDriverPtr driver,
virDomainObjPtr obj);
+void qemuDomainObjEndAgentJob(virDomainObjPtr obj);
+void qemuDomainObjEndJobWithAgent(virQEMUDriverPtr driver,
+ virDomainObjPtr obj);
void qemuDomainObjEndAsyncJob(virQEMUDriverPtr driver,
virDomainObjPtr obj);
void qemuDomainObjAbortAsyncJob(virDomainObjPtr obj);