On 09/19/2018 10:27 PM, Wang Yechao wrote:
qemuProcessReconnectHelper has hold lock on doms, if create
qemuProcessReconnect thread failed, calls qemuDomainRemoveInactiveJob
will lead to deadlock.
Add a qemuDomainRemoveInactiveJobLocked, modify qemuProcessReconnectHelper
to call it.
Signed-off-by: Wang Yechao <wang.yechao255(a)zte.com.cn>
---
v2 patch:
https://www.redhat.com/archives/libvir-list/2018-September/msg00635.html
Changes in v3:
- drop v2 patch, cause it is not good.
- split qemuDomainRemoveInactive into qemuDomainRemoveInactiveCommon
and virDomainObjListRemove.
- create a qemuDomainRemoveInactiveLocked, consists of
qemuDomainRemoveInactiveCommon and virDomainObjListRemoveLocked.
- create a qemuDomainRemoveInactiveJobLocked, which copies
qemuDomainRemoveInactiveJob except calling qemuDomainRemoveInactiveLocked
- Modify qemuProcessReconnectHelper to call qemuDomainRemoveInactiveJobLocked
---
src/qemu/qemu_domain.c | 75 ++++++++++++++++++++++++++++++++++++++++---------
src/qemu/qemu_domain.h | 9 ++++++
src/qemu/qemu_process.c | 2 +-
3 files changed, 71 insertions(+), 15 deletions(-)
Sigh, I thought I was clear - multiple patches would have been
preferred. It's easier to review and shows the progression to the fix.
Hopefully the next series will split them up. Each will have their own
commit message to describe what's happening.
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2fd8a2a..ef0d91d 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -8343,24 +8343,13 @@ qemuDomainSnapshotDiscardAllMetadata(virQEMUDriverPtr driver,
return rem.err;
}
-
-/**
- * qemuDomainRemoveInactive:
- *
- * The caller must hold a lock to the vm.
- */
void
This will be a "static void"
-qemuDomainRemoveInactive(virQEMUDriverPtr driver,
- virDomainObjPtr vm)
+qemuDomainRemoveInactiveCommon(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
{
char *snapDir;
virQEMUDriverConfigPtr cfg;
- if (vm->persistent) {
- /* Short-circuit, we don't want to remove a persistent domain */
- return;
- }
-
cfg = virQEMUDriverGetConfig(driver);
/* Remove any snapshot metadata prior to removing the domain */
@@ -8379,9 +8368,47 @@ qemuDomainRemoveInactive(virQEMUDriverPtr driver,
}
qemuExtDevicesCleanupHost(driver, vm->def);
+ virObjectUnref(cfg);
+}
+
Two blank lines between functions
+/**
+ * qemuDomainRemoveInactive:
+ *
+ * The caller must hold a lock to the vm.
+ */
+void
+qemuDomainRemoveInactive(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
+{
+ if (vm->persistent) {
+ /* Short-circuit, we don't want to remove a persistent domain */
+ return;
+ }
+
+ qemuDomainRemoveInactiveCommon(driver, vm);
virDomainObjListRemove(driver->domains, vm);
If you're going to have a blank line, then have it between the two
calls, but not after the last one.
- virObjectUnref(cfg);
+}
+
Two blank lines
+/**
+ * qemuDomainRemoveInactiveLocked:
+ *
+ * The caller must hold a lock to the vm ,
+ * and hold a lock to the doms.
Change to:
* The caller must hold a lock to the vm and must hold the
* lock on driver->domains in order to call the remove obj
* from locked list method.
+ */
+
+void
static void
+qemuDomainRemoveInactiveLocked(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
+{
+ if (vm->persistent) {
+ /* Short-circuit, we don't want to remove a persistent domain */
+ return;
+ }
+
+ qemuDomainRemoveInactiveCommon(driver, vm);
+ virDomainObjListRemoveLocked(driver->domains, vm);
+
Similar with respect to the extra blank line here.
}
@@ -8407,6 +8434,26 @@ qemuDomainRemoveInactiveJob(virQEMUDriverPtr driver,
qemuDomainObjEndJob(driver, vm);
}
+/**
+ * qemuDomainRemoveInactiveJobLocked:
+ *
+ * Just like qemuDomainRemoveInactiveJob but it hold lock
+ * on 'doms'.
I would say:
* Similar to qemuDomainRemoveInactiveJob, except that the caller must
* also hold the lock @driver->domains
+ */
+
Remove the blank line above
+void
+qemuDomainRemoveInactiveJobLocked(virQEMUDriverPtr driver,
+ virDomainObjPtr vm)
+{
+ bool haveJob;
+
+ haveJob = qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) >= 0;
+
+ qemuDomainRemoveInactiveLocked(driver, vm);
+
+ if (haveJob)
+ qemuDomainObjEndJob(driver, vm);
+}
Two blank lines
void
qemuDomainSetFakeReboot(virQEMUDriverPtr driver,
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 914c9a6..1d80621 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -698,12 +698,21 @@ int qemuDomainSnapshotDiscardAll(void *payload,
int qemuDomainSnapshotDiscardAllMetadata(virQEMUDriverPtr driver,
virDomainObjPtr vm);
+void qemuDomainRemoveInactiveCommon(virQEMUDriverPtr driver,
+ virDomainObjPtr vm);
+
Nope - this is not external
void qemuDomainRemoveInactive(virQEMUDriverPtr driver,
virDomainObjPtr vm);
+void qemuDomainRemoveInactiveLocked(virQEMUDriverPtr driver,
+ virDomainObjPtr vm);
+
and neither is this one
void qemuDomainRemoveInactiveJob(virQEMUDriverPtr driver,
virDomainObjPtr vm);
+void qemuDomainRemoveInactiveJobLocked(virQEMUDriverPtr driver,
+ virDomainObjPtr vm);
+
void qemuDomainSetFakeReboot(virQEMUDriverPtr driver,
virDomainObjPtr vm,
bool value);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 72a59de..ed24447 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -8025,7 +8025,7 @@ qemuProcessReconnectHelper(virDomainObjPtr obj,
*/
qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
QEMU_ASYNC_JOB_NONE, 0);
- qemuDomainRemoveInactiveJob(src->driver, obj);
+ qemuDomainRemoveInactiveJobLocked(src->driver, obj);
virDomainObjEndAPI(&obj);
virNWFilterUnlockFilterUpdates();
In the long run the above hung will be the "only" thing for patch3.
John