For
https://bugzilla.redhat.com/show_bug.cgi?id=1066801
The nwfilter conf update mutex previously serialized
updates to the internal data structures for firewall
rules, and updates to the firewall itself. The latter
was recently turned into a read/write lock, and filter
instantiation allowed to proceed in parallel. It was
believed that this was ok, since each filter is created
on a separate iptables/ebtables chain.
It turns out that there is a subtle lock ordering problem
on virNWFilterObjPtr instances. __virNWFilterInstantiateFilter
will hold a lock on the virNWFilterObjPtr it is instantiating.
This in turn invokes virNWFilterInstantiate which then invokes
virNWFilterDetermineMissingVarsRec which then invokes
virNWFilterObjFindByName. This iterates over every single
virNWFilterObjPtr in the list, locking them and checking their
name. So if 2 or more threads try to instantiate a filter in
parallel, they'll all hold 1 lock at the top level in the
__virNWFilterInstantiateFilter method which will cause the
other thread to deadlock in virNWFilterObjFindByName.
The fix is to add an exclusive (recursive) mutex to serialize the
execution of __virNWFilterInstantiateFilter.
Signed-off-by: Daniel P. Berrange <berrange(a)redhat.com>
---
src/nwfilter/nwfilter_driver.c | 6 ++++--
src/nwfilter/nwfilter_gentech_driver.c | 34 ++++++++++++++++++++++++++++++++--
src/nwfilter/nwfilter_gentech_driver.h | 2 +-
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/src/nwfilter/nwfilter_driver.c b/src/nwfilter/nwfilter_driver.c
index 5908df7..2e89d07 100644
--- a/src/nwfilter/nwfilter_driver.c
+++ b/src/nwfilter/nwfilter_driver.c
@@ -200,7 +200,8 @@ nwfilterStateInitialize(bool privileged,
if (virNWFilterDHCPSnoopInit() < 0)
goto err_exit_learnshutdown;
- virNWFilterTechDriversInit(privileged);
+ if (virNWFilterTechDriversInit(privileged) < 0)
+ goto err_dhcpsnoop_shutdown;
if (virNWFilterConfLayerInit(virNWFilterDomainFWUpdateCB,
driverState) < 0)
@@ -251,6 +252,7 @@ error:
err_techdrivers_shutdown:
virNWFilterTechDriversShutdown();
+err_dhcpsnoop_shutdown:
virNWFilterDHCPSnoopShutdown();
err_exit_learnshutdown:
virNWFilterLearnShutdown();
@@ -327,10 +329,10 @@ nwfilterStateCleanup(void) {
if (driverState->privileged) {
virNWFilterConfLayerShutdown();
- virNWFilterTechDriversShutdown();
virNWFilterDHCPSnoopShutdown();
virNWFilterLearnShutdown();
virNWFilterIPAddrMapShutdown();
+ virNWFilterTechDriversShutdown();
nwfilterDriverLock(driverState);
diff --git a/src/nwfilter/nwfilter_gentech_driver.c
b/src/nwfilter/nwfilter_gentech_driver.c
index 8c5cd57..5144dce 100644
--- a/src/nwfilter/nwfilter_gentech_driver.c
+++ b/src/nwfilter/nwfilter_gentech_driver.c
@@ -55,15 +55,34 @@ static virNWFilterTechDriverPtr filter_tech_drivers[] = {
NULL
};
+/* Serializes instantiation of filters. This is necessary
+ * to avoid lock ordering deadlocks. eg __virNWFilterInstantiateFilter
+ * will hold a lock on a virNWFilterObjPtr. This in turn invokes
+ * virNWFilterInstantiate which invokes virNWFilterDetermineMissingVarsRec
+ * which invokes virNWFilterObjFindByName. This iterates over every single
+ * virNWFilterObjPtr in the list. So if 2 threads try to instantiate a
+ * filter in parallel, they'll both hold 1 lock at the top level in
+ * __virNWFilterInstantiateFilter which will cause the other thread
+ * to deadlock in virNWFilterObjFindByName.
+ *
+ * XXX better long term solution is to make virNWFilterObjList use a
+ * hash table as is done for virDomainObjList. You can then get
+ * lockless lookup of objects by name.
+ */
+static virMutex updateMutex;
-void virNWFilterTechDriversInit(bool privileged) {
+int virNWFilterTechDriversInit(bool privileged) {
size_t i = 0;
VIR_DEBUG("Initializing NWFilter technology drivers");
+ if (virMutexInitRecursive(&updateMutex) < 0)
+ return -1;
+
while (filter_tech_drivers[i]) {
if (!(filter_tech_drivers[i]->flags & TECHDRV_FLAG_INITIALIZED))
filter_tech_drivers[i]->init(privileged);
i++;
}
+ return 0;
}
@@ -74,6 +93,7 @@ void virNWFilterTechDriversShutdown(void) {
filter_tech_drivers[i]->shutdown();
i++;
}
+ virMutexDestroy(&updateMutex);
}
@@ -935,6 +955,8 @@ _virNWFilterInstantiateFilter(virNWFilterDriverStatePtr driver,
int ifindex;
int rc;
+ virMutexLock(&updateMutex);
+
/* after grabbing the filter update lock check for the interface; if
it's not there anymore its filters will be or are being removed
(while holding the lock) and we don't want to build new ones */
@@ -962,6 +984,8 @@ _virNWFilterInstantiateFilter(virNWFilterDriverStatePtr driver,
foundNewFilter);
cleanup:
+ virMutexUnlock(&updateMutex);
+
return rc;
}
@@ -981,6 +1005,7 @@ virNWFilterInstantiateFilterLate(virNWFilterDriverStatePtr driver,
bool foundNewFilter = false;
virNWFilterReadLockFilterUpdates();
+ virMutexLock(&updateMutex);
rc = __virNWFilterInstantiateFilter(driver,
vmuuid,
@@ -1006,6 +1031,7 @@ virNWFilterInstantiateFilterLate(virNWFilterDriverStatePtr driver,
}
virNWFilterUnlockFilterUpdates();
+ virMutexUnlock(&updateMutex);
return rc;
}
@@ -1129,7 +1155,11 @@ _virNWFilterTeardownFilter(const char *ifname)
int
virNWFilterTeardownFilter(const virDomainNetDef *net)
{
- return _virNWFilterTeardownFilter(net->ifname);
+ int ret;
+ virMutexLock(&updateMutex);
+ ret = _virNWFilterTeardownFilter(net->ifname);
+ virMutexUnlock(&updateMutex);
+ return ret;
}
diff --git a/src/nwfilter/nwfilter_gentech_driver.h
b/src/nwfilter/nwfilter_gentech_driver.h
index f4789e1..d72e040 100644
--- a/src/nwfilter/nwfilter_gentech_driver.h
+++ b/src/nwfilter/nwfilter_gentech_driver.h
@@ -31,7 +31,7 @@ virNWFilterTechDriverPtr virNWFilterTechDriverForName(const char
*name);
int virNWFilterRuleInstAddData(virNWFilterRuleInstPtr res,
void *data);
-void virNWFilterTechDriversInit(bool privileged);
+int virNWFilterTechDriversInit(bool privileged);
void virNWFilterTechDriversShutdown(void);
enum instCase {
--
1.8.5.3