On 07/19/2017 09:58 AM, Erik Skultety wrote:
On Mon, Jul 17, 2017 at 11:02:22AM -0400, John Ferlan wrote:
> In order to avoid an eventual possible race, modify the call to not
> use the @def, but rather take the various fields needed. The race could
> occur because the Destroy path needs to Unlock (and Unref) the object.
> This could lead to an eventual change event from udevAddOneDevice which
> would free obj->def and it's fields that this code would be attempting
> to reference. So better safe than sorry.
>
> Signed-off-by: John Ferlan <jferlan(a)redhat.com>
> ---
> src/conf/virnodedeviceobj.c | 45 +++++++++++++++++++++++++++---------
> src/conf/virnodedeviceobj.h | 8 +++++--
> src/node_device/node_device_driver.c | 40 +++++++++++++++++++++++++++-----
> src/test/test_driver.c | 41 ++++++++++++++++++++++++--------
> 4 files changed, 105 insertions(+), 29 deletions(-)
>
> diff --git a/src/conf/virnodedeviceobj.c b/src/conf/virnodedeviceobj.c
> index 4c5ee8c..8416dda 100644
> --- a/src/conf/virnodedeviceobj.c
> +++ b/src/conf/virnodedeviceobj.c
> @@ -543,24 +543,47 @@ virNodeDeviceObjListFindVportParentHost(virNodeDeviceObjListPtr
devs)
> }
>
>
> +/**
> + * @devs: Pointer to the device list
> + * @name: The name of the device
> + * @parent: The parent name
> + * @parent_wwnn: Parent's WWNN value
> + * @parent_wwpn: Parent's WWPN value
> + * @parent_fabric_wwn: Parent's fabric WWN value
> + * @create: Whether this is a create or existing device
IMHO the fact that an unlikely race might happen should stay transparent to
this function, thus I'd personally drop this commentary and leave the argslist,
i.e. passing @def untouched.
The commentary is to make it obvious *here* rather than having someone
needing to research git history to get the same explanation from a
commit message before they go and change the arguments to be @def and
essentially undo what was done.
> + *
> + * Using the input name and parent values, let's look for a parent host.
> + * This API cannot take the @def because it's called in a Destroy path which
> + * needs to unlock the object prior to calling since the searching involves
> + * locking the object (avoids deadlock). Once the lock is removed, it's
> + * possible, but improbable that udevAddOneDevice could have a "change"
> + * event on an existing vHBA causing the @def to be replaced in the object.
> + * If we use that @def, then we run the risk of some bad things happening
> + *
> + * Returns parent_host value on success or -1 on failure
> + */
> int
> virNodeDeviceObjListGetParentHost(virNodeDeviceObjListPtr devs,
> - virNodeDeviceDefPtr def,
> + const char *name,
> + const char *parent,
> + const char *parent_wwnn,
> + const char *parent_wwpn,
> + const char *parent_fabric_wwn,
> int create)
> {
> int parent_host = -1;
>
> - if (def->parent) {
> - parent_host = virNodeDeviceObjListGetParentHostByParent(devs, def->name,
> - def->parent);
> - } else if (def->parent_wwnn && def->parent_wwpn) {
> - parent_host = virNodeDeviceObjListGetParentHostByWWNs(devs, def->name,
> - def->parent_wwnn,
> - def->parent_wwpn);
> - } else if (def->parent_fabric_wwn) {
> + if (parent) {
> + parent_host = virNodeDeviceObjListGetParentHostByParent(devs, name,
> + parent);
> + } else if (parent_wwnn && parent_wwpn) {
> + parent_host = virNodeDeviceObjListGetParentHostByWWNs(devs, name,
> + parent_wwnn,
> + parent_wwpn);
> + } else if (parent_fabric_wwn) {
> parent_host =
> - virNodeDeviceObjListGetParentHostByFabricWWN(devs, def->name,
> -
def->parent_fabric_wwn);
> + virNodeDeviceObjListGetParentHostByFabricWWN(devs, name,
> + parent_fabric_wwn);
> } else if (create == CREATE_DEVICE) {
> /* Try to find a vport capable scsi_host when no parent supplied */
> parent_host = virNodeDeviceObjListFindVportParentHost(devs);
> diff --git a/src/conf/virnodedeviceobj.h b/src/conf/virnodedeviceobj.h
> index 788fb66..f5ea8fe 100644
> --- a/src/conf/virnodedeviceobj.h
> +++ b/src/conf/virnodedeviceobj.h
> @@ -75,8 +75,12 @@ virNodeDeviceObjListRemove(virNodeDeviceObjListPtr devs,
>
> int
> virNodeDeviceObjListGetParentHost(virNodeDeviceObjListPtr devs,
> - virNodeDeviceDefPtr def,
> - int create);
> + const char *name,
> + const char *parent,
> + const char *parent_wwnn,
> + const char *parent_wwpn,
> + const char *parent_fabric_wwn,
> + int create);
>
> virNodeDeviceObjListPtr
> virNodeDeviceObjListNew(void);
> diff --git a/src/node_device/node_device_driver.c
b/src/node_device/node_device_driver.c
> index 0a19908..1f888fb 100644
> --- a/src/node_device/node_device_driver.c
> +++ b/src/node_device/node_device_driver.c
> @@ -563,7 +563,13 @@ nodeDeviceCreateXML(virConnectPtr conn,
> if (virNodeDeviceGetWWNs(def, &wwnn, &wwpn) == -1)
> goto cleanup;
>
> - if ((parent_host = virNodeDeviceObjListGetParentHost(driver->devs, def,
> + /* NB: Since there's no @obj for which @def is assigned to, we can use
> + * the def-> values directly - unlike the Destroy code */
Here the commentary makes perfect sense.
How about simply: No @obj has @def assigned so we can use it here safely ?
I can change to "@obj has not yet been added to the domain object list,
so use of @def fields directly is safe."
> + if ((parent_host =
virNodeDeviceObjListGetParentHost(driver->devs,
> + def->name,
def->parent,
> + def->parent_wwnn,
> + def->parent_wwpn,
> + def->parent_fabric_wwn,
> CREATE_DEVICE)) < 0)
> goto cleanup;
>
> @@ -594,6 +600,11 @@ nodeDeviceDestroy(virNodeDevicePtr device)
> int ret = -1;
> virNodeDeviceObjPtr obj = NULL;
> virNodeDeviceDefPtr def;
> + char *name = NULL;
> + char *parent = NULL;
> + char *parent_wwnn = NULL;
> + char *parent_wwpn = NULL;
> + char *parent_fabric_wwn = NULL;
> char *wwnn = NULL, *wwpn = NULL;
> int parent_host = -1;
>
> @@ -609,12 +620,24 @@ nodeDeviceDestroy(virNodeDevicePtr device)
> if (virNodeDeviceGetWWNs(def, &wwnn, &wwpn) < 0)
> goto cleanup;
>
> - /* virNodeDeviceGetParentHost will cause the device object's lock
> - * to be taken, so grab the object def which will have the various
> - * fields used to search (name, parent, parent_wwnn, parent_wwpn,
> - * or parent_fabric_wwn) and drop the object lock. */
> + /* Because we're about to release the lock and thus run into a race
> + * possibility (however improbably) with a udevAddOneDevice change
> + * event which would essentially free the existing @def (obj->def) and
> + * replace it with something new, we need to save off and use the
> + * various fields that virNodeDeviceObjListGetParentHost will use */
And as I originally suggested I would allocate a new temporary @def structure,
initialize it, pass it to the *GetParentHost method like nothing out of the
ordinary happened and mentioned in the commentary why we've done it that way
(which you already do in this patch).
So you'd prefer some sort of virNodeDeviceDefCopy function be created?
Or use VIR_ALLOC(def) and copy in the 5 fields only to then
virNodeDeviceDefFree() it afterwards?
Seems like overkill to me. And of course assumes that at no point in the
future would virNodeDeviceObjListGetParentHost need some other field
that every consumer would need to be made aware about/of...
John
> + if (VIR_STRDUP(name, def->name) < 0 ||
> + VIR_STRDUP(parent, def->parent) < 0 ||
> + VIR_STRDUP(parent_wwnn, def->parent_wwnn) < 0 ||
> + VIR_STRDUP(parent_wwpn, def->parent_wwpn) < 0 ||
> + VIR_STRDUP(parent_fabric_wwn, def->parent_fabric_wwn) < 0)
> + goto cleanup;
> +
> virNodeDeviceObjEndAPI(&obj);
> - if ((parent_host = virNodeDeviceObjListGetParentHost(driver->devs, def,
> + if ((parent_host = virNodeDeviceObjListGetParentHost(driver->devs,
> + name, parent,
> + parent_wwnn,
> + parent_wwpn,
> + parent_fabric_wwn,
> EXISTING_DEVICE)) < 0)
> goto cleanup;
>
> @@ -626,6 +649,11 @@ nodeDeviceDestroy(virNodeDevicePtr device)
> cleanup:
> nodeDeviceUnlock();
> virNodeDeviceObjEndAPI(&obj);
> + VIR_FREE(name);
> + VIR_FREE(parent);
> + VIR_FREE(parent_wwnn);
> + VIR_FREE(parent_wwpn);
> + VIR_FREE(parent_fabric_wwn);
> VIR_FREE(wwnn);
> VIR_FREE(wwpn);
> return ret;
> diff --git a/src/test/test_driver.c b/src/test/test_driver.c
> index 83ab9cc..0e22ec2 100644
> --- a/src/test/test_driver.c
> +++ b/src/test/test_driver.c
> @@ -5579,8 +5579,13 @@ testNodeDeviceCreateXML(virConnectPtr conn,
>
> /* Unlike the "real" code we don't need the parent_host in order
to
> * call virVHBAManageVport, but still let's make sure the code finds
> - * something valid and no one messed up the mock environment. */
> - if (virNodeDeviceObjListGetParentHost(driver->devs, def, CREATE_DEVICE) <
0)
> + * something valid and no one messed up the mock environment. We also
> + * don't need to make local copies since there's no @obj for which
> + * @def is assigned to, so we can use the def-> values directly. */
Same suggestion about the commentary as I mentioned above - would that work out
for you?
ACK if we can agree on the suggestions.
Erik