[libvirt] [PATCH] nodedev: Add retry logic to read fibre channel files

30 Jun 2016

https://bugzilla.redhat.com/show_bug.cgi?id=1319544

During processing of a vport_create event, udevEventHandleCallback
will call udevProcessSCSIHost to read the fibre channel configuration
files for wwpn, wwpn, and fabric_wwn; however, as it turns out those
files may not have valid data. Rather than carry around invalid data,
add some logic to re-read the files up to 5 times. If after 5 attempts
things still fail, don't hold up processing any longer.

The "ffffffffffffffff" value is what seems to be used to initialize the
wwnn and wwpn files, while "0" or "ffffffffffffffff" have been used to
initialize the fabric_wwn file (see bz 1240912 for some details).

Signed-off-by: John Ferlan <jferlan@redhat.com>
---

 This has been hanging in a local branch for a while. Figured I'd at the
 very least get some feedback and thoughts from others as to whether it's
 worth making the adjustments.  Lots of details in the bz, the essentially
 the tester is bypassing the libvirt create vport mechanism, then wants to
 use the libvirt delete vHBA; however, the timing of things seems to have
 gotten clogged up and not all the fields in the vHBA are read properly
 thus the deletion cannot be done.  A workaround to the issue is to run
 nodedev-dumpxml on the vHBA again and the data is read properly. This patch
 went with a retry mechanism to try and ensure the data we've read is
 correct. I'm a bit ambivalent about this, but it doesn't hurt to ask...


 src/node_device/node_device_driver.c      |  4 ++--
 src/node_device/node_device_linux_sysfs.c | 25 +++++++++++++++++++++++++
 src/node_device/node_device_udev.c        |  9 ++++++++-
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/node_device/node_device_driver.c b/src/node_device/node_device_driver.c
index 500caeb..838b3ee 100644
--- a/src/node_device/node_device_driver.c
+++ b/src/node_device/node_device_driver.c
@@ -53,7 +53,7 @@ static int update_caps(virNodeDeviceObjPtr dev)
     while (cap) {
         switch (cap->data.type) {
         case VIR_NODE_DEV_CAP_SCSI_HOST:
-            nodeDeviceSysfsGetSCSIHostCaps(&dev->def->caps->data);
+            ignore_value(nodeDeviceSysfsGetSCSIHostCaps(&dev->def->caps->data));
             break;
         case VIR_NODE_DEV_CAP_NET:
             if (virNetDevGetLinkInfo(cap->data.net.ifname, &cap->data.net.lnk) < 0)
@@ -292,7 +292,7 @@ nodeDeviceLookupSCSIHostByWWN(virConnectPtr conn,
 
         while (cap) {
             if (cap->data.type == VIR_NODE_DEV_CAP_SCSI_HOST) {
-                nodeDeviceSysfsGetSCSIHostCaps(&cap->data);
+                ignore_value(nodeDeviceSysfsGetSCSIHostCaps(&cap->data));
                 if (cap->data.scsi_host.flags &
                     VIR_NODE_DEV_CAP_FLAG_HBA_FC_HOST) {
                     if (STREQ(cap->data.scsi_host.wwnn, wwnn) &&
diff --git a/src/node_device/node_device_linux_sysfs.c b/src/node_device/node_device_linux_sysfs.c
index 549d32c..9363487 100644
--- a/src/node_device/node_device_linux_sysfs.c
+++ b/src/node_device/node_device_linux_sysfs.c
@@ -41,6 +41,21 @@
 
 VIR_LOG_INIT("node_device.node_device_linux_sysfs");
 
+
+/* nodeDeviceSysfsGetSCSIHostCaps:
+ * @d: Pointer to node device capabilities
+ *
+ * Read the various 'scsi_host' files for the device and fill in
+ * the relevant data for our internal representation. It is possible
+ * that the environment isn't completely setup or "stable" when we
+ * go to read, so check for invalid values and fail on those. In
+ * particular, if the wwnn or wwpn are read in as ffffffffffffffff
+ * or the fabric_wwn is read as 0 or ffffffffffffffff, then we need
+ * to force a retry.
+ *
+ * Returns 0 on success, -1 on bad failure, -2 on failure to indicate
+ * to retry
+ */
 int
 nodeDeviceSysfsGetSCSIHostCaps(virNodeDevCapDataPtr d)
 {
@@ -83,6 +98,16 @@ nodeDeviceSysfsGetSCSIHostCaps(virNodeDevCapDataPtr d)
                      d->scsi_host.host);
             goto cleanup;
         }
+
+        if (STREQ(d->scsi_host.wwnn, "ffffffffffffffff") ||
+            STREQ(d->scsi_host.wwpn, "ffffffffffffffff") ||
+            STREQ(d->scsi_host.fabric_wwn, "0") ||
+            STREQ(d->scsi_host.fabric_wwn, "ffffffffffffffff")) {
+            VIR_WARN("Failed to get valid wwnn, wwpn, or fabric_wwn for host%d",
+                     d->scsi_host.host);
+            ret = -2;
+            goto cleanup;
+        }
     }
 
     if (virIsCapableVport(NULL, d->scsi_host.host)) {
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
index 76c60ea..54c9e58 100644
--- a/src/node_device/node_device_udev.c
+++ b/src/node_device/node_device_udev.c
@@ -523,6 +523,7 @@ static int udevProcessSCSIHost(struct udev_device *device ATTRIBUTE_UNUSED,
     virNodeDevCapDataPtr data = &def->caps->data;
     char *filename = NULL;
     char *str;
+    int retry = 5;
 
     filename = last_component(def->sysfs_path);
 
@@ -534,7 +535,13 @@ static int udevProcessSCSIHost(struct udev_device *device ATTRIBUTE_UNUSED,
         return -1;
     }
 
-    nodeDeviceSysfsGetSCSIHostCaps(&def->caps->data);
+    while (retry--) {
+        if (nodeDeviceSysfsGetSCSIHostCaps(&def->caps->data) == -2) {
+            sleep(1);
+            continue;
+        }
+        break;
+    }
 
     if (udevGenerateDeviceName(device, def, NULL) != 0)
         return -1;
-- 
2.5.5

    

John Ferlan

John Ferlan

Daniel P. Berrange

tags

participants (2)