[libvirt] [PATCH] esx: Use MD5 sum of mount path as storage pool UUID

With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID. Use gnulib's crypto/md5 module to generate the MD5 sum. --- bootstrap.conf | 1 + src/esx/esx_storage_driver.c | 107 +++++++++++------------------------------- 2 files changed, 28 insertions(+), 80 deletions(-) diff --git a/bootstrap.conf b/bootstrap.conf index 5af77a7..ca31a6e 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -26,6 +26,7 @@ canonicalize-lgpl close connect count-one-bits +crypto/md5 dirname-lgpl fcntl-h getaddrinfo diff --git a/src/esx/esx_storage_driver.c b/src/esx/esx_storage_driver.c index 644e66c..adbe8c6 100644 --- a/src/esx/esx_storage_driver.c +++ b/src/esx/esx_storage_driver.c @@ -24,6 +24,7 @@ #include <config.h> +#include "md5.h" #include "internal.h" #include "util.h" #include "memory.h" @@ -197,10 +198,7 @@ esxStoragePoolLookupByName(virConnectPtr conn, const char *name) esxPrivate *priv = conn->storagePrivateData; esxVI_ObjectContent *datastore = NULL; esxVI_DatastoreHostMount *hostMount = NULL; - char *suffix = NULL; - int suffixLength; - char uuid_string[VIR_UUID_STRING_BUFLEN] = "00000000-00000000-0000-000000000000"; - unsigned char uuid[VIR_UUID_BUFLEN]; + unsigned char md5[MD5_DIGEST_SIZE]; /* MD5_DIGEST_SIZE = VIR_UUID_BUFLEN = 16 */ virStoragePoolPtr pool = NULL; if (esxVI_EnsureSession(priv->primary) < 0) { @@ -213,53 +211,22 @@ esxStoragePoolLookupByName(virConnectPtr conn, const char *name) } /* - * Datastores don't have a UUID. We can use the 'host.mountInfo.path' - * property as source for a "UUID" on ESX, because the property value has - * this format: + * Datastores don't have a UUID, but we can use the 'host.mountInfo.path' + * property as source for a UUID. The mount path is unique per host and + * cannot change during the lifetime of the datastore. * - * host.mountInfo.path = /vmfs/volumes/4b0beca7-7fd401f3-1d7f-000ae484a6a3 - * host.mountInfo.path = /vmfs/volumes/b24b7a78-9d82b4f5 (short format) - * - * The 'host.mountInfo.path' property comes in two forms, with a complete - * "UUID" and a short "UUID". - * - * But this trailing "UUID" is not guaranteed to be there. On the other - * hand we already rely on another implementation detail of the ESX server: - * The object name of virtual machine contains an integer, we use that as - * domain ID. + * The MD5 sum of the mount path can be used as UUID, assuming MD5 is + * considered to be collision-free enough for this use case. */ if (esxVI_LookupDatastoreHostMount(priv->primary, datastore->obj, &hostMount) < 0) { goto cleanup; } - if ((suffix = STRSKIP(hostMount->mountInfo->path, "/vmfs/volumes/")) != NULL) { - suffixLength = strlen(suffix); + md5_buffer(hostMount->mountInfo->path, + strlen(hostMount->mountInfo->path), md5); - if ((suffixLength == 35 && /* = strlen("4b0beca7-7fd401f3-1d7f-000ae484a6a3") */ - suffix[8] == '-' && suffix[17] == '-' && suffix[22] == '-') || - (suffixLength == 17 && /* = strlen("b24b7a78-9d82b4f5") */ - suffix[8] == '-')) { - /* - * Intentionally use memcpy here, because we want to be able to - * replace a prefix of the initial Zero-UUID. virStrncpy would - * null-terminate the string in an unwanted place. - */ - memcpy(uuid_string, suffix, suffixLength); - } else { - VIR_WARN("Datastore host mount path suffix '%s' has unexpected " - "format, cannot deduce a UUID from it", suffix); - } - } - - if (virUUIDParse(uuid_string, uuid) < 0) { - ESX_ERROR(VIR_ERR_INTERNAL_ERROR, - _("Could not parse UUID from string '%s'"), - uuid_string); - goto cleanup; - } - - pool = virGetStoragePool(conn, name, uuid); + pool = virGetStoragePool(conn, name, md5); cleanup: esxVI_ObjectContent_Free(&datastore); @@ -275,9 +242,11 @@ esxStoragePoolLookupByUUID(virConnectPtr conn, const unsigned char *uuid) { esxPrivate *priv = conn->storagePrivateData; esxVI_String *propertyNameList = NULL; + esxVI_ObjectContent *datastoreList = NULL; esxVI_ObjectContent *datastore = NULL; + esxVI_DatastoreHostMount *hostMount = NULL; + unsigned char md5[MD5_DIGEST_SIZE]; /* MD5_DIGEST_SIZE = VIR_UUID_BUFLEN = 16 */ char uuid_string[VIR_UUID_STRING_BUFLEN] = ""; - char *absolutePath = NULL; char *name = NULL; virStoragePoolPtr pool = NULL; @@ -285,48 +254,26 @@ esxStoragePoolLookupByUUID(virConnectPtr conn, const unsigned char *uuid) return NULL; } - /* - * Convert UUID to 'host.mountInfo.path' form by stripping the second '-': - * - * <---- 14 ----><-------- 22 --------> <---- 13 ---><-------- 22 --------> - * 4b0beca7-7fd4-01f3-1d7f-000ae484a6a3 -> 4b0beca7-7fd401f3-1d7f-000ae484a6a3 - */ - virUUIDFormat(uuid, uuid_string); - memmove(uuid_string + 13, uuid_string + 14, 22 + 1); - - if (virAsprintf(&absolutePath, "/vmfs/volumes/%s", uuid_string) < 0) { - virReportOOMError(); - goto cleanup; - } - if (esxVI_String_AppendValueToList(&propertyNameList, "summary.name") < 0 || - esxVI_LookupDatastoreByAbsolutePath(priv->primary, absolutePath, - propertyNameList, &datastore, - esxVI_Occurrence_OptionalItem) < 0) { + esxVI_LookupDatastoreList(priv->primary, propertyNameList, + &datastoreList) < 0) { goto cleanup; } - /* - * If the first try didn't succeed and the trailing 16 digits are zero then - * the "UUID" could be a short one. Strip the 16 zeros and try again: - * - * <------ 17 -----> <------ 17 -----> - * b24b7a78-9d82b4f5-0000-000000000000 -> b24b7a78-9d82b4f5 - */ - if (datastore == NULL && STREQ(uuid_string + 17, "-0000-000000000000")) { - uuid_string[17] = '\0'; + for (datastore = datastoreList; datastore != NULL; + datastore = datastore->_next) { + esxVI_DatastoreHostMount_Free(&hostMount); - VIR_FREE(absolutePath); - - if (virAsprintf(&absolutePath, "/vmfs/volumes/%s", uuid_string) < 0) { - virReportOOMError(); + if (esxVI_LookupDatastoreHostMount(priv->primary, datastore->obj, + &hostMount) < 0) { goto cleanup; } - if (esxVI_LookupDatastoreByAbsolutePath(priv->primary, absolutePath, - propertyNameList, &datastore, - esxVI_Occurrence_RequiredItem) < 0) { - goto cleanup; + md5_buffer(hostMount->mountInfo->path, + strlen(hostMount->mountInfo->path), md5); + + if (memcmp(uuid, md5, VIR_UUID_BUFLEN) == 0) { + break; } } @@ -348,9 +295,9 @@ esxStoragePoolLookupByUUID(virConnectPtr conn, const unsigned char *uuid) pool = virGetStoragePool(conn, name, uuid); cleanup: - VIR_FREE(absolutePath); esxVI_String_Free(&propertyNameList); - esxVI_ObjectContent_Free(&datastore); + esxVI_ObjectContent_Free(&datastoreList); + esxVI_DatastoreHostMount_Free(&hostMount); return pool; } -- 1.7.0.4

On 08/09/2010 06:56 AM, Matthias Bolte wrote:
With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID.
Use gnulib's crypto/md5 module to generate the MD5 sum.
Just as a general thought, would it be preferably to use the SHA1 function instead of the MD5 one? Asking because MD5 seems to be trending "out of favour" (as a generalisation) compared to SHA1, after some cryptographers showed MD5 hash collisions could be practically achieved with some types of data. (or something along those lines) For all intents and purposes, I'm thinking it'll make no practical functional difference, but figure it's worth asking. :) Regards and best wishes, Justin Clift

2010/8/9 Justin Clift <jclift@redhat.com>:
On 08/09/2010 06:56 AM, Matthias Bolte wrote:
With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID.
Use gnulib's crypto/md5 module to generate the MD5 sum.
Just as a general thought, would it be preferably to use the SHA1 function instead of the MD5 one?
Every non-trivial hash function will do for the purpose of deriving a UUID from the mount path.
Asking because MD5 seems to be trending "out of favour" (as a generalisation) compared to SHA1, after some cryptographers showed MD5 hash collisions could be practically achieved with some types of data. (or something along those lines)
I'm aware of MD5 being considered as broken for cryptographic use cases, but this one isn't cryptographic so I don't care about the problem of crafted collisions here. The only real problem here would be when you have two datastores with different mount paths that hash to the same UUID, but I think md5 is collision-free enough so we can ignore this problem. There is another "problem" with the approach of using the mount path hash as UUID. In case of a Windows based GSX server you can use CIFS shares as datastores addressed via UNC paths like \\nas\share. If you have multiple GSX server sharing that datastore then all will have the same UUID because the have the same mount path. Actually I don't consider this as a real problem because in this case there is one datastore with one UUID shared between multiple hosts.
For all intents and purposes, I'm thinking it'll make no practical functional difference, but figure it's worth asking. :)
gnulib also provides SHA1 and SHA256 so we could use those instead of MD5, but I think MD5 is good enough for this special use case here. Matthias

Anno domini 2010 Matthias Bolte scripsit: [...]
There is another "problem" with the approach of using the mount path hash as UUID. In case of a Windows based GSX server you can use CIFS shares as datastores addressed via UNC paths like \\nas\share. If you have multiple GSX server sharing that datastore then all will have the same UUID because the have the same mount path. Actually I don't consider this as a real problem because in this case there is one datastore with one UUID shared between multiple hosts.
Would this become a problem if the user would use \\nas one node and \\nas.dom.aim on another node? The nasty thing about network paths is that people might use aliases.. This just came to my mind. Ciao Max -- Gib Dein Bestes. Dann übertriff Dich selbst!

2010/8/12 Maximilian Wilhelm <max@rfc2324.org>:
Anno domini 2010 Matthias Bolte scripsit:
[...]
There is another "problem" with the approach of using the mount path hash as UUID. In case of a Windows based GSX server you can use CIFS shares as datastores addressed via UNC paths like \\nas\share. If you have multiple GSX server sharing that datastore then all will have the same UUID because the have the same mount path. Actually I don't consider this as a real problem because in this case there is one datastore with one UUID shared between multiple hosts.
Would this become a problem if the user would use \\nas one node and \\nas.dom.aim on another node? The nasty thing about network paths is that people might use aliases..
This just came to my mind.
Ciao Max
It doesn't affect the per-connection UUID-to-datastore mapping, that's the important part. It will result in different UUIDs for the same datastore, so it affects the global datastore-to-UUID mapping, but that's okay, there's nothing the driver could do about that. The important point is that every datastore has a UUID now and that this UUID is at least unique per connection. Matthias

On Thu, Aug 12, 2010 at 01:40:14AM +0200, Matthias Bolte wrote:
2010/8/9 Justin Clift <jclift@redhat.com>:
On 08/09/2010 06:56 AM, Matthias Bolte wrote:
With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID.
Use gnulib's crypto/md5 module to generate the MD5 sum.
Just as a general thought, would it be preferably to use the SHA1 function instead of the MD5 one?
Every non-trivial hash function will do for the purpose of deriving a UUID from the mount path.
Asking because MD5 seems to be trending "out of favour" (as a generalisation) compared to SHA1, after some cryptographers showed MD5 hash collisions could be practically achieved with some types of data. (or something along those lines)
I'm aware of MD5 being considered as broken for cryptographic use cases, but this one isn't cryptographic so I don't care about the problem of crafted collisions here. The only real problem here would be when you have two datastores with different mount paths that hash to the same UUID, but I think md5 is collision-free enough so we can ignore this problem.
There is another "problem" with the approach of using the mount path hash as UUID. In case of a Windows based GSX server you can use CIFS shares as datastores addressed via UNC paths like \\nas\share. If you have multiple GSX server sharing that datastore then all will have the same UUID because the have the same mount path. Actually I don't consider this as a real problem because in this case there is one datastore with one UUID shared between multiple hosts.
This is actually a feature! If the same storage pool is visible on multiple hosts, it is absolutely desirable that it have the same UUID on each host.
For all intents and purposes, I'm thinking it'll make no practical functional difference, but figure it's worth asking. :)
gnulib also provides SHA1 and SHA256 so we could use those instead of MD5, but I think MD5 is good enough for this special use case here.
Agreed, we don't need cryptographic security for this use case, so MD5 is fine. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

On 08/08/2010 02:56 PM, Matthias Bolte wrote:
With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID.
Use gnulib's crypto/md5 module to generate the MD5 sum.
It looks like others have agreed on the approach; but I didn't see an actual ACK. So just in case:
@@ -197,10 +198,7 @@ esxStoragePoolLookupByName(virConnectPtr conn, const char *name) esxPrivate *priv = conn->storagePrivateData; esxVI_ObjectContent *datastore = NULL; esxVI_DatastoreHostMount *hostMount = NULL; - char *suffix = NULL; - int suffixLength; - char uuid_string[VIR_UUID_STRING_BUFLEN] = "00000000-00000000-0000-000000000000"; - unsigned char uuid[VIR_UUID_BUFLEN]; + unsigned char md5[MD5_DIGEST_SIZE]; /* MD5_DIGEST_SIZE = VIR_UUID_BUFLEN = 16 */
Is it worth also adding: #include "verify.h" ... verify(MD5_DIGEST_SIZE == VIR_UUID_BUFLEN) to enforce this comment's correctness? ACK with that nit addressed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

2010/8/14 Eric Blake <eblake@redhat.com>:
On 08/08/2010 02:56 PM, Matthias Bolte wrote:
With the previous storage pool UUID source not all storage pools had a proper UUID, especially GSX storage pools. The mount path is unique per host and cannot change during the lifetime of the datastore. Therefore, its MD5 sum can be used as UUID.
Use gnulib's crypto/md5 module to generate the MD5 sum.
It looks like others have agreed on the approach; but I didn't see an actual ACK. So just in case:
@@ -197,10 +198,7 @@ esxStoragePoolLookupByName(virConnectPtr conn, const char *name) esxPrivate *priv = conn->storagePrivateData; esxVI_ObjectContent *datastore = NULL; esxVI_DatastoreHostMount *hostMount = NULL; - char *suffix = NULL; - int suffixLength; - char uuid_string[VIR_UUID_STRING_BUFLEN] = "00000000-00000000-0000-000000000000"; - unsigned char uuid[VIR_UUID_BUFLEN]; + unsigned char md5[MD5_DIGEST_SIZE]; /* MD5_DIGEST_SIZE = VIR_UUID_BUFLEN = 16 */
Is it worth also adding:
#include "verify.h" ... verify(MD5_DIGEST_SIZE == VIR_UUID_BUFLEN)
to enforce this comment's correctness?
ACK with that nit addressed.
That's a good idea. I've folded that in and pushed the result. Matthias
participants (5)
-
Daniel P. Berrange
-
Eric Blake
-
Justin Clift
-
Matthias Bolte
-
Maximilian Wilhelm