[libvirt] [PATCH] Replace hashing algorithm with murmurhash

From: "Daniel P. Berrange" <berrange@redhat.com> Recent discussions have illustrated the potential for DOS attacks with the hash table implementations used by most languages and libraries. https://lwn.net/Articles/474912/ libvirt has an internal hash table impl, and uses hash tables for a variety of purposes. The hash key generation code is pretty simple and thus not strongly collision resistant. This patch replaces the current libvirt hash key generator with the (public domain) Murmurhash3 code. In addition every hash table now gets a random seed value which is used to perturb the hashing code. This should make it impossible to mount any practical attack against libvirt hashing code. * src/Makefile.am: Add virhashcode.[ch] * src/util/util.c: Make virRandom() return a fixed 32 bit integer value. * src/util/hash.c, src/util/hash.h, src/util/cgroup.c: Replace hash code generation with a call to virHashCodeGen() * src/util/virhashcode.h, src/util/virhashcode.c: Add a new virHashCodeGen() API using the Murmurhash3 algorithm. --- src/Makefile.am | 1 + src/util/cgroup.c | 6 ++- src/util/hash.c | 24 ++++------ src/util/hash.h | 7 ++- src/util/util.c | 4 +- src/util/virhashcode.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ src/util/virhashcode.h | 35 +++++++++++++ 7 files changed, 181 insertions(+), 21 deletions(-) create mode 100644 src/util/virhashcode.c create mode 100644 src/util/virhashcode.h diff --git a/src/Makefile.am b/src/Makefile.am index 0a1221a..dd5acc4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -89,6 +89,7 @@ UTIL_SOURCES = \ util/virpidfile.c util/virpidfile.h \ util/xml.c util/xml.h \ util/virterror.c util/virterror_internal.h \ + util/virhashcode.c util/virhashcode.h \ util/virkeycode.c util/virkeycode.h \ util/virkeymaps.h \ util/virnetdev.h util/virnetdev.c \ diff --git a/src/util/cgroup.c b/src/util/cgroup.c index 25f2691..7623264 100644 --- a/src/util/cgroup.c +++ b/src/util/cgroup.c @@ -33,6 +33,7 @@ #include "logging.h" #include "virfile.h" #include "hash.h" +#include "virhashcode.h" #define CGROUP_MAX_VAL 512 @@ -1636,9 +1637,10 @@ cleanup: } -static unsigned long virCgroupPidCode(const void *name) +static int32_t virCgroupPidCode(const void *name, int32_t seed) { - return (unsigned long)name; + unsigned long pid = (unsigned long)name; + return virHashCodeGen(&pid, sizeof(pid), seed); } static bool virCgroupPidEqual(const void *namea, const void *nameb) { diff --git a/src/util/hash.c b/src/util/hash.c index 665bcce..e765a6c 100644 --- a/src/util/hash.c +++ b/src/util/hash.c @@ -28,6 +28,8 @@ #include "hash.h" #include "memory.h" #include "logging.h" +#include "virhashcode.h" +#include "util.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -57,6 +59,7 @@ struct _virHashEntry { */ struct _virHashTable { virHashEntryPtr *table; + uint32_t seed; int size; int nbElems; /* True iff we are iterating over hash entries. */ @@ -70,20 +73,11 @@ struct _virHashTable { virHashKeyFree keyFree; }; -static unsigned long virHashStrCode(const void *name) + + +static int32_t virHashStrCode(const void *name, int32_t seed) { - const char *str = name; - unsigned long value = 0L; - char ch; - - if (str != NULL) { - value += 30 * (*str); - while ((ch = *str++) != 0) { - value = - value ^ ((value << 5) + (value >> 3) + (unsigned long) ch); - } - } - return value; + return virHashCodeGen(name, strlen(name), seed); } static bool virHashStrEqual(const void *namea, const void *nameb) @@ -101,11 +95,10 @@ static void virHashStrFree(void *name) VIR_FREE(name); } - static unsigned long virHashComputeKey(virHashTablePtr table, const void *name) { - unsigned long value = table->keyCode(name); + uint32_t value = table->keyCode(name, table->seed); return (value % table->size); } @@ -139,6 +132,7 @@ virHashTablePtr virHashCreateFull(int size, return NULL; } + table->seed = virRandom(INT32_MAX); table->size = size; table->nbElems = 0; table->dataFree = dataFree; diff --git a/src/util/hash.h b/src/util/hash.h index 1ba12b9..c415844 100644 --- a/src/util/hash.h +++ b/src/util/hash.h @@ -55,12 +55,15 @@ typedef int (*virHashSearcher) (const void *payload, const void *name, /** * virHashKeyCode: * @name: the hash key + * @seed: random seed * - * Compute the hash code corresponding to the key @name + * Compute the hash code corresponding to the key @name, using + * @seed to perturb the hashing algorithm * * Returns the hash code */ -typedef unsigned long (*virHashKeyCode)(const void *name); +typedef int32_t (*virHashKeyCode)(const void *name, + int32_t seed); /** * virHashKeyEqual: * @namea: the first hash key diff --git a/src/util/util.c b/src/util/util.c index 8663c4d..c8588b5 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -2115,7 +2115,7 @@ int virRandomInitialize(unsigned int seed) return 0; } -int virRandom(int max) +int32_t virRandom(int32_t max) { int32_t ret; @@ -2123,7 +2123,7 @@ int virRandom(int max) random_r(&randomData, &ret); virMutexUnlock(&randomLock); - return (int) ((double)max * ((double)ret / (double)RAND_MAX)); + return (int32_t) ((double)max * ((double)ret / (double)RAND_MAX)); } diff --git a/src/util/virhashcode.c b/src/util/virhashcode.c new file mode 100644 index 0000000..50b46f4 --- /dev/null +++ b/src/util/virhashcode.c @@ -0,0 +1,125 @@ +/* + * virhashcode.c: hash code generation + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * The hash code generation is based on the public domain MurmurHash3 from Austin Appleby: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * We use only the 32 bit variant because the 2 produce different result while + * we need to produce the same result regardless of the architecture as + * clients can be both 64 or 32 bit at the same time. + */ + +#include <config.h> + +#include "virhashcode.h" + +static uint32_t rotl(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +/* slower than original but is endian neutral and handles platforms that + * do only aligned reads */ +__attribute__((always_inline)) +static uint32_t getblock(const uint8_t *p, int i) +{ + uint32_t r; + size_t size = sizeof(uint32_t); + + memcpy(&r, &p[i * size], size); + + return le32toh(r); +} + +/* + * Finalization mix - force all bits of a hash block to avalanche + */ +__attribute__((always_inline)) +static uint32_t fmix(uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + + +uint32_t virHashCodeGen(const void *key, size_t len, uint32_t seed) +{ + const uint8_t *blocks; + const uint8_t *tail; + size_t nblocks; + uint32_t h1; + uint32_t k1; + uint32_t c1; + uint32_t c2; + size_t i; + + blocks = (const uint8_t *)key; + nblocks = len / 4; + h1 = seed; + c1 = 0xcc9e2d51; + c2 = 0x1b873593; + + /* body */ + + for (i = 0; i < nblocks; i++) { + + k1 = getblock(blocks, i); + + k1 *= c1; + k1 = rotl(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = rotl(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + /* tail */ + + tail = (const uint8_t *)key + nblocks * 4; + + k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + case 2: + k1 ^= tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = rotl(k1, 15); + k1 *= c2; + h1 ^= k1; + default: + break; + } + + /* finalization */ + + h1 ^= len; + h1 = fmix(h1); + + return h1; +} diff --git a/src/util/virhashcode.h b/src/util/virhashcode.h new file mode 100644 index 0000000..867e04e --- /dev/null +++ b/src/util/virhashcode.h @@ -0,0 +1,35 @@ +/* + * virhashcode.h: hash code generation + * + * Copyright (C) 2012 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * The hash code generation is based on the public domain MurmurHash3 from Austin Appleby: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp + * + * We use only the 32 bit variant because the 2 produce different result while + * we need to produce the same result regardless of the architecture as + * clients can be both 64 or 32 bit at the same time. + */ + +#ifndef __VIR_HASH_CODE_H__ +# define __VIR_HASH_CODE_H__ + +# include "internal.h" + +extern uint32_t virHashCodeGen(const void *key, size_t len, uint32_t seed); + +#endif /* __VIR_HASH_CODE_H__ */ -- 1.7.7.5

On 01/18/2012 09:23 AM, Daniel P. Berrange wrote:
From: "Daniel P. Berrange" <berrange@redhat.com>
Recent discussions have illustrated the potential for DOS attacks with the hash table implementations used by most languages and libraries.
https://lwn.net/Articles/474912/
libvirt has an internal hash table impl, and uses hash tables for a variety of purposes. The hash key generation code is pretty simple and thus not strongly collision resistant.
This patch replaces the current libvirt hash key generator with the (public domain) Murmurhash3 code. In addition every hash table now gets a random seed value which is used to perturb the hashing code. This should make it impossible to mount any practical attack against libvirt hashing code.
* src/Makefile.am: Add virhashcode.[ch] * src/util/util.c: Make virRandom() return a fixed 32 bit integer value. * src/util/hash.c, src/util/hash.h, src/util/cgroup.c: Replace hash code generation with a call to virHashCodeGen() * src/util/virhashcode.h, src/util/virhashcode.c: Add a new virHashCodeGen() API using the Murmurhash3 algorithm.
Looks like a good idea, but I have to NAK this version of the patch and ask for a v2. Meanwhile, I wonder if gnulib should adopt the murmurhash3 algorithm (but that's a different story for a different list).
@@ -1636,9 +1637,10 @@ cleanup: }
-static unsigned long virCgroupPidCode(const void *name) +static int32_t virCgroupPidCode(const void *name, int32_t seed) { - return (unsigned long)name; + unsigned long pid = (unsigned long)name; + return virHashCodeGen(&pid, sizeof(pid), seed);
I'm not sure if it matters, but you could shorten these two lines to just one: return virHashCodeGen(&name, sizeof(unsigned long), seed);
@@ -101,11 +95,10 @@ static void virHashStrFree(void *name) VIR_FREE(name); }
- static unsigned long virHashComputeKey(virHashTablePtr table, const void *name) { - unsigned long value = table->keyCode(name); + uint32_t value = table->keyCode(name, table->seed); return (value % table->size);
If, on a 64-bit machine, table->size ever grows larger than uint32_t, then we've lost some hashing abilities compared to what the old unsigned long hash gave us. Conversely, if table-size ever grows that large, we're burning an awful lot of memory in the first place, and a hash collision at that point is the least of our worries :) I think this is okay, although you may want to consider changing virHashComputeKey to return uint32_t.
+++ b/src/util/hash.h @@ -55,12 +55,15 @@ typedef int (*virHashSearcher) (const void *payload, const void *name, /** * virHashKeyCode: * @name: the hash key + * @seed: random seed * - * Compute the hash code corresponding to the key @name + * Compute the hash code corresponding to the key @name, using + * @seed to perturb the hashing algorithm * * Returns the hash code */ -typedef unsigned long (*virHashKeyCode)(const void *name); +typedef int32_t (*virHashKeyCode)(const void *name, + int32_t seed);
Umm, you used uint32_t in virHashComputeKey, but int32_t here. While I don't _think_ signed values will bite us due to sign-extension in 64-bit operations introducing a bias, I'd rather make this signature: typedef uint32_t (*virHashKeyCode)(const void *name, uint32_t seed); and adjust the callers to comply with unsigned everywhere.
+++ b/src/util/util.c @@ -2115,7 +2115,7 @@ int virRandomInitialize(unsigned int seed) return 0; }
-int virRandom(int max) +int32_t virRandom(int32_t max)
Ouch. This means you only get 31 bits of randomness for virRandom(INT32_MAX). But you _want_ 32 bits of randomness, especially since you are storing _virHashTable.seed as uint32_t.
{ int32_t ret;
@@ -2123,7 +2123,7 @@ int virRandom(int max) random_r(&randomData, &ret); virMutexUnlock(&randomLock);
- return (int) ((double)max * ((double)ret / (double)RAND_MAX)); + return (int32_t) ((double)max * ((double)ret / (double)RAND_MAX));
Furthermore, this calculation leaves gaps on platforms where RAND_MAX is only the POSIX-mandated minimum of 32767, or 15 bits (thankfully, at least Linux uses RAND_MAX of INT_MAX for 31 bits). Furthermore, this computation is non-uniform for all but a handful of numbers: if you request a max larger than RAND_MAX, then there are some values you cannot get back; if you request a max smaller than RAND_MAX but which is not a power of 2, then floating point division followed by rounding to integers will cause some numbers to be selected more frequently than others. I also note that all existing clients of virRandom() passed in a power of 2 (your use of virRandom(INT_MAX) was the first instance of someone using less than a power of two; and even then, you really did want a power of 2). Since we want to avoid non-uniform distribution, it makes sense to _require_ that you request a max as a power of 2 (that is, you are requesting a certain quantity of random bits), and proceed to do everything without the use of any floating point math. Technically, it is also possible to write a capped random number generator for any range [0,non-power-of-2), by repeatedly querying the generator for random bits between [0,next-power-of-2) and discarding any queries that fall in the range [cap,power-of-2); but no need to add that complexity if we promise to always request a power-of-2 random bits in the first place. That is, I propose that we ditch virRandom, and instead implement a new function: /* Return an evenly distributed random number between [0,2^nbits), where nbits must be in the range (0,64]. */ uint64_t virRandomBits(int nbits) { int bits_per_iter = count_one_bits(RAND_MAX); uint64_t ret = 0; int32_t bits; /* This algorithm requires that RAND_MAX == 2^n-1 for some n; gnulib's random_r meets this property. */ verify(((RAND_MAX + 1U) & RAND_MAX) == 0); virMutexLock(&randomLock); while (nbits > bits_per_iter) { random_r(&randomData, &bits); ret = (ret << bits_per_iter) | (bits & RAND_MAX); nbits -= bits_per_iter; } random_r(&randomData, &bits); ret = (ret << nbits) | (bits & ((1 << nbits) - 1)); virMutexUnlock(&randomLock); return ret; } where existing calls to virRandom(256) will now be virRandomBits(8), virRandom(1024*1024*1024) will now be virRandomBits(30), and your code would use uint32_t seed = virRandom(32).
+#include "virhashcode.h" + +static uint32_t rotl(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +}
Gnulib provides the bitrotate module, which provides "bitrotate.h" and an implementation of this function that is guaranteed to be as efficient as possible when compiled by gcc (possibly by using compiler-builtins to access raw assembly on architectures that have builtin rotate instructions). I'd suggest using that rather than rolling your own.
+ +/* slower than original but is endian neutral and handles platforms that + * do only aligned reads */ +__attribute__((always_inline)) +static uint32_t getblock(const uint8_t *p, int i) +{ + uint32_t r; + size_t size = sizeof(uint32_t); + + memcpy(&r, &p[i * size], size); + + return le32toh(r);
<endian.h>, and thus le32toh(), is not yet standardized (although POSIX will be adding it in the future), nor is it currently provided by gnulib. We'd have to get that fixed first. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jan 18, 2012 at 11:34:16AM -0700, Eric Blake wrote:
On 01/18/2012 09:23 AM, Daniel P. Berrange wrote:
@@ -1636,9 +1637,10 @@ cleanup: }
-static unsigned long virCgroupPidCode(const void *name) +static int32_t virCgroupPidCode(const void *name, int32_t seed) { - return (unsigned long)name; + unsigned long pid = (unsigned long)name; + return virHashCodeGen(&pid, sizeof(pid), seed);
I'm not sure if it matters, but you could shorten these two lines to just one:
return virHashCodeGen(&name, sizeof(unsigned long), seed);
I actually preferred the slightly more verbose style, to make it clearer that we're actually passing in the PID as an int, cast to a pointer, instead of an int pointer.
@@ -101,11 +95,10 @@ static void virHashStrFree(void *name) VIR_FREE(name); }
- static unsigned long virHashComputeKey(virHashTablePtr table, const void *name) { - unsigned long value = table->keyCode(name); + uint32_t value = table->keyCode(name, table->seed); return (value % table->size);
If, on a 64-bit machine, table->size ever grows larger than uint32_t, then we've lost some hashing abilities compared to what the old unsigned long hash gave us. Conversely, if table-size ever grows that large, we're burning an awful lot of memory in the first place, and a hash collision at that point is the least of our worries :) I think this is okay, although you may want to consider changing virHashComputeKey to return uint32_t.
I changed the signature here to actually be 'size_t', likewise for the table->size variable itself.
+++ b/src/util/hash.h @@ -55,12 +55,15 @@ typedef int (*virHashSearcher) (const void *payload, const void *name, /** * virHashKeyCode: * @name: the hash key + * @seed: random seed * - * Compute the hash code corresponding to the key @name + * Compute the hash code corresponding to the key @name, using + * @seed to perturb the hashing algorithm * * Returns the hash code */ -typedef unsigned long (*virHashKeyCode)(const void *name); +typedef int32_t (*virHashKeyCode)(const void *name, + int32_t seed);
Umm, you used uint32_t in virHashComputeKey, but int32_t here. While I don't _think_ signed values will bite us due to sign-extension in 64-bit operations introducing a bias, I'd rather make this signature:
typedef uint32_t (*virHashKeyCode)(const void *name, uint32_t seed);
and adjust the callers to comply with unsigned everywhere.
Yeah, that was a mistake - it should have been uint32.
That is, I propose that we ditch virRandom, and instead implement a new function:
/* Return an evenly distributed random number between [0,2^nbits), where nbits must be in the range (0,64]. */ uint64_t virRandomBits(int nbits) { int bits_per_iter = count_one_bits(RAND_MAX); uint64_t ret = 0; int32_t bits; /* This algorithm requires that RAND_MAX == 2^n-1 for some n; gnulib's random_r meets this property. */ verify(((RAND_MAX + 1U) & RAND_MAX) == 0);
virMutexLock(&randomLock);
while (nbits > bits_per_iter) { random_r(&randomData, &bits); ret = (ret << bits_per_iter) | (bits & RAND_MAX); nbits -= bits_per_iter; }
random_r(&randomData, &bits); ret = (ret << nbits) | (bits & ((1 << nbits) - 1));
virMutexUnlock(&randomLock); return ret; }
where existing calls to virRandom(256) will now be virRandomBits(8), virRandom(1024*1024*1024) will now be virRandomBits(30), and your code would use uint32_t seed = virRandom(32).
Yep, I've done that.
+#include "virhashcode.h" + +static uint32_t rotl(uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +}
Gnulib provides the bitrotate module, which provides "bitrotate.h" and an implementation of this function that is guaranteed to be as efficient as possible when compiled by gcc (possibly by using compiler-builtins to access raw assembly on architectures that have builtin rotate instructions). I'd suggest using that rather than rolling your own.
Good point.
+ +/* slower than original but is endian neutral and handles platforms that + * do only aligned reads */ +__attribute__((always_inline)) +static uint32_t getblock(const uint8_t *p, int i) +{ + uint32_t r; + size_t size = sizeof(uint32_t); + + memcpy(&r, &p[i * size], size); + + return le32toh(r);
<endian.h>, and thus le32toh(), is not yet standardized (although POSIX will be adding it in the future), nor is it currently provided by gnulib. We'd have to get that fixed first.
The le32toh call was only here because the code I copied wanted to be endian neutral. I don't think libvirt really cares if its hash codes are endian neutral, so I trivially just removed the le32toh call and avoid the problem of endian.h Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 01/25/2012 09:38 AM, Daniel P. Berrange wrote:
-static unsigned long virCgroupPidCode(const void *name) +static int32_t virCgroupPidCode(const void *name, int32_t seed) { - return (unsigned long)name; + unsigned long pid = (unsigned long)name; + return virHashCodeGen(&pid, sizeof(pid), seed);
I'm not sure if it matters, but you could shorten these two lines to just one:
return virHashCodeGen(&name, sizeof(unsigned long), seed);
I actually preferred the slightly more verbose style, to make it clearer that we're actually passing in the PID as an int, cast to a pointer, instead of an int pointer.
And given the recent thread about mingw32, this is wrong anyways. There, 'unsigned long' is 32 bits, but pid_t is 64 bits, so we'd be losing information in our hash. We probably need to fix this code to be passing (void *)&pid_t and dereferencing the pointer to get to a full pid, rather than cheating with (void*)(unsigned long)pid_t and possibly losing bits (although since this is in a context of hashing, lost bits only means more chance for collision, and not a fatal algorithmic flaw).
where existing calls to virRandom(256) will now be virRandomBits(8), virRandom(1024*1024*1024) will now be virRandomBits(30), and your code would use uint32_t seed = virRandom(32).
Of course, here I meant virRandomBits(32),
Yep, I've done that.
but I'm sure you figured that out.
+ + return le32toh(r);
<endian.h>, and thus le32toh(), is not yet standardized (although POSIX will be adding it in the future), nor is it currently provided by gnulib. We'd have to get that fixed first.
The le32toh call was only here because the code I copied wanted to be endian neutral. I don't think libvirt really cares if its hash codes are endian neutral, so I trivially just removed the le32toh call and avoid the problem of endian.h
Agreed - we aren't sharing hash values over the wire, so all hash values within a particular libvirtd process will be the same endianness, without having to waste time on swapping bytes around. -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jan 25, 2012 at 09:55:25AM -0700, Eric Blake wrote:
On 01/25/2012 09:38 AM, Daniel P. Berrange wrote:
-static unsigned long virCgroupPidCode(const void *name) +static int32_t virCgroupPidCode(const void *name, int32_t seed) { - return (unsigned long)name; + unsigned long pid = (unsigned long)name; + return virHashCodeGen(&pid, sizeof(pid), seed);
I'm not sure if it matters, but you could shorten these two lines to just one:
return virHashCodeGen(&name, sizeof(unsigned long), seed);
I actually preferred the slightly more verbose style, to make it clearer that we're actually passing in the PID as an int, cast to a pointer, instead of an int pointer.
And given the recent thread about mingw32, this is wrong anyways. There, 'unsigned long' is 32 bits, but pid_t is 64 bits, so we'd be losing information in our hash. We probably need to fix this code to be passing (void *)&pid_t and dereferencing the pointer to get to a full pid, rather than cheating with (void*)(unsigned long)pid_t and possibly losing bits (although since this is in a context of hashing, lost bits only means more chance for collision, and not a fatal algorithmic flaw).
Actually given the context of this usage, we don't need to use pid_t here. The values come from doing scanf("%lu") on the cgroups task file, and this will only ever run on a Linux host. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On 01/25/2012 09:55 AM, Eric Blake wrote:
+ + return le32toh(r);
<endian.h>, and thus le32toh(), is not yet standardized (although POSIX will be adding it in the future), nor is it currently provided by gnulib. We'd have to get that fixed first.
The le32toh call was only here because the code I copied wanted to be endian neutral. I don't think libvirt really cares if its hash codes are endian neutral, so I trivially just removed the le32toh call and avoid the problem of endian.h
Agreed - we aren't sharing hash values over the wire, so all hash values within a particular libvirtd process will be the same endianness, without having to waste time on swapping bytes around.
Actually, the more I think about this, the more I have to wonder: Does the incoming alignment affect the output hash? That is, if I do int i; char array[12]; for (i = 0; i < 4; i++) { strcpy(array + i, "12345678"); printf("%x\n", (int) virHashStrCode(array + i, 0)); } do I get the same values for all four iterations, on both little- and big-endian architectures? If not, then the byte-rearranging really is important to the algorithm (that is, the algorithm is operating on 4-byte quantities, but must build up those quantities from 1-byte quantities regardless of starting alignment, so endianness looks like it plays a role in doing the conversion correctly). -- Eric Blake eblake@redhat.com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Wed, Jan 25, 2012 at 10:16:05AM -0700, Eric Blake wrote:
On 01/25/2012 09:55 AM, Eric Blake wrote:
+ + return le32toh(r);
<endian.h>, and thus le32toh(), is not yet standardized (although POSIX will be adding it in the future), nor is it currently provided by gnulib. We'd have to get that fixed first.
The le32toh call was only here because the code I copied wanted to be endian neutral. I don't think libvirt really cares if its hash codes are endian neutral, so I trivially just removed the le32toh call and avoid the problem of endian.h
Agreed - we aren't sharing hash values over the wire, so all hash values within a particular libvirtd process will be the same endianness, without having to waste time on swapping bytes around.
Actually, the more I think about this, the more I have to wonder: Does the incoming alignment affect the output hash? That is, if I do
int i; char array[12]; for (i = 0; i < 4; i++) { strcpy(array + i, "12345678"); printf("%x\n", (int) virHashStrCode(array + i, 0)); }
do I get the same values for all four iterations, on both little- and big-endian architectures? If not, then the byte-rearranging really is important to the algorithm (that is, the algorithm is operating on 4-byte quantities, but must build up those quantities from 1-byte quantities regardless of starting alignment, so endianness looks like it plays a role in doing the conversion correctly).
Consulting the original CPP code, I believe we're fine without it https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i ) { return p[i]; } FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) { return p[i]; } Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
participants (2)
-
Daniel P. Berrange
-
Eric Blake