[libvirt] [PATCH 0/2] Fix race in qemu driver wrt vnc port allocation

The qemu driver contains a subtle race in the logic to find next available vnc port. Currently it iterates through all available ports and returns the first for which bind(2) succeeds. However it is possible that a previously issued port has not yet been bound by qemu, resulting in the same port used for a subsequent domain. The issue was briefly discussed on IRC and the consensus there was to track port allocation with a bitmap. The first patch adds some simple bitmap operations to utils. I added them to the generic util.[ch] for lack of a more suitable file. Alternate suggestions welcome. The second patch uses the bitmap to track vnc port allocations. The race was consistently encountered in HA environments [1]. This patch series has been successfully tested in such environment. [1] http://www.linux-ha.org/wiki/VirtualDomain_%28resource_agent%29#VNC_port_aut... Jim Fehlig (2): Add simple bitmap operations to utils Fix race in finding available vnc port src/qemu/qemu_driver.c | 22 ++++++++++++++++++++++ src/util/util.c | 38 ++++++++++++++++++++++++++++++++++++++ src/util/util.h | 10 ++++++++++ 3 files changed, 70 insertions(+), 0 deletions(-)

--- src/util/util.c | 38 ++++++++++++++++++++++++++++++++++++++ src/util/util.h | 10 ++++++++++ 2 files changed, 48 insertions(+), 0 deletions(-) diff --git a/src/util/util.c b/src/util/util.c index 26ac6ba..e600bef 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -2817,3 +2817,41 @@ int virBuildPathInternal(char **path, ...) return ret; } + +/* would CHAR_BIT or such be better than explicit '8'? */ +#define VIR_BITMAP_BITS_PER_UNIT (sizeof(virBitmap) * 8) +#define VIR_BITMAP_UNIT_OFFSET(b) ((b) / VIR_BITMAP_BITS_PER_UNIT) +#define VIR_BITMAP_BIT_OFFSET(b) ((b) % VIR_BITMAP_BITS_PER_UNIT) + +virBitmapPtr virBitmapAlloc(unsigned int size) +{ + virBitmapPtr bitmap; + unsigned int sz = (size / VIR_BITMAP_BITS_PER_UNIT) + + (size % VIR_BITMAP_BITS_PER_UNIT); + + if (VIR_ALLOC_N(bitmap, sz) < 0) + return NULL; + return bitmap; +} + +void virBitmapFree(virBitmapPtr bitmap) +{ + VIR_FREE(bitmap); +} + +void virBitmapSetBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] |= (1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +void virBitmapClearBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] &= ~(1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +int virBitmapGetBit(virBitmapPtr bitmap, unsigned int b) +{ + virBitmap bit = bitmap[VIR_BITMAP_UNIT_OFFSET(b)] & + (1 << VIR_BITMAP_BIT_OFFSET(b)); + return bit != 0; +} diff --git a/src/util/util.h b/src/util/util.h index 6bf6bcc..077e9c9 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -31,6 +31,7 @@ # include <unistd.h> # include <sys/select.h> # include <sys/types.h> +# include <stdint.h> # ifndef MIN # define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -274,4 +275,13 @@ void virFileWaitForDevices(void); # define virBuildPath(path, ...) virBuildPathInternal(path, __VA_ARGS__, NULL) int virBuildPathInternal(char **path, ...) ATTRIBUTE_SENTINEL; +typedef uint32_t virBitmap; +typedef virBitmap *virBitmapPtr; + +virBitmapPtr virBitmapAlloc(unsigned int size); +void virBitmapFree(virBitmapPtr bitmap); +void virBitmapSetBit(virBitmap *bitmap, unsigned int b); +void virBitmapClearBit(virBitmap *bitmap, unsigned int b); +int virBitmapGetBit(virBitmap *bitmap, unsigned int b); + #endif /* __VIR_UTIL_H__ */ -- 1.6.0.2

On Mon, 2010-05-17 at 12:09 -0600, Jim Fehlig wrote:
--- src/util/util.c | 38 ++++++++++++++++++++++++++++++++++++++ src/util/util.h | 10 ++++++++++ 2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/src/util/util.c b/src/util/util.c index 26ac6ba..e600bef 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -2817,3 +2817,41 @@ int virBuildPathInternal(char **path, ...)
return ret; } + +/* would CHAR_BIT or such be better than explicit '8'? */ +#define VIR_BITMAP_BITS_PER_UNIT (sizeof(virBitmap) * 8) +#define VIR_BITMAP_UNIT_OFFSET(b) ((b) / VIR_BITMAP_BITS_PER_UNIT) +#define VIR_BITMAP_BIT_OFFSET(b) ((b) % VIR_BITMAP_BITS_PER_UNIT) + +virBitmapPtr virBitmapAlloc(unsigned int size) +{ + virBitmapPtr bitmap; + unsigned int sz = (size / VIR_BITMAP_BITS_PER_UNIT) + + (size % VIR_BITMAP_BITS_PER_UNIT);
I think this should be calculated as (size + VIR_BITMAP_BITS_PER_UNIT - 1) / VIR_BITMAP_BITS_PER_UNIT mine 2 bit: ( 2 + 32 - 1 ) / 32 = 33 / 32 = 1 yours 2 bit: ( 2 / 32 ) + (2 % 32 ) = 0 + 2
+ + if (VIR_ALLOC_N(bitmap, sz) < 0) + return NULL; + return bitmap; +} + +void virBitmapFree(virBitmapPtr bitmap) +{ + VIR_FREE(bitmap); +} + +void virBitmapSetBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] |= (1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +void virBitmapClearBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] &= ~(1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +int virBitmapGetBit(virBitmapPtr bitmap, unsigned int b) +{ + virBitmap bit = bitmap[VIR_BITMAP_UNIT_OFFSET(b)] & + (1 << VIR_BITMAP_BIT_OFFSET(b)); + return bit != 0; +} diff --git a/src/util/util.h b/src/util/util.h index 6bf6bcc..077e9c9 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -31,6 +31,7 @@ # include <unistd.h> # include <sys/select.h> # include <sys/types.h> +# include <stdint.h>
# ifndef MIN # define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -274,4 +275,13 @@ void virFileWaitForDevices(void); # define virBuildPath(path, ...) virBuildPathInternal(path, __VA_ARGS__, NULL) int virBuildPathInternal(char **path, ...) ATTRIBUTE_SENTINEL;
+typedef uint32_t virBitmap; +typedef virBitmap *virBitmapPtr; + +virBitmapPtr virBitmapAlloc(unsigned int size); +void virBitmapFree(virBitmapPtr bitmap); +void virBitmapSetBit(virBitmap *bitmap, unsigned int b); +void virBitmapClearBit(virBitmap *bitmap, unsigned int b); +int virBitmapGetBit(virBitmap *bitmap, unsigned int b); + #endif /* __VIR_UTIL_H__ */

On 05/17/2010 12:09 PM, Jim Fehlig wrote:
--- src/util/util.c | 38 ++++++++++++++++++++++++++++++++++++++ src/util/util.h | 10 ++++++++++ 2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/src/util/util.c b/src/util/util.c index 26ac6ba..e600bef 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -2817,3 +2817,41 @@ int virBuildPathInternal(char **path, ...)
return ret; } + +/* would CHAR_BIT or such be better than explicit '8'? */ +#define VIR_BITMAP_BITS_PER_UNIT (sizeof(virBitmap) * 8)
In answer to the question, yes, #include <limits.h> and use CHAR_BIT (although it is guaranteed by POSIX to be 8).
+virBitmapPtr virBitmapAlloc(unsigned int size)
Why not size_t?
+{ + virBitmapPtr bitmap; + unsigned int sz = (size / VIR_BITMAP_BITS_PER_UNIT) + + (size % VIR_BITMAP_BITS_PER_UNIT);
Stefan already pointed out the bug here.
+ + if (VIR_ALLOC_N(bitmap, sz) < 0) + return NULL; + return bitmap; +} + +void virBitmapFree(virBitmapPtr bitmap) +{ + VIR_FREE(bitmap); +}
Another function to add to cfg.mk's list of free-like functions.
+ +void virBitmapSetBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] |= (1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +void virBitmapClearBit(virBitmapPtr bitmap, unsigned int b) +{ + bitmap[VIR_BITMAP_UNIT_OFFSET(b)] &= ~(1 << VIR_BITMAP_BIT_OFFSET(b)); +} + +int virBitmapGetBit(virBitmapPtr bitmap, unsigned int b)
bool instead of int
+{ + virBitmap bit = bitmap[VIR_BITMAP_UNIT_OFFSET(b)] & + (1 << VIR_BITMAP_BIT_OFFSET(b));
Should these interfaces add sanity checks that b is in range (that is, smaller than the original allocated size)? Do we want to support dynamically-growing bitsets? -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On Mon, 2010-05-17 at 14:05 -0600, Eric Blake wrote:
On 05/17/2010 12:09 PM, Jim Fehlig wrote:
+{ + virBitmap bit = bitmap[VIR_BITMAP_UNIT_OFFSET(b)] & + (1 << VIR_BITMAP_BIT_OFFSET(b));
Should these interfaces add sanity checks that b is in range (that is, smaller than the original allocated size)? Do we want to support dynamically-growing bitsets?
Right, should hold the number of elements it has. I think range checking would be a good idea. Stefan

On Mon, May 17, 2010 at 12:09:59PM -0600, Jim Fehlig wrote:
--- src/util/util.c | 38 ++++++++++++++++++++++++++++++++++++++ src/util/util.h | 10 ++++++++++ 2 files changed, 48 insertions(+), 0 deletions(-)
Can you put those in util/bitmap.{h,c} as the util.c file is getting far too large. Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

The qemu driver contains a subtle race in the logic to find next available vnc port. Currently it iterates through all available ports and returns the first for which bind(2) succeeds. However it is possible that a previously issued port has not yet been bound by qemu, resulting in the same port used for a subsequent domain. This patch addresses the race by using a simple bitmap to "reserve" the ports allocated by libvirt. --- src/qemu/qemu_driver.c | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 582fdee..329859e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2629,13 +2629,24 @@ qemuInitPCIAddresses(struct qemud_driver *driver, return ret; } +static virBitmapPtr vnc_used_port_bitmap; + static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { int i; + if (vnc_used_port_bitmap == NULL) { + if ((vnc_used_port_bitmap = virBitmapAlloc(59635)) == NULL) + return -1; + } + for (i = 5900 ; i < 65535 ; i++) { int fd; int reuse = 1; struct sockaddr_in addr; + + if (virBitmapGetBit(vnc_used_port_bitmap, i - 5900)) + continue; + addr.sin_family = AF_INET; addr.sin_port = htons(i); addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -2651,6 +2662,8 @@ static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { /* Not in use, lets grab it */ close(fd); + /* Add port to bitmap of reserved ports */ + virBitmapSetBit(vnc_used_port_bitmap, i - 5900); return i; } close(fd); @@ -3717,6 +3730,15 @@ retry: qemudRemoveDomainStatus(driver, vm); + /* Remove VNC port from vnc_used_port_bitmap, but only if it was reserved + by the driver (autoport=yes) + */ + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + vm->def->graphics[0]->data.vnc.autoport) + virBitmapClearBit(vnc_used_port_bitmap, + vm->def->graphics[0]->data.vnc.port - 5900); + vm->pid = -1; vm->def->id = -1; vm->state = VIR_DOMAIN_SHUTOFF; -- 1.6.0.2

On Mon, 2010-05-17 at 12:10 -0600, Jim Fehlig wrote:
The qemu driver contains a subtle race in the logic to find next available vnc port. Currently it iterates through all available ports and returns the first for which bind(2) succeeds. However it is possible that a previously issued port has not yet been bound by qemu, resulting in the same port used for a subsequent domain.
This patch addresses the race by using a simple bitmap to "reserve" the ports allocated by libvirt. --- src/qemu/qemu_driver.c | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 582fdee..329859e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2629,13 +2629,24 @@ qemuInitPCIAddresses(struct qemud_driver *driver, return ret; }
+static virBitmapPtr vnc_used_port_bitmap; + static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { int i;
+ if (vnc_used_port_bitmap == NULL) { + if ((vnc_used_port_bitmap = virBitmapAlloc(59635)) == NULL) + return -1; + } + I think you could move the above into qmudStartup.
Maybe replace the 59635 with (VNC_PORT_MAX - VNC_PORT_MIN) so one can see the math ...
for (i = 5900 ; i < 65535 ; i++) {
... and replace 5900 here with VNC_PORT_MIN and 65535 with VNC_PORT_MAX.
int fd; int reuse = 1; struct sockaddr_in addr; + + if (virBitmapGetBit(vnc_used_port_bitmap, i - 5900)) + continue; + addr.sin_family = AF_INET; addr.sin_port = htons(i); addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -2651,6 +2662,8 @@ static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { /* Not in use, lets grab it */ close(fd); + /* Add port to bitmap of reserved ports */ + virBitmapSetBit(vnc_used_port_bitmap, i - 5900); return i; } close(fd); @@ -3717,6 +3730,15 @@ retry:
qemudRemoveDomainStatus(driver, vm);
+ /* Remove VNC port from vnc_used_port_bitmap, but only if it was reserved + by the driver (autoport=yes) + */ + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + vm->def->graphics[0]->data.vnc.autoport) + virBitmapClearBit(vnc_used_port_bitmap, + vm->def->graphics[0]->data.vnc.port - 5900); + vm->pid = -1; vm->def->id = -1; vm->state = VIR_DOMAIN_SHUTOFF;
Afaict, you also need to add cleanup to the 'cleanup:' part of qemudStartVMDaemon() after checking that the port has been allocated. Stefan

Stefan Berger wrote:
On Mon, 2010-05-17 at 12:10 -0600, Jim Fehlig wrote:
The qemu driver contains a subtle race in the logic to find next available vnc port. Currently it iterates through all available ports and returns the first for which bind(2) succeeds. However it is possible that a previously issued port has not yet been bound by qemu, resulting in the same port used for a subsequent domain.
This patch addresses the race by using a simple bitmap to "reserve" the ports allocated by libvirt. --- src/qemu/qemu_driver.c | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 582fdee..329859e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2629,13 +2629,24 @@ qemuInitPCIAddresses(struct qemud_driver *driver, return ret; }
+static virBitmapPtr vnc_used_port_bitmap; + static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { int i;
+ if (vnc_used_port_bitmap == NULL) { + if ((vnc_used_port_bitmap = virBitmapAlloc(59635)) == NULL) + return -1; + } +
I think you could move the above into qmudStartup.
I originally had it there but decided it was a waste of 64k when no domains use vnc :-). I'll move it back.
Maybe replace the 59635 with (VNC_PORT_MAX - VNC_PORT_MIN) so one can see the math ...
for (i = 5900 ; i < 65535 ; i++) {
... and replace 5900 here with VNC_PORT_MIN and 65535 with VNC_PORT_MAX.
Right. Probably another patch that defines VNC_PORT_{MIN,MAX} and use those in the existing for loop, followed by this patch.
int fd; int reuse = 1; struct sockaddr_in addr; + + if (virBitmapGetBit(vnc_used_port_bitmap, i - 5900)) + continue; + addr.sin_family = AF_INET; addr.sin_port = htons(i); addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -2651,6 +2662,8 @@ static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { /* Not in use, lets grab it */ close(fd); + /* Add port to bitmap of reserved ports */ + virBitmapSetBit(vnc_used_port_bitmap, i - 5900); return i; } close(fd); @@ -3717,6 +3730,15 @@ retry:
qemudRemoveDomainStatus(driver, vm);
+ /* Remove VNC port from vnc_used_port_bitmap, but only if it was reserved + by the driver (autoport=yes) + */ + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + vm->def->graphics[0]->data.vnc.autoport) + virBitmapClearBit(vnc_used_port_bitmap, + vm->def->graphics[0]->data.vnc.port - 5900); + vm->pid = -1; vm->def->id = -1; vm->state = VIR_DOMAIN_SHUTOFF;
Afaict, you also need to add cleanup to the 'cleanup:' part of qemudStartVMDaemon() after checking that the port has been allocated.
Nod. I'll respin these patches. Jim

On Mon, May 17, 2010 at 12:10:00PM -0600, Jim Fehlig wrote:
The qemu driver contains a subtle race in the logic to find next available vnc port. Currently it iterates through all available ports and returns the first for which bind(2) succeeds. However it is possible that a previously issued port has not yet been bound by qemu, resulting in the same port used for a subsequent domain.
This patch addresses the race by using a simple bitmap to "reserve" the ports allocated by libvirt. --- src/qemu/qemu_driver.c | 22 ++++++++++++++++++++++ 1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 582fdee..329859e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2629,13 +2629,24 @@ qemuInitPCIAddresses(struct qemud_driver *driver, return ret; }
+static virBitmapPtr vnc_used_port_bitmap;
This should be a field in 'struct qemud_driver' rather than static.
static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { int i;
+ if (vnc_used_port_bitmap == NULL) { + if ((vnc_used_port_bitmap = virBitmapAlloc(59635)) == NULL) + return -1; + } + for (i = 5900 ; i < 65535 ; i++) { int fd; int reuse = 1; struct sockaddr_in addr; + + if (virBitmapGetBit(vnc_used_port_bitmap, i - 5900)) + continue; + addr.sin_family = AF_INET; addr.sin_port = htons(i); addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -2651,6 +2662,8 @@ static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) { if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { /* Not in use, lets grab it */ close(fd); + /* Add port to bitmap of reserved ports */ + virBitmapSetBit(vnc_used_port_bitmap, i - 5900); return i; } close(fd); @@ -3717,6 +3730,15 @@ retry:
qemudRemoveDomainStatus(driver, vm);
+ /* Remove VNC port from vnc_used_port_bitmap, but only if it was reserved + by the driver (autoport=yes) + */ + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + vm->def->graphics[0]->data.vnc.autoport) + virBitmapClearBit(vnc_used_port_bitmap, + vm->def->graphics[0]->data.vnc.port - 5900); + vm->pid = -1; vm->def->id = -1; vm->state = VIR_DOMAIN_SHUTOFF;
Regards, Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
participants (4)
-
Daniel P. Berrange
-
Eric Blake
-
Jim Fehlig
-
Stefan Berger