[libvirt] [PATCH] event: fix two event-handling bugs

Regression introduced in commit e6b68d7. Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT, and was an integer (so even if the subtraction wrapped, a negative value was less than the count and did not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically, so handlesAlloc could be 49 when handlesCount was 47, while still freeing off only 10 at a time, and eventually reach handlesAlloc 7 and handlesCount 1. Since (size_t)(7 - 10) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles. Which leads to crashes like this: https://bugzilla.redhat.com/show_bug.cgi?id=670848 * daemon/event.c (virEventDispatchHandles): Cache watch while the lock is still held, as eventLoop.handles might be relocated outside the lock. (virEventCleanupHandles): Avoid integer wrap-around causing us to delete the entire handles array while handles are still active. --- Thanks to Dave Allan for letting me use his faqemu setup for testing this. Basically, starting 60 faqemu followed by stopping them all was a reliable way to trigger the handle cleanup integer wraparound. daemon/event.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) diff --git a/daemon/event.c b/daemon/event.c index 89ca9f0..9cad466 100644 --- a/daemon/event.c +++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -435,6 +435,7 @@ static int virEventDispatchTimeouts(void) { */ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { int i, n; + int watch; DEBUG("Dispatch %d", nfds); /* NB, use nfds not eventLoop.handlesCount, because new @@ -463,9 +464,9 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, fds[n].fd, eventLoop.handles[i].watch, fds[n].revents, eventLoop.handles[i].opaque); + watch = eventLoop.handles[i].watch; virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } } @@ -542,9 +543,10 @@ static int virEventCleanupHandles(void) { } /* Release some memory if we've got a big chunk free */ - if ((eventLoop.handlesAlloc - EVENT_ALLOC_EXTENT) > eventLoop.handlesCount) { + if (eventLoop.handlesAlloc - eventLoop.handlesCount > EVENT_ALLOC_EXTENT) { EVENT_DEBUG("Releasing %zu out of %zu handles slots used, releasing %d", - eventLoop.handlesCount, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); + eventLoop.handlesCount, eventLoop.handlesAlloc, + EVENT_ALLOC_EXTENT); VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); } -- 1.7.3.4

On Thu, Jan 20, 2011 at 05:01:13PM -0700, Eric Blake wrote:
Regression introduced in commit e6b68d7.
Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT, and was an integer (so even if the subtraction wrapped, a negative value was less than the count and did not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically, so handlesAlloc could be 49 when handlesCount was 47, while still freeing off only 10 at a time, and eventually reach handlesAlloc 7 and handlesCount 1. Since (size_t)(7 - 10) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles.
Which leads to crashes like this: https://bugzilla.redhat.com/show_bug.cgi?id=670848
* daemon/event.c (virEventDispatchHandles): Cache watch while the lock is still held, as eventLoop.handles might be relocated outside the lock. (virEventCleanupHandles): Avoid integer wrap-around causing us to delete the entire handles array while handles are still active. ---
Thanks to Dave Allan for letting me use his faqemu setup for testing this. Basically, starting 60 faqemu followed by stopping them all was a reliable way to trigger the handle cleanup integer wraparound.
daemon/event.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/daemon/event.c b/daemon/event.c index 89ca9f0..9cad466 100644 --- a/daemon/event.c +++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -435,6 +435,7 @@ static int virEventDispatchTimeouts(void) { */ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { int i, n; + int watch; DEBUG("Dispatch %d", nfds);
/* NB, use nfds not eventLoop.handlesCount, because new @@ -463,9 +464,9 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, fds[n].fd, eventLoop.handles[i].watch, fds[n].revents, eventLoop.handles[i].opaque); + watch = eventLoop.handles[i].watch; virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } } @@ -542,9 +543,10 @@ static int virEventCleanupHandles(void) { }
/* Release some memory if we've got a big chunk free */ - if ((eventLoop.handlesAlloc - EVENT_ALLOC_EXTENT) > eventLoop.handlesCount) { + if (eventLoop.handlesAlloc - eventLoop.handlesCount > EVENT_ALLOC_EXTENT) { EVENT_DEBUG("Releasing %zu out of %zu handles slots used, releasing %d", - eventLoop.handlesCount, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); + eventLoop.handlesCount, eventLoop.handlesAlloc, + EVENT_ALLOC_EXTENT); VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); }
ACK, fairly nasty ! thanks for chasing this down ! Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On Thu, Jan 20, 2011 at 05:01:13PM -0700, Eric Blake wrote:
Regression introduced in commit e6b68d7.
Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT, and was an integer (so even if the subtraction wrapped, a negative value was less than the count and did not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically, so handlesAlloc could be 49 when handlesCount was 47, while still freeing off only 10 at a time, and eventually reach handlesAlloc 7 and handlesCount 1. Since (size_t)(7 - 10) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles.
It is probably quite dificult, but is there any way this codepath could be validated under the eventtest program, even if it would run running eventtest under valgrind to actually detect the flaw.
Which leads to crashes like this: https://bugzilla.redhat.com/show_bug.cgi?id=670848
* daemon/event.c (virEventDispatchHandles): Cache watch while the lock is still held, as eventLoop.handles might be relocated outside the lock. (virEventCleanupHandles): Avoid integer wrap-around causing us to delete the entire handles array while handles are still active. ---
Thanks to Dave Allan for letting me use his faqemu setup for testing this. Basically, starting 60 faqemu followed by stopping them all was a reliable way to trigger the handle cleanup integer wraparound.
Excellant usage of fake qemu
daemon/event.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/daemon/event.c b/daemon/event.c index 89ca9f0..9cad466 100644 --- a/daemon/event.c +++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -435,6 +435,7 @@ static int virEventDispatchTimeouts(void) { */ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { int i, n; + int watch; DEBUG("Dispatch %d", nfds);
/* NB, use nfds not eventLoop.handlesCount, because new @@ -463,9 +464,9 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, fds[n].fd, eventLoop.handles[i].watch, fds[n].revents, eventLoop.handles[i].opaque); + watch = eventLoop.handles[i].watch; virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } } @@ -542,9 +543,10 @@ static int virEventCleanupHandles(void) { }
/* Release some memory if we've got a big chunk free */ - if ((eventLoop.handlesAlloc - EVENT_ALLOC_EXTENT) > eventLoop.handlesCount) { + if (eventLoop.handlesAlloc - eventLoop.handlesCount > EVENT_ALLOC_EXTENT) { EVENT_DEBUG("Releasing %zu out of %zu handles slots used, releasing %d", - eventLoop.handlesCount, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); + eventLoop.handlesCount, eventLoop.handlesAlloc, + EVENT_ALLOC_EXTENT); VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT);
ACK Daniel

On 01/21/2011 04:15 AM, Daniel P. Berrange wrote:
Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT, and was an integer (so even if the subtraction wrapped, a negative value was less than the count and did not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically, so handlesAlloc could be 49 when handlesCount was 47, while still freeing off only 10 at a time, and eventually reach handlesAlloc 7 and handlesCount 1. Since (size_t)(7 - 10) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles.
It is probably quite dificult, but is there any way this codepath could be validated under the eventtest program, even if it would run running eventtest under valgrind to actually detect the flaw.
I'm looking into that. As is, timeoutsAlloc also has the same bug, so v2 of the patch is coming up, after I (re-)audit the entire file for all uses of eventLoop to ensure that reads and modifications are only done inside locks, and that modifications don't fall foul of integer wraparound. I'm also updating the commit message. The problem cannot be triggered until you hit either 31 timeouts or 31 handles (which is why I never saw it in my earlier testing, since running 1 or 2, or even 10, VMs does not create that many handles), because our allocation pattern was always requesting growth by 10 (VIR_RESIZE_N uses MAX(requested, 50%) for it's growth delta), for 10, 20, 30, 45. And since freeing always happened by 10, you didn't get integer overflow until you had hit the allocation of 45 or higher (which requires growing past 30). Very nasty bug indeed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

The first bug has been present since before the time that commit f8a519 (Dec 2008) tried to make the dispatch loop re-entrant. Dereferencing eventLoop.handles outside the lock risks crashing, since any other thread could have reallocated the array in the meantime. It's a narrow race window, however, and one that would have most likely resulted in passing bogus data to the callback rather than actually causing a segv, which is probably why it has gone undetected this long. The second is a regression introduced in commit e6b68d7 (Nov 2010). Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT (10), and was an int (so even if the subtraction had been able to wrap, a negative value would be less than the count not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically (with a pattern of 10, 20, 30, 45 for the handles array) but still freed in multiples of EVENT_ALLOC_EXTENT; and the count changed to size_t. Which means that after 31 handles have been created, then 30 handles destroyed, handlesAlloc is 5 while handlesCount is 1, and since (size_t)(1 - 5) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles. Nuking live handles puts libvirtd in an inconsistent state, and was easily reproducible by starting and then stopping 60 faqemu guests. It may also be the root cause of crashes like this: https://bugzilla.redhat.com/show_bug.cgi?id=670848 * daemon/event.c (virEventDispatchHandles): Cache data while inside the lock, as the array might be reallocated once outside. (virEventCleanupTimeouts, virEventCleanupHandles): Avoid integer wrap-around causing us to delete the entire array while entries are still active. * tests/eventtest.c (mymain): Expose the bug. --- v2: fix timeoutsAlloc, which had same bug as handlesAlloc. Avoid memory leak when all handles are deregistered, and free in larger chunks to match allocating in larger chunks. Break some long lines. Add testsuite exposure.
It is probably quite dificult, but is there any way this codepath could be validated under the eventtest program, even if it would run running eventtest under valgrind to actually detect the flaw. I'm looking into that. As is, timeoutsAlloc also has the same bug, so v2 of the patch is coming up, after I (re-)audit the entire file for all uses of eventLoop to ensure that reads and modifications are only done inside locks, and that modifications don't fall foul of integer wraparound.
Re-audit complete, the timeoutsAlloc bug was the only other one that I found. And modifying the testsuite was doable (I won't say easy, since it took me several hours) - merely bump the limit up to 31 to trigger the allocation to be a non-multiple of the de-allocation, then make sure enough virEventRunOnce gets called to free things back to the point of nuking the live array (freeing only 10 at a time meant that I had to add a for loop; it was figuring that out that took me the longest). I've verified that the changes to eventtest.c in isolation reliably cause a core dump. daemon/event.c | 42 +++++++++++++++++++++++----------------- tests/eventtest.c | 54 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/daemon/event.c b/daemon/event.c index 89ca9f0..4c97fb9 100644 --- a/daemon/event.c +++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -458,14 +458,13 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { if (fds[n].revents) { virEventHandleCallback cb = eventLoop.handles[i].cb; + int watch = eventLoop.handles[i].watch; void *opaque = eventLoop.handles[i].opaque; int hEvents = virPollEventToEventHandleType(fds[n].revents); EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, - fds[n].fd, eventLoop.handles[i].watch, - fds[n].revents, eventLoop.handles[i].opaque); + fds[n].fd, watch, fds[n].revents, opaque); virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } } @@ -480,6 +479,7 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { */ static int virEventCleanupTimeouts(void) { int i; + size_t gap; DEBUG("Cleanup %zu", eventLoop.timeoutsCount); /* Remove deleted entries, shuffling down remaining @@ -491,24 +491,27 @@ static int virEventCleanupTimeouts(void) { continue; } - EVENT_DEBUG("Purging timeout %d with id %d", i, eventLoop.timeouts[i].timer); + EVENT_DEBUG("Purging timeout %d with id %d", i, + eventLoop.timeouts[i].timer); if (eventLoop.timeouts[i].ff) (eventLoop.timeouts[i].ff)(eventLoop.timeouts[i].opaque); if ((i+1) < eventLoop.timeoutsCount) { memmove(eventLoop.timeouts+i, eventLoop.timeouts+i+1, - sizeof(struct virEventTimeout)*(eventLoop.timeoutsCount-(i+1))); + sizeof(struct virEventTimeout)*(eventLoop.timeoutsCount + -(i+1))); } eventLoop.timeoutsCount--; } /* Release some memory if we've got a big chunk free */ - if ((eventLoop.timeoutsAlloc - EVENT_ALLOC_EXTENT) > eventLoop.timeoutsCount) { - EVENT_DEBUG("Releasing %zu out of %zu timeout slots used, releasing %d", - eventLoop.timeoutsCount, eventLoop.timeoutsAlloc, EVENT_ALLOC_EXTENT); - VIR_SHRINK_N(eventLoop.timeouts, eventLoop.timeoutsAlloc, - EVENT_ALLOC_EXTENT); + gap = eventLoop.timeoutsAlloc - eventLoop.timeoutsCount; + if (eventLoop.timeoutsCount == 0 || + (gap > eventLoop.timeoutsCount && gap > EVENT_ALLOC_EXTENT)) { + EVENT_DEBUG("Found %zu out of %zu timeout slots used, releasing %zu", + eventLoop.timeoutsCount, eventLoop.timeoutsAlloc, gap); + VIR_SHRINK_N(eventLoop.timeouts, eventLoop.timeoutsAlloc, gap); } return 0; } @@ -519,6 +522,7 @@ static int virEventCleanupTimeouts(void) { */ static int virEventCleanupHandles(void) { int i; + size_t gap; DEBUG("Cleanup %zu", eventLoop.handlesCount); /* Remove deleted entries, shuffling down remaining @@ -536,17 +540,19 @@ static int virEventCleanupHandles(void) { if ((i+1) < eventLoop.handlesCount) { memmove(eventLoop.handles+i, eventLoop.handles+i+1, - sizeof(struct virEventHandle)*(eventLoop.handlesCount-(i+1))); + sizeof(struct virEventHandle)*(eventLoop.handlesCount + -(i+1))); } eventLoop.handlesCount--; } /* Release some memory if we've got a big chunk free */ - if ((eventLoop.handlesAlloc - EVENT_ALLOC_EXTENT) > eventLoop.handlesCount) { - EVENT_DEBUG("Releasing %zu out of %zu handles slots used, releasing %d", - eventLoop.handlesCount, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); - VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, - EVENT_ALLOC_EXTENT); + gap = eventLoop.handlesAlloc - eventLoop.handlesCount; + if (eventLoop.handlesCount == 0 || + (gap > eventLoop.handlesCount && gap > EVENT_ALLOC_EXTENT)) { + EVENT_DEBUG("Found %zu out of %zu handles slots used, releasing %zu", + eventLoop.handlesCount, eventLoop.handlesAlloc, gap); + VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, gap); } return 0; } diff --git a/tests/eventtest.c b/tests/eventtest.c index 067e365..93317be 100644 --- a/tests/eventtest.c +++ b/tests/eventtest.c @@ -1,7 +1,7 @@ /* * eventtest.c: Test the libvirtd event loop impl * - * Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2009, 2011 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -33,8 +33,8 @@ #include "util.h" #include "../daemon/event.h" -#define NUM_FDS 5 -#define NUM_TIME 5 +#define NUM_FDS 31 +#define NUM_TIME 31 static struct handleInfo { int pipeFD[2]; @@ -147,11 +147,14 @@ verifyFired(const char *name, int handle, int timer) for (i = 0 ; i < NUM_FDS ; i++) { if (handles[i].fired) { if (i != handle) { - virtTestResult(name, 1, "Handle %d fired, but expected %d\n", i, handle); + virtTestResult(name, 1, + "Handle %d fired, but expected %d\n", i, + handle); return EXIT_FAILURE; } else { if (handles[i].error != EV_ERROR_NONE) { - virtTestResult(name, 1, "Handle %d fired, but had error %d\n", i, + virtTestResult(name, 1, + "Handle %d fired, but had error %d\n", i, handles[i].error); return EXIT_FAILURE; } @@ -159,13 +162,17 @@ verifyFired(const char *name, int handle, int timer) } } else { if (i == handle) { - virtTestResult(name, 1, "Handle %d should have fired, but didn't\n", handle); + virtTestResult(name, 1, + "Handle %d should have fired, but didn't\n", + handle); return EXIT_FAILURE; } } } if (handleFired != 1 && handle != -1) { - virtTestResult(name, 1, "Something wierd happened, expecting handle %d\n", handle); + virtTestResult(name, 1, + "Something weird happened, expecting handle %d\n", + handle); return EXIT_FAILURE; } @@ -173,11 +180,13 @@ verifyFired(const char *name, int handle, int timer) for (i = 0 ; i < NUM_TIME ; i++) { if (timers[i].fired) { if (i != timer) { - virtTestResult(name, 1, "Timer %d fired, but expected %d\n", i, timer); + virtTestResult(name, 1, + "Timer %d fired, but expected %d\n", i, timer); return EXIT_FAILURE; } else { if (timers[i].error != EV_ERROR_NONE) { - virtTestResult(name, 1, "Timer %d fired, but had error %d\n", i, + virtTestResult(name, 1, + "Timer %d fired, but had error %d\n", i, timers[i].error); return EXIT_FAILURE; } @@ -185,13 +194,17 @@ verifyFired(const char *name, int handle, int timer) } } else { if (i == timer) { - virtTestResult(name, 1, "Timer %d should have fired, but didn't\n", timer); + virtTestResult(name, 1, + "Timer %d should have fired, but didn't\n", + timer); return EXIT_FAILURE; } } } if (timerFired != 1 && timer != -1) { - virtTestResult(name, 1, "Something wierd happened, expecting timer %d\n", timer); + virtTestResult(name, 1, + "Something weird happened, expecting timer %d\n", + timer); return EXIT_FAILURE; } return EXIT_SUCCESS; @@ -217,7 +230,8 @@ finishJob(const char *name, int handle, int timer) waitTime.tv_sec += 5; rc = 0; while (!eventThreadJobDone && rc == 0) - rc = pthread_cond_timedwait(&eventThreadJobCond, &eventThreadMutex, &waitTime); + rc = pthread_cond_timedwait(&eventThreadJobCond, &eventThreadMutex, + &waitTime); if (rc != 0) { virtTestResult(name, 1, "Timed out waiting for pipe event\n"); return EXIT_FAILURE; @@ -426,13 +440,25 @@ mymain(int argc, char **argv) if (finishJob("Deleted during dispatch", -1, 2) != EXIT_SUCCESS) return EXIT_FAILURE; - for (i = 0 ; i < NUM_FDS ; i++) + for (i = 0 ; i < NUM_FDS - 1 ; i++) virEventRemoveHandleImpl(handles[i].watch); - for (i = 0 ; i < NUM_TIME ; i++) + for (i = 0 ; i < NUM_TIME - 1 ; i++) virEventRemoveTimeoutImpl(timers[i].timer); resetAll(); + /* Make sure the last handle still works several times in a row. */ + for (i = 0; i < 4; i++) { + startJob(); + if (safewrite(handles[NUM_FDS - 1].pipeFD[1], &one, 1) != 1) + return EXIT_FAILURE; + if (finishJob("Simple write", NUM_FDS - 1, -1) != EXIT_SUCCESS) + return EXIT_FAILURE; + + resetAll(); + } + + /* Final test, register same FD twice, once with no * events, and make sure the right callback runs */ handles[0].pipeFD[0] = handles[1].pipeFD[0]; -- 1.7.3.4

On 01/21/2011 01:06 PM, Eric Blake wrote:
The first bug has been present since before the time that commit f8a519 (Dec 2008) tried to make the dispatch loop re-entrant.
The second is a regression introduced in commit e6b68d7 (Nov 2010).
I'm splitting this into two commits (to make it easier to backport the first fix to libvirt 0.8.5 and earlier, since the second bug was introduced in 0.8.6).
+++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -458,14 +458,13 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) {
if (fds[n].revents) { virEventHandleCallback cb = eventLoop.handles[i].cb; + int watch = eventLoop.handles[i].watch; void *opaque = eventLoop.handles[i].opaque; int hEvents = virPollEventToEventHandleType(fds[n].revents); EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, - fds[n].fd, eventLoop.handles[i].watch, - fds[n].revents, eventLoop.handles[i].opaque); + fds[n].fd, watch, fds[n].revents, opaque); virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } }
This hunk to commit 1; the rest of the commit to commit 2. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 01/21/2011 02:07 PM, Eric Blake wrote:
On 01/21/2011 01:06 PM, Eric Blake wrote:
The first bug has been present since before the time that commit f8a519 (Dec 2008) tried to make the dispatch loop re-entrant.
The second is a regression introduced in commit e6b68d7 (Nov 2010).
I'm splitting this into two commits (to make it easier to backport the first fix to libvirt 0.8.5 and earlier, since the second bug was introduced in 0.8.6).
I've pushed the first hunk (since it is unchanged from the v1 ACK), but would still like an ACK to my v2 commit for the testsuite changes and additional timeoutsAlloc bug-fix before I push that. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On Fri, Jan 21, 2011 at 01:06:58PM -0700, Eric Blake wrote:
The first bug has been present since before the time that commit f8a519 (Dec 2008) tried to make the dispatch loop re-entrant.
Dereferencing eventLoop.handles outside the lock risks crashing, since any other thread could have reallocated the array in the meantime. It's a narrow race window, however, and one that would have most likely resulted in passing bogus data to the callback rather than actually causing a segv, which is probably why it has gone undetected this long.
The second is a regression introduced in commit e6b68d7 (Nov 2010).
Prior to that point, handlesAlloc was always a multiple of EVENT_ALLOC_EXTENT (10), and was an int (so even if the subtraction had been able to wrap, a negative value would be less than the count not try to free the handles array). But after that point, VIR_RESIZE_N made handlesAlloc grow geometrically (with a pattern of 10, 20, 30, 45 for the handles array) but still freed in multiples of EVENT_ALLOC_EXTENT; and the count changed to size_t. Which means that after 31 handles have been created, then 30 handles destroyed, handlesAlloc is 5 while handlesCount is 1, and since (size_t)(1 - 5) is indeed greater than 1, this then tried to free 10 elements, which had the awful effect of nuking the handles array while there were still live handles.
Nuking live handles puts libvirtd in an inconsistent state, and was easily reproducible by starting and then stopping 60 faqemu guests. It may also be the root cause of crashes like this: https://bugzilla.redhat.com/show_bug.cgi?id=670848
* daemon/event.c (virEventDispatchHandles): Cache data while inside the lock, as the array might be reallocated once outside. (virEventCleanupTimeouts, virEventCleanupHandles): Avoid integer wrap-around causing us to delete the entire array while entries are still active. * tests/eventtest.c (mymain): Expose the bug. ---
v2: fix timeoutsAlloc, which had same bug as handlesAlloc. Avoid memory leak when all handles are deregistered, and free in larger chunks to match allocating in larger chunks. Break some long lines. Add testsuite exposure.
It is probably quite dificult, but is there any way this codepath could be validated under the eventtest program, even if it would run running eventtest under valgrind to actually detect the flaw. I'm looking into that. As is, timeoutsAlloc also has the same bug, so v2 of the patch is coming up, after I (re-)audit the entire file for all uses of eventLoop to ensure that reads and modifications are only done inside locks, and that modifications don't fall foul of integer wraparound.
Re-audit complete, the timeoutsAlloc bug was the only other one that I found.
And modifying the testsuite was doable (I won't say easy, since it took me several hours) - merely bump the limit up to 31 to trigger the allocation to be a non-multiple of the de-allocation, then make sure enough virEventRunOnce gets called to free things back to the point of nuking the live array (freeing only 10 at a time meant that I had to add a for loop; it was figuring that out that took me the longest). I've verified that the changes to eventtest.c in isolation reliably cause a core dump.
daemon/event.c | 42 +++++++++++++++++++++++----------------- tests/eventtest.c | 54 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 32 deletions(-)
diff --git a/daemon/event.c b/daemon/event.c index 89ca9f0..4c97fb9 100644 --- a/daemon/event.c +++ b/daemon/event.c @@ -1,7 +1,7 @@ /* * event.c: event loop for monitoring file handles * - * Copyright (C) 2007, 2010 Red Hat, Inc. + * Copyright (C) 2007, 2010-2011 Red Hat, Inc. * Copyright (C) 2007 Daniel P. Berrange * * This library is free software; you can redistribute it and/or @@ -458,14 +458,13 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) {
if (fds[n].revents) { virEventHandleCallback cb = eventLoop.handles[i].cb; + int watch = eventLoop.handles[i].watch; void *opaque = eventLoop.handles[i].opaque; int hEvents = virPollEventToEventHandleType(fds[n].revents); EVENT_DEBUG("Dispatch n=%d f=%d w=%d e=%d %p", i, - fds[n].fd, eventLoop.handles[i].watch, - fds[n].revents, eventLoop.handles[i].opaque); + fds[n].fd, watch, fds[n].revents, opaque); virMutexUnlock(&eventLoop.lock); - (cb)(eventLoop.handles[i].watch, - fds[n].fd, hEvents, opaque); + (cb)(watch, fds[n].fd, hEvents, opaque); virMutexLock(&eventLoop.lock); } } @@ -480,6 +479,7 @@ static int virEventDispatchHandles(int nfds, struct pollfd *fds) { */ static int virEventCleanupTimeouts(void) { int i; + size_t gap; DEBUG("Cleanup %zu", eventLoop.timeoutsCount);
/* Remove deleted entries, shuffling down remaining @@ -491,24 +491,27 @@ static int virEventCleanupTimeouts(void) { continue; }
- EVENT_DEBUG("Purging timeout %d with id %d", i, eventLoop.timeouts[i].timer); + EVENT_DEBUG("Purging timeout %d with id %d", i, + eventLoop.timeouts[i].timer); if (eventLoop.timeouts[i].ff) (eventLoop.timeouts[i].ff)(eventLoop.timeouts[i].opaque);
if ((i+1) < eventLoop.timeoutsCount) { memmove(eventLoop.timeouts+i, eventLoop.timeouts+i+1, - sizeof(struct virEventTimeout)*(eventLoop.timeoutsCount-(i+1))); + sizeof(struct virEventTimeout)*(eventLoop.timeoutsCount + -(i+1))); } eventLoop.timeoutsCount--; }
/* Release some memory if we've got a big chunk free */ - if ((eventLoop.timeoutsAlloc - EVENT_ALLOC_EXTENT) > eventLoop.timeoutsCount) { - EVENT_DEBUG("Releasing %zu out of %zu timeout slots used, releasing %d", - eventLoop.timeoutsCount, eventLoop.timeoutsAlloc, EVENT_ALLOC_EXTENT); - VIR_SHRINK_N(eventLoop.timeouts, eventLoop.timeoutsAlloc, - EVENT_ALLOC_EXTENT); + gap = eventLoop.timeoutsAlloc - eventLoop.timeoutsCount; + if (eventLoop.timeoutsCount == 0 || + (gap > eventLoop.timeoutsCount && gap > EVENT_ALLOC_EXTENT)) { + EVENT_DEBUG("Found %zu out of %zu timeout slots used, releasing %zu", + eventLoop.timeoutsCount, eventLoop.timeoutsAlloc, gap); + VIR_SHRINK_N(eventLoop.timeouts, eventLoop.timeoutsAlloc, gap); } return 0; } @@ -519,6 +522,7 @@ static int virEventCleanupTimeouts(void) { */ static int virEventCleanupHandles(void) { int i; + size_t gap; DEBUG("Cleanup %zu", eventLoop.handlesCount);
/* Remove deleted entries, shuffling down remaining @@ -536,17 +540,19 @@ static int virEventCleanupHandles(void) { if ((i+1) < eventLoop.handlesCount) { memmove(eventLoop.handles+i, eventLoop.handles+i+1, - sizeof(struct virEventHandle)*(eventLoop.handlesCount-(i+1))); + sizeof(struct virEventHandle)*(eventLoop.handlesCount + -(i+1))); } eventLoop.handlesCount--; }
/* Release some memory if we've got a big chunk free */ - if ((eventLoop.handlesAlloc - EVENT_ALLOC_EXTENT) > eventLoop.handlesCount) { - EVENT_DEBUG("Releasing %zu out of %zu handles slots used, releasing %d", - eventLoop.handlesCount, eventLoop.handlesAlloc, EVENT_ALLOC_EXTENT); - VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, - EVENT_ALLOC_EXTENT); + gap = eventLoop.handlesAlloc - eventLoop.handlesCount; + if (eventLoop.handlesCount == 0 || + (gap > eventLoop.handlesCount && gap > EVENT_ALLOC_EXTENT)) { + EVENT_DEBUG("Found %zu out of %zu handles slots used, releasing %zu", + eventLoop.handlesCount, eventLoop.handlesAlloc, gap); + VIR_SHRINK_N(eventLoop.handles, eventLoop.handlesAlloc, gap); } return 0; } diff --git a/tests/eventtest.c b/tests/eventtest.c index 067e365..93317be 100644 --- a/tests/eventtest.c +++ b/tests/eventtest.c @@ -1,7 +1,7 @@ /* * eventtest.c: Test the libvirtd event loop impl * - * Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2009, 2011 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -33,8 +33,8 @@ #include "util.h" #include "../daemon/event.h"
-#define NUM_FDS 5 -#define NUM_TIME 5 +#define NUM_FDS 31 +#define NUM_TIME 31
static struct handleInfo { int pipeFD[2]; @@ -147,11 +147,14 @@ verifyFired(const char *name, int handle, int timer) for (i = 0 ; i < NUM_FDS ; i++) { if (handles[i].fired) { if (i != handle) { - virtTestResult(name, 1, "Handle %d fired, but expected %d\n", i, handle); + virtTestResult(name, 1, + "Handle %d fired, but expected %d\n", i, + handle); return EXIT_FAILURE; } else { if (handles[i].error != EV_ERROR_NONE) { - virtTestResult(name, 1, "Handle %d fired, but had error %d\n", i, + virtTestResult(name, 1, + "Handle %d fired, but had error %d\n", i, handles[i].error); return EXIT_FAILURE; } @@ -159,13 +162,17 @@ verifyFired(const char *name, int handle, int timer) } } else { if (i == handle) { - virtTestResult(name, 1, "Handle %d should have fired, but didn't\n", handle); + virtTestResult(name, 1, + "Handle %d should have fired, but didn't\n", + handle); return EXIT_FAILURE; } } } if (handleFired != 1 && handle != -1) { - virtTestResult(name, 1, "Something wierd happened, expecting handle %d\n", handle); + virtTestResult(name, 1, + "Something weird happened, expecting handle %d\n", + handle); return EXIT_FAILURE; }
@@ -173,11 +180,13 @@ verifyFired(const char *name, int handle, int timer) for (i = 0 ; i < NUM_TIME ; i++) { if (timers[i].fired) { if (i != timer) { - virtTestResult(name, 1, "Timer %d fired, but expected %d\n", i, timer); + virtTestResult(name, 1, + "Timer %d fired, but expected %d\n", i, timer); return EXIT_FAILURE; } else { if (timers[i].error != EV_ERROR_NONE) { - virtTestResult(name, 1, "Timer %d fired, but had error %d\n", i, + virtTestResult(name, 1, + "Timer %d fired, but had error %d\n", i, timers[i].error); return EXIT_FAILURE; } @@ -185,13 +194,17 @@ verifyFired(const char *name, int handle, int timer) } } else { if (i == timer) { - virtTestResult(name, 1, "Timer %d should have fired, but didn't\n", timer); + virtTestResult(name, 1, + "Timer %d should have fired, but didn't\n", + timer); return EXIT_FAILURE; } } } if (timerFired != 1 && timer != -1) { - virtTestResult(name, 1, "Something wierd happened, expecting timer %d\n", timer); + virtTestResult(name, 1, + "Something weird happened, expecting timer %d\n", + timer); return EXIT_FAILURE; } return EXIT_SUCCESS; @@ -217,7 +230,8 @@ finishJob(const char *name, int handle, int timer) waitTime.tv_sec += 5; rc = 0; while (!eventThreadJobDone && rc == 0) - rc = pthread_cond_timedwait(&eventThreadJobCond, &eventThreadMutex, &waitTime); + rc = pthread_cond_timedwait(&eventThreadJobCond, &eventThreadMutex, + &waitTime); if (rc != 0) { virtTestResult(name, 1, "Timed out waiting for pipe event\n"); return EXIT_FAILURE; @@ -426,13 +440,25 @@ mymain(int argc, char **argv) if (finishJob("Deleted during dispatch", -1, 2) != EXIT_SUCCESS) return EXIT_FAILURE;
- for (i = 0 ; i < NUM_FDS ; i++) + for (i = 0 ; i < NUM_FDS - 1 ; i++) virEventRemoveHandleImpl(handles[i].watch); - for (i = 0 ; i < NUM_TIME ; i++) + for (i = 0 ; i < NUM_TIME - 1 ; i++) virEventRemoveTimeoutImpl(timers[i].timer);
resetAll();
+ /* Make sure the last handle still works several times in a row. */ + for (i = 0; i < 4; i++) { + startJob(); + if (safewrite(handles[NUM_FDS - 1].pipeFD[1], &one, 1) != 1) + return EXIT_FAILURE; + if (finishJob("Simple write", NUM_FDS - 1, -1) != EXIT_SUCCESS) + return EXIT_FAILURE; + + resetAll(); + } + + /* Final test, register same FD twice, once with no * events, and make sure the right callback runs */ handles[0].pipeFD[0] = handles[1].pipeFD[0];
ACK Daniel

On 01/27/2011 09:09 AM, Daniel P. Berrange wrote:
The second is a regression introduced in commit e6b68d7 (Nov 2010).
ACK
Now pushed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (3)
-
Daniel P. Berrange
-
Daniel Veillard
-
Eric Blake