From: Derbyshev Dmitry <dderbyshev(a)virtuozzo.com>
This makes it possible to avoid allocations in
virEventPollMakePollFDs, as well as generally reduces time spent in
kernel if handles remain the same, which should generally be true.
__________________________________________
Questions:
1) What to do with probe in virEventPollRunOnce?
2) Are ifdef an acceptable solution to epoll avaliability issues?
3) Is using MAX_POLL_EVENTS_AT_ONCE constant a valid solution?
4) some fds are sometimes added twice, but only once with events!=0? This complicates
virEventPoll*Handle fuctions. At the very least, it is called twice in
_dbus_watch_list_set_functions.
Signed-off-by: Derbyshev Dmitry <dderbyshev(a)virtuozzo.com>
---
configure.ac | 26 +++++
src/util/vireventpoll.c | 205 ++++++++++++++++++++++++++++++++++++---
src/util/vireventpoll.h | 5 +
tests/commanddata/test3epoll.log | 15 +++
tests/commandtest.c | 4 +
5 files changed, 242 insertions(+), 13 deletions(-)
create mode 100644 tests/commanddata/test3epoll.log
diff --git a/configure.ac b/configure.ac
index dfc536f..c985ccc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2695,6 +2695,32 @@ AC_DEFINE_UNQUOTED([isbase64],[libvirt_gl_isbase64],[Hack to avoid
symbol clash]
AC_DEFINE_UNQUOTED([base64_encode],[libvirt_gl_base64_encode],[Hack to avoid symbol
clash])
AC_DEFINE_UNQUOTED([base64_encode_alloc],[libvirt_gl_base64_encode_alloc],[Hack to avoid
symbol clash])
+AC_ARG_ENABLE([epoll],
+ [AS_HELP_STRING([--enable-epoll],[use epoll(4) on Linux])],
+ [enable_epoll=$enableval], [enable_epoll=auto])
+if test x$enable_epoll = xno; then
+ have_linux_epoll=no
+else
+ AC_MSG_CHECKING([for Linux epoll(4)])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM(
+ [
+ #ifndef __linux__
+ #error This is not Linux
+ #endif
+ #include <sys/epoll.h>
+ ],
+ [epoll_create1 (EPOLL_CLOEXEC);])],
+ [have_linux_epoll=yes],
+ [have_linux_epoll=no])
+ AC_MSG_RESULT([$have_linux_epoll])
+fi
+if test x$enable_epoll,$have_linux_epoll = xyes,no; then
+ AC_MSG_ERROR([epoll support explicitly enabled but not available])
+fi
+if test "x$have_linux_epoll" = xyes; then
+ AC_DEFINE_UNQUOTED([HAVE_LINUX_EPOLL], 1, [whether epoll is supported])
+fi
+
AC_CONFIG_FILES([run],
[chmod +x,-w run])
AC_CONFIG_FILES([\
diff --git a/src/util/vireventpoll.c b/src/util/vireventpoll.c
index 81ecab4..1541706 100644
--- a/src/util/vireventpoll.c
+++ b/src/util/vireventpoll.c
@@ -25,7 +25,11 @@
#include <stdlib.h>
#include <string.h>
-#include <poll.h>
+#ifdef HAVE_LINUX_EPOLL
+# include <sys/epoll.h>
+#else
+# include <poll.h>
+#endif
#include <sys/time.h>
#include <errno.h>
#include <unistd.h>
@@ -87,6 +91,9 @@ struct virEventPollLoop {
size_t timeoutsCount;
size_t timeoutsAlloc;
struct virEventPollTimeout *timeouts;
+#ifdef HAVE_LINUX_EPOLL
+ int epollfd;
+#endif
};
/* Only have one event loop */
@@ -109,6 +116,11 @@ int virEventPollAddHandle(int fd, int events,
virFreeCallback ff)
{
int watch;
+#ifdef HAVE_LINUX_EPOLL
+ size_t i;
+ struct epoll_event ev;
+#endif
+ int native = virEventPollToNativeEvents(events);
virMutexLock(&eventLoop.lock);
if (eventLoop.handlesCount == eventLoop.handlesAlloc) {
EVENT_DEBUG("Used %zu handle slots, adding at least %d more",
@@ -124,8 +136,7 @@ int virEventPollAddHandle(int fd, int events,
eventLoop.handles[eventLoop.handlesCount].watch = watch;
eventLoop.handles[eventLoop.handlesCount].fd = fd;
- eventLoop.handles[eventLoop.handlesCount].events =
- virEventPollToNativeEvents(events);
+ eventLoop.handles[eventLoop.handlesCount].events = native;
eventLoop.handles[eventLoop.handlesCount].cb = cb;
eventLoop.handles[eventLoop.handlesCount].ff = ff;
eventLoop.handles[eventLoop.handlesCount].opaque = opaque;
@@ -133,7 +144,30 @@ int virEventPollAddHandle(int fd, int events,
eventLoop.handlesCount++;
+#ifdef HAVE_LINUX_EPOLL
+ ev.events = native;
+ ev.data.fd = fd;
+ if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ if (errno == EEXIST) {
+ for (i = 0; i < eventLoop.handlesCount; i++) {
+ if (eventLoop.handles[i].fd == fd &&
+ !eventLoop.handles[i].deleted) {
+ ev.events |= eventLoop.handles[i].events;
+ }
+ }
+ if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD, fd, &ev) < 0) {
+ virMutexUnlock(&eventLoop.lock);
+ return -1;
+ }
+ }
+ else {
+ virMutexUnlock(&eventLoop.lock);
+ return -1;
+ }
+ }
+#else
virEventPollInterruptLocked();
+#endif
PROBE(EVENT_POLL_ADD_HANDLE,
"watch=%d fd=%d events=%d cb=%p opaque=%p ff=%p",
@@ -145,8 +179,14 @@ int virEventPollAddHandle(int fd, int events,
void virEventPollUpdateHandle(int watch, int events)
{
+#ifdef HAVE_LINUX_EPOLL
+ struct epoll_event ev;
+ size_t i, j;
+#else
size_t i;
+#endif
bool found = false;
+ int native = virEventPollToNativeEvents(events);
PROBE(EVENT_POLL_UPDATE_HANDLE,
"watch=%d events=%d",
watch, events);
@@ -159,13 +199,34 @@ void virEventPollUpdateHandle(int watch, int events)
virMutexLock(&eventLoop.lock);
for (i = 0; i < eventLoop.handlesCount; i++) {
if (eventLoop.handles[i].watch == watch) {
- eventLoop.handles[i].events =
- virEventPollToNativeEvents(events);
+ eventLoop.handles[i].events = native;
+#ifndef HAVE_LINUX_EPOLL
virEventPollInterruptLocked();
+#endif
found = true;
break;
}
}
+
+#ifdef HAVE_LINUX_EPOLL
+ ev.events = native;
+ for (j = 0; j < eventLoop.handlesCount; j++) {
+ if (eventLoop.handles[i].fd == eventLoop.handles[i].fd &&
+ !eventLoop.handles[i].deleted &&
+ i != j) {
+ ev.events |= eventLoop.handles[i].events;
+ }
+ }
+ ev.data.fd = eventLoop.handles[i].fd;
+
+ if (found && epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD,
+ eventLoop.handles[i].fd, &ev) < 0) {
+ VIR_WARN("Update for existing handle watch %d failed", watch);
+ virMutexUnlock(&eventLoop.lock);
+ return;
+ }
+#endif
+
virMutexUnlock(&eventLoop.lock);
if (!found)
@@ -180,7 +241,12 @@ void virEventPollUpdateHandle(int watch, int events)
*/
int virEventPollRemoveHandle(int watch)
{
+#ifdef HAVE_LINUX_EPOLL
+ struct epoll_event ev;
+ size_t i, j;
+#else
size_t i;
+#endif
PROBE(EVENT_POLL_REMOVE_HANDLE,
"watch=%d",
watch);
@@ -196,15 +262,47 @@ int virEventPollRemoveHandle(int watch)
continue;
if (eventLoop.handles[i].watch == watch) {
- EVENT_DEBUG("mark delete %zu %d", i, eventLoop.handles[i].fd);
- eventLoop.handles[i].deleted = 1;
- virEventPollInterruptLocked();
+ break;
+ }
+ }
+
+ if (i == eventLoop.handlesCount) {
+ virMutexUnlock(&eventLoop.lock);
+ return -1;
+ }
+
+#ifdef HAVE_LINUX_EPOLL
+ ev.events = 0;
+ for (j = 0; j < eventLoop.handlesCount; j++) {
+ if (eventLoop.handles[j].fd == eventLoop.handles[i].fd &&
+ !eventLoop.handles[j].deleted &&
+ i != j) {
+ ev.events |= eventLoop.handles[i].events;
+ }
+ }
+
+ ev.data.fd = eventLoop.handles[i].fd;
+ if (ev.events) {
+ if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD, eventLoop.handles[i].fd,
+ &ev) < 0) {
virMutexUnlock(&eventLoop.lock);
- return 0;
+ return -1;
}
}
+ else {
+ if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_DEL, eventLoop.handles[i].fd,
+ &ev) < 0) {
+ virMutexUnlock(&eventLoop.lock);
+ return -1;
+ }
+ }
+#endif
+
+ EVENT_DEBUG("mark delete %zu %d", i, eventLoop.handles[i].fd);
+ eventLoop.handles[i].deleted = 1;
+ virEventPollInterruptLocked();
virMutexUnlock(&eventLoop.lock);
- return -1;
+ return 0;
}
@@ -373,6 +471,7 @@ static int virEventPollCalculateTimeout(int *timeout)
return 0;
}
+#ifndef HAVE_LINUX_EPOLL
/*
* Allocate a pollfd array containing data for all registered
* file handles. The caller must free the returned data struct
@@ -409,6 +508,7 @@ static struct pollfd *virEventPollMakePollFDs(int *nfds) {
return fds;
}
+#endif
/*
@@ -472,7 +572,11 @@ static int virEventPollDispatchTimeouts(void)
*
* Returns 0 upon success, -1 if an error occurred
*/
+#ifdef HAVE_LINUX_EPOLL
+static int virEventPollDispatchHandles(int nfds, struct epoll_event *events)
+#else
static int virEventPollDispatchHandles(int nfds, struct pollfd *fds)
+#endif
{
size_t i, n;
VIR_DEBUG("Dispatch %d", nfds);
@@ -482,7 +586,11 @@ static int virEventPollDispatchHandles(int nfds, struct pollfd *fds)
* in the fds array we've got */
for (i = 0, n = 0; n < nfds && i < eventLoop.handlesCount; n++) {
while (i < eventLoop.handlesCount &&
+#ifdef HAVE_LINUX_EPOLL
+ (eventLoop.handles[i].fd != events[n].data.fd ||
+#else
(eventLoop.handles[i].fd != fds[n].fd ||
+#endif
eventLoop.handles[i].events == 0)) {
i++;
}
@@ -496,16 +604,25 @@ static int virEventPollDispatchHandles(int nfds, struct pollfd
*fds)
continue;
}
+#ifdef HAVE_LINUX_EPOLL
+ if (events[n].events) {
+ int hEvents = virEventPollFromNativeEvents(events[n].events);
+#else
if (fds[n].revents) {
+ int hEvents = virEventPollFromNativeEvents(fds[n].revents);
+#endif
virEventHandleCallback cb = eventLoop.handles[i].cb;
int watch = eventLoop.handles[i].watch;
void *opaque = eventLoop.handles[i].opaque;
- int hEvents = virEventPollFromNativeEvents(fds[n].revents);
PROBE(EVENT_POLL_DISPATCH_HANDLE,
"watch=%d events=%d",
watch, hEvents);
virMutexUnlock(&eventLoop.lock);
+#ifdef HAVE_LINUX_EPOLL
+ (cb)(watch, events[n].data.fd, hEvents, opaque);
+#else
(cb)(watch, fds[n].fd, hEvents, opaque);
+#endif
virMutexLock(&eventLoop.lock);
}
}
@@ -618,7 +735,11 @@ static void virEventPollCleanupHandles(void)
*/
int virEventPollRunOnce(void)
{
+#ifdef HAVE_LINUX_EPOLL
+ struct epoll_event events[MAX_POLL_EVENTS_AT_ONCE];
+#else
struct pollfd *fds = NULL;
+#endif
int ret, timeout, nfds;
virMutexLock(&eventLoop.lock);
@@ -628,17 +749,29 @@ int virEventPollRunOnce(void)
virEventPollCleanupTimeouts();
virEventPollCleanupHandles();
- if (!(fds = virEventPollMakePollFDs(&nfds)) ||
- virEventPollCalculateTimeout(&timeout) < 0)
+#ifndef HAVE_LINUX_EPOLL
+ if (!(fds = virEventPollMakePollFDs(&nfds)))
+ goto error;
+#endif
+ if (virEventPollCalculateTimeout(&timeout) < 0)
goto error;
virMutexUnlock(&eventLoop.lock);
retry:
+#ifdef HAVE_LINUX_EPOLL
+//FIXME: PROBE handles
+ PROBE(EVENT_POLL_RUN,
+ "nhandles=%d timeout=%d",
+ 42, timeout);
+ nfds = ret = epoll_wait(eventLoop.epollfd, events,
+ MAX_POLL_EVENTS_AT_ONCE, timeout);
+#else
PROBE(EVENT_POLL_RUN,
"nhandles=%d timeout=%d",
nfds, timeout);
ret = poll(fds, nfds, timeout);
+#endif
if (ret < 0) {
EVENT_DEBUG("Poll got error event %d", errno);
if (errno == EINTR || errno == EAGAIN)
@@ -653,22 +786,32 @@ int virEventPollRunOnce(void)
if (virEventPollDispatchTimeouts() < 0)
goto error;
+#ifdef HAVE_LINUX_EPOLL
+ if (ret > 0 &&
+ virEventPollDispatchHandles(nfds, events) < 0)
+ goto error;
+#else
if (ret > 0 &&
virEventPollDispatchHandles(nfds, fds) < 0)
goto error;
+#endif
virEventPollCleanupTimeouts();
virEventPollCleanupHandles();
eventLoop.running = 0;
virMutexUnlock(&eventLoop.lock);
+#ifndef HAVE_LINUX_EPOLL
VIR_FREE(fds);
+#endif
return 0;
error:
virMutexUnlock(&eventLoop.lock);
error_unlocked:
+#ifndef HAVE_LINUX_EPOLL
VIR_FREE(fds);
+#endif
return -1;
}
@@ -698,6 +841,17 @@ int virEventPollInit(void)
return -1;
}
+#ifdef HAVE_LINUX_EPOLL
+ eventLoop.epollfd = epoll_create1(0);
+ if (eventLoop.epollfd < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Unable to initialize epoll"));
+ VIR_FORCE_CLOSE(eventLoop.wakeupfd[0]);
+ VIR_FORCE_CLOSE(eventLoop.wakeupfd[1]);
+ return -1;
+ }
+#endif
+
if (virEventPollAddHandle(eventLoop.wakeupfd[0],
VIR_EVENT_HANDLE_READABLE,
virEventPollHandleWakeup, NULL, NULL) < 0) {
@@ -706,6 +860,9 @@ int virEventPollInit(void)
eventLoop.wakeupfd[0]);
VIR_FORCE_CLOSE(eventLoop.wakeupfd[0]);
VIR_FORCE_CLOSE(eventLoop.wakeupfd[1]);
+#ifdef HAVE_LINUX_EPOLL
+ VIR_FORCE_CLOSE(eventLoop.epollfd);
+#endif
return -1;
}
@@ -742,6 +899,16 @@ int
virEventPollToNativeEvents(int events)
{
int ret = 0;
+#ifdef HAVE_LINUX_EPOLL
+ if (events & VIR_EVENT_HANDLE_READABLE)
+ ret |= EPOLLIN;
+ if (events & VIR_EVENT_HANDLE_WRITABLE)
+ ret |= EPOLLOUT;
+ if (events & VIR_EVENT_HANDLE_ERROR)
+ ret |= EPOLLERR;
+ if (events & VIR_EVENT_HANDLE_HANGUP)
+ ret |= EPOLLHUP;
+#else
if (events & VIR_EVENT_HANDLE_READABLE)
ret |= POLLIN;
if (events & VIR_EVENT_HANDLE_WRITABLE)
@@ -750,6 +917,7 @@ virEventPollToNativeEvents(int events)
ret |= POLLERR;
if (events & VIR_EVENT_HANDLE_HANGUP)
ret |= POLLHUP;
+#endif
return ret;
}
@@ -757,6 +925,16 @@ int
virEventPollFromNativeEvents(int events)
{
int ret = 0;
+#ifdef HAVE_LINUX_EPOLL
+ if (events & EPOLLIN)
+ ret |= VIR_EVENT_HANDLE_READABLE;
+ if (events & EPOLLOUT)
+ ret |= VIR_EVENT_HANDLE_WRITABLE;
+ if (events & EPOLLERR)
+ ret |= VIR_EVENT_HANDLE_ERROR;
+ if (events & EPOLLHUP)
+ ret |= VIR_EVENT_HANDLE_HANGUP;
+#else
if (events & POLLIN)
ret |= VIR_EVENT_HANDLE_READABLE;
if (events & POLLOUT)
@@ -767,5 +945,6 @@ virEventPollFromNativeEvents(int events)
ret |= VIR_EVENT_HANDLE_ERROR;
if (events & POLLHUP)
ret |= VIR_EVENT_HANDLE_HANGUP;
+#endif
return ret;
}
diff --git a/src/util/vireventpoll.h b/src/util/vireventpoll.h
index 8844c9c..172009a 100644
--- a/src/util/vireventpoll.h
+++ b/src/util/vireventpoll.h
@@ -26,6 +26,11 @@
# include "internal.h"
+# ifdef HAVE_LINUX_EPOLL
+/* Maximum number of events that are returned by epoll in virEventPollRunOnce */
+# define MAX_POLL_EVENTS_AT_ONCE 10
+# endif
+
/**
* virEventPollAddHandle: register a callback for monitoring file handle events
*
diff --git a/tests/commanddata/test3epoll.log b/tests/commanddata/test3epoll.log
new file mode 100644
index 0000000..e4619b3
--- /dev/null
+++ b/tests/commanddata/test3epoll.log
@@ -0,0 +1,15 @@
+ENV:DISPLAY=:0.0
+ENV:HOME=/home/test
+ENV:HOSTNAME=test
+ENV:LANG=C
+ENV:LOGNAME=testTMPDIR=/tmp
+ENV:PATH=/usr/bin:/bin
+ENV:USER=test
+FD:0
+FD:1
+FD:2
+FD:6
+FD:8
+DAEMON:no
+CWD:/tmp
+UMASK:0022
diff --git a/tests/commandtest.c b/tests/commandtest.c
index 7bf5447..6b23659 100644
--- a/tests/commandtest.c
+++ b/tests/commandtest.c
@@ -227,7 +227,11 @@ static int test3(const void *unused ATTRIBUTE_UNUSED)
goto cleanup;
}
+#ifdef HAVE_LINUX_EPOLL
+ ret = checkoutput("test3epoll", NULL);
+#else
ret = checkoutput("test3", NULL);
+#endif
cleanup:
virCommandFree(cmd);
--
1.9.5.msysgit.0