Disable changelist for epoll by default because of Linux dup() bug; add an option and/or an envvar to reenable it for speed.

Rename option to control epoll changelist; make epoll changelist off by default
This commit is contained in:
Nick Mathewson 2010-11-14 17:52:16 -05:00
parent 3a67d0bf42
commit 9531763ab0
2 changed files with 106 additions and 18 deletions

102
epoll.c
View File

@ -63,8 +63,8 @@ static void *epoll_init(struct event_base *);
static int epoll_dispatch(struct event_base *, struct timeval *);
static void epoll_dealloc(struct event_base *);
const struct eventop epollops = {
"epoll",
static const struct eventop epollops_changelist = {
"epoll (with changelist)",
epoll_init,
event_changelist_add,
event_changelist_del,
@ -75,6 +75,24 @@ const struct eventop epollops = {
EVENT_CHANGELIST_FDINFO_SIZE
};
static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
short old, short events, void *p);
static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
short old, short events, void *p);
const struct eventop epollops = {
"epoll",
epoll_init,
epoll_nochangelist_add,
epoll_nochangelist_del,
epoll_dispatch,
epoll_dealloc,
1, /* need reinit */
EV_FEATURE_ET|EV_FEATURE_O1,
0
};
#define INITIAL_NEVENT 32
#define MAX_NEVENT 4096
@ -115,6 +133,11 @@ epoll_init(struct event_base *base)
}
epollop->nevents = INITIAL_NEVENT;
if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL))
base->evsel = &epollops_changelist;
evsig_init(base);
return (epollop);
@ -145,19 +168,14 @@ epoll_op_to_string(int op)
}
static int
epoll_apply_changes(struct event_base *base)
epoll_apply_one_change(struct event_base *base,
struct epollop *epollop,
const struct event_change *ch)
{
struct event_changelist *changelist = &base->changelist;
struct epollop *epollop = base->evbase;
struct event_change *ch;
struct epoll_event epev;
int i;
int op, events;
for (i = 0; i < changelist->n_changes; ++i) {
ch = &changelist->changes[i];
events = 0;
int op, events = 0;
if (1) {
/* The logic here is a little tricky. If we had no events set
on the fd before, we need to set op="ADD" and set
events=the events we want to add. If we had any events set
@ -166,7 +184,6 @@ epoll_apply_changes(struct event_base *base)
want to remain. But if we want to delete the last event,
we say op="DEL" and set events=the remaining events. What
fun!
*/
/* TODO: Turn this into a switch or a table lookup. */
@ -237,7 +254,7 @@ epoll_apply_changes(struct event_base *base)
}
if (!events)
continue;
return 0;
memset(&epev, 0, sizeof(epev));
epev.data.fd = ch->fd;
@ -251,6 +268,7 @@ epoll_apply_changes(struct event_base *base)
if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
(int)epev.events, ch->fd);
return -1;
} else {
event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
(int)epev.events,
@ -267,6 +285,7 @@ epoll_apply_changes(struct event_base *base)
if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
(int)epev.events, ch->fd);
return -1;
} else {
event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
(int)epev.events,
@ -292,6 +311,7 @@ epoll_apply_changes(struct event_base *base)
change_to_string(ch->read_change),
ch->write_change,
change_to_string(ch->write_change));
return -1;
}
} else {
event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
@ -303,8 +323,60 @@ epoll_apply_changes(struct event_base *base)
ch->write_change));
}
}
return 0;
}
return (0);
static int
epoll_apply_changes(struct event_base *base)
{
struct event_changelist *changelist = &base->changelist;
struct epollop *epollop = base->evbase;
struct event_change *ch;
int r = 0;
int i;
for (i = 0; i < changelist->n_changes; ++i) {
ch = &changelist->changes[i];
if (epoll_apply_one_change(base, epollop, ch) < 0)
r = -1;
}
return (r);
}
static int
epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
short old, short events, void *p)
{
struct event_change ch;
ch.fd = fd;
ch.old_events = old;
ch.read_change = ch.write_change = 0;
if (events & EV_WRITE)
ch.write_change = EV_CHANGE_ADD |
(events & EV_ET);
if (events & EV_READ)
ch.read_change = EV_CHANGE_ADD |
(events & EV_ET);
return epoll_apply_one_change(base, base->evbase, &ch);
}
static int
epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
short old, short events, void *p)
{
struct event_change ch;
ch.fd = fd;
ch.old_events = old;
ch.read_change = ch.write_change = 0;
if (events & EV_WRITE)
ch.write_change = EV_CHANGE_DEL;
if (events & EV_READ)
ch.read_change = EV_CHANGE_DEL;
return epoll_apply_one_change(base, base->evbase, &ch);
}
static int

View File

@ -183,15 +183,31 @@ enum event_base_config_flag {
/** Do not allocate a lock for the event base, even if we have
locking set up. */
EVENT_BASE_FLAG_NOLOCK = 0x01,
/** Do not check the EVENT_NO* environment variables when picking
an event_base. */
/** Do not check the EVENT_* environment variables when configuring
an event_base */
EVENT_BASE_FLAG_IGNORE_ENV = 0x02,
/** Windows only: enable the IOCP dispatcher at startup */
EVENT_BASE_FLAG_STARTUP_IOCP = 0x04,
/** Instead of checking the current time every time the event loop is
ready to run timeout callbacks, check after each timeout callback.
*/
EVENT_BASE_FLAG_NO_CACHE_TIME = 0x08
EVENT_BASE_FLAG_NO_CACHE_TIME = 0x08,
/** If we are using the epoll backend, this flag says that it is
safe to use Libevent's internal change-list code to batch up
adds and deletes in order to try to do as few syscalls as
possible. Setting this flag can make your code run faster, but
it may trigger a Linux bug: it is not safe to use this flag
if you have any fds cloned by dup() or its variants. Doing so
will produce strange and hard-to-diagnose bugs.
This flag can also be activated by settnig the
EVENT_EPOLL_USE_CHANGELIST environment variable.
This flag has no effect if you wind up using a backend other than
epoll.
*/
EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST = 0x10
};
/**