VFS: change select(2) semantics for closed filps

If a select(2) call was issued on a file descriptor for which the file
pointer was closed due to invalidation (FILP_CLOSED), typically as the
result of a character/socket driver dying, the call would previously
return with an error: EINTR upon call entry or EIO on invalidation at
at a later time.  Especially the former could severely confuse
applications, which would assume the call was interrupted by a signal,
restart the select call and immediately get EINTR again, ad infinitum.

This patch changes the select(2) semantics such that for closed filps,
the file descriptor is returned as readable and/or writable (depending
on the requested operations), as such letting the entire select call
finish successfully.  Applications will then typically attempt to read
from and/or write to the file descriptor, resulting in an I/O error
that they should generally be better equipped to handle.

This patch also fixes a potential problem with returning early from a
select(2) call if a bad file descriptor is given: previously, in such
cases not all actions taken so far would be undone; now they are.

Change-Id: Ia6581f8789473a8a6c200852fccf552691a17025
This commit is contained in:
David van Moolenbroek 2017-02-02 16:20:33 +00:00
parent e3b8d4bb58
commit 722cbc6186

View File

@ -103,9 +103,9 @@ int do_select(void)
* timeout and wait for either the file descriptors to become ready or the * timeout and wait for either the file descriptors to become ready or the
* timer to go off. If no timeout value was provided, we wait indefinitely. * timer to go off. If no timeout value was provided, we wait indefinitely.
*/ */
int r, nfds, do_timeout, fd, s; int r, nfds, do_timeout, fd, type, s;
struct filp *f; struct filp *f;
unsigned int type, ops; unsigned int ops;
struct timeval timeout; struct timeval timeout;
struct selectentry *se; struct selectentry *se;
vir_bytes vtimeout; vir_bytes vtimeout;
@ -183,16 +183,22 @@ int do_select(void)
if (!(ops = tab2ops(fd, se))) if (!(ops = tab2ops(fd, se)))
continue; /* No operations set; nothing to do for this fd */ continue; /* No operations set; nothing to do for this fd */
/* Get filp belonging to this fd */ /* Get filp belonging to this fd. If this fails, there are two causes:
* either the given file descriptor was bad, or the associated filp is
* closed (in the FILP_CLOSED sense) as a result of invalidation. Only
* the former is a select error. The latter should result in operations
* being returned as ready on the file descriptor, since subsequent
* I/O calls are guaranteed to return I/O errors on such descriptors.
*/
f = se->filps[fd] = get_filp(fd, VNODE_READ); f = se->filps[fd] = get_filp(fd, VNODE_READ);
if (f == NULL) { if (f == NULL && err_code != EIO) {
if (err_code == EBADF) assert(err_code == EBADF);
r = err_code;
else /* File descriptor is 'ready' to return EIO */
r = EINTR;
se->requestor = NULL; /* We may already have adjusted filp_selectors on previous
return(r); * file pointers in the set, so do not simply return here.
*/
se->error = EBADF;
break;
} }
/* Check file types. According to POSIX 2008: /* Check file types. According to POSIX 2008:
@ -208,8 +214,14 @@ int do_select(void)
* device nodes. Some may not implement support for select and let * device nodes. Some may not implement support for select and let
* libchardriver return EBADF, which we then pass to the calling * libchardriver return EBADF, which we then pass to the calling
* process once we receive the reply. * process once we receive the reply.
*
* If we could not access the file pointer at all, it will have been
* closed due to invalidation after a service crash. In that case, we
* skip type matching and simply return pending operations as ready.
*/ */
se->type[fd] = -1; se->type[fd] = -1;
if (f == NULL)
continue; /* closed, skip type matching */
for (type = 0; type < SEL_FDS; type++) { for (type = 0; type < SEL_FDS; type++) {
if (fdtypes[type].type_match(f)) { if (fdtypes[type].type_match(f)) {
se->type[fd] = type; se->type[fd] = type;
@ -220,11 +232,18 @@ int do_select(void)
} }
unlock_filp(f); unlock_filp(f);
if (se->type[fd] == -1) { /* Type not found */ if (se->type[fd] == -1) { /* Type not found */
se->requestor = NULL; se->error = EBADF;
return(EBADF); break;
} }
} }
/* If an error occurred already, undo any changes so far and return. */
if (se->error != OK) {
select_cancel_all(se);
se->requestor = NULL;
return(se->error);
}
/* Check all file descriptors in the set whether one is 'ready' now */ /* Check all file descriptors in the set whether one is 'ready' now */
for (fd = 0; fd < nfds; fd++) { for (fd = 0; fd < nfds; fd++) {
/* Again, check for involuntarily selected fd's */ /* Again, check for involuntarily selected fd's */
@ -233,9 +252,15 @@ int do_select(void)
/* File descriptors selected for reading that are not opened for /* File descriptors selected for reading that are not opened for
* reading should be marked as readable, as read calls would fail * reading should be marked as readable, as read calls would fail
* immediately. The same applies to writing. * immediately. The same applies to writing. For file descriptors for
* which the file pointer is already closed (f==NULL), return readable
* and writable operations (if requested) and skip the rest.
*/ */
f = se->filps[fd]; f = se->filps[fd];
if (f == NULL) {
ops2tab(SEL_RD | SEL_WR, fd, se);
continue;
}
if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) { if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
ops2tab(SEL_RD, fd, se); ops2tab(SEL_RD, fd, se);
ops &= ~SEL_RD; ops &= ~SEL_RD;
@ -251,6 +276,7 @@ int do_select(void)
wantops = (f->filp_select_ops |= ops); wantops = (f->filp_select_ops |= ops);
type = se->type[fd]; type = se->type[fd];
assert(type >= 0);
select_lock_filp(f, wantops); select_lock_filp(f, wantops);
r = fdtypes[type].select_request(f, &wantops, se->block, fp); r = fdtypes[type].select_request(f, &wantops, se->block, fp);
unlock_filp(f); unlock_filp(f);
@ -861,7 +887,7 @@ void select_unsuspend_by_endpt(endpoint_t proc_e)
struct dmap *dp; struct dmap *dp;
struct smap *sp; struct smap *sp;
devmajor_t major; devmajor_t major;
int fd, s, is_driver; int fd, s, is_driver, restart;
struct selectentry *se; struct selectentry *se;
struct filp *f; struct filp *f;
@ -872,7 +898,6 @@ void select_unsuspend_by_endpt(endpoint_t proc_e)
is_driver = (dp != NULL || sp != NULL); is_driver = (dp != NULL || sp != NULL);
for (s = 0; s < MAXSELECTS; s++) { for (s = 0; s < MAXSELECTS; s++) {
int wakehim = 0;
se = &selecttab[s]; se = &selecttab[s];
if (se->requestor == NULL) continue; if (se->requestor == NULL) continue;
if (se->requestor->fp_endpoint == proc_e) { if (se->requestor->fp_endpoint == proc_e) {
@ -885,6 +910,8 @@ void select_unsuspend_by_endpt(endpoint_t proc_e)
if (!is_driver) if (!is_driver)
continue; continue;
restart = FALSE;
for (fd = 0; fd < se->nfds; fd++) { for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL) if ((f = se->filps[fd]) == NULL)
continue; continue;
@ -892,24 +919,24 @@ void select_unsuspend_by_endpt(endpoint_t proc_e)
assert(f->filp_select_dev != NO_DEV); assert(f->filp_select_dev != NO_DEV);
major = major(f->filp_select_dev); major = major(f->filp_select_dev);
if (dmap_driver_match(proc_e, major)) { if (dmap_driver_match(proc_e, major)) {
ops2tab(SEL_RD | SEL_WR, fd, se);
se->filps[fd] = NULL; se->filps[fd] = NULL;
se->error = EIO;
select_cancel_filp(f); select_cancel_filp(f);
wakehim = 1; restart = TRUE;
} }
} else if (sp != NULL && is_sock_device(f)) { } else if (sp != NULL && is_sock_device(f)) {
assert(f->filp_select_dev != NO_DEV); assert(f->filp_select_dev != NO_DEV);
if (get_smap_by_dev(f->filp_select_dev, NULL) == sp) { if (get_smap_by_dev(f->filp_select_dev, NULL) == sp) {
ops2tab(SEL_RD | SEL_WR, fd, se);
se->filps[fd] = NULL; se->filps[fd] = NULL;
se->error = EIO;
select_cancel_filp(f); select_cancel_filp(f);
wakehim = 1; restart = TRUE;
} }
} }
} }
if (wakehim && !is_deferred(se)) if (restart)
select_return(se); restart_proc(se);
} }
/* Any outstanding queries will never be answered, so forget about them. */ /* Any outstanding queries will never be answered, so forget about them. */