diff --git a/minix/include/minix/ipc.h b/minix/include/minix/ipc.h index 7a3507106..5bfac14e7 100644 --- a/minix/include/minix/ipc.h +++ b/minix/include/minix/ipc.h @@ -621,8 +621,9 @@ _ASSERT_MSG_SIZE(mess_lc_vfs_chown); typedef struct { int fd; + int nblock; - uint8_t padding[52]; + uint8_t padding[48]; } mess_lc_vfs_close; _ASSERT_MSG_SIZE(mess_lc_vfs_close); diff --git a/minix/include/minix/syslib.h b/minix/include/minix/syslib.h index a6a80a6c6..b5f573863 100644 --- a/minix/include/minix/syslib.h +++ b/minix/include/minix/syslib.h @@ -274,6 +274,7 @@ int copyfd(endpoint_t endpt, int fd, int what); #define COPYFD_FROM 0 /* copy file descriptor from remote process */ #define COPYFD_TO 1 /* copy file descriptor to remote process */ #define COPYFD_CLOSE 2 /* close file descriptor in remote process */ +int closenb(int fd); /* * These are also the event numbers used in PROC_EVENT messages, but in order diff --git a/minix/lib/libc/sys/close.c b/minix/lib/libc/sys/close.c index f74571bde..ac8c7f54c 100644 --- a/minix/lib/libc/sys/close.c +++ b/minix/lib/libc/sys/close.c @@ -5,12 +5,14 @@ #include #include -int close(fd) -int fd; +int +close(int fd) { - message m; + message m; - memset(&m, 0, sizeof(m)); - m.m_lc_vfs_close.fd = fd; - return(_syscall(VFS_PROC_NR, VFS_CLOSE, &m)); + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_close.fd = fd; + m.m_lc_vfs_close.nblock = 0; + + return _syscall(VFS_PROC_NR, VFS_CLOSE, &m); } diff --git a/minix/lib/libsys/Makefile b/minix/lib/libsys/Makefile index c4ebc869b..84a5b3aa0 100644 --- a/minix/lib/libsys/Makefile +++ b/minix/lib/libsys/Makefile @@ -15,6 +15,7 @@ SRCS+= \ asynsend.c \ checkperms.c \ clock_time.c \ + closenb.c \ copyfd.c \ cpuavg.c \ ds.c \ diff --git a/minix/lib/libsys/closenb.c b/minix/lib/libsys/closenb.c new file mode 100644 index 000000000..5603708cc --- /dev/null +++ b/minix/lib/libsys/closenb.c @@ -0,0 +1,27 @@ +#include "syslib.h" + +#include + +/* + * Non-blocking variant of close(2). This call is to be used by system + * services that need to close arbitrary local file descriptors. The purpose + * of the call is to avoid that such services end up blocking on closing socket + * file descriptors with the SO_LINGER socket option enabled. They cannot put + * the file pointer in non-blocking mode to that end, because the file pointer + * may be shared with other processes. + * + * Even though this call is defined for system services only, there is no harm + * in letting arbitrary user processes use this functionality. Thus, it needs + * no separate VFS call number. + */ +int +closenb(int fd) +{ + message m; + + memset(&m, 0, sizeof(m)); + m.m_lc_vfs_close.fd = fd; + m.m_lc_vfs_close.nblock = 1; + + return _taskcall(VFS_PROC_NR, VFS_CLOSE, &m); +} diff --git a/minix/servers/vfs/exec.c b/minix/servers/vfs/exec.c index 24704f08a..f37fc0b2e 100644 --- a/minix/servers/vfs/exec.c +++ b/minix/servers/vfs/exec.c @@ -391,7 +391,7 @@ pm_execfinal: } if(execi.vmfd >= 0 && !execi.vmfd_used) { - if(OK != close_fd(vmfp, execi.vmfd)) { + if(OK != close_fd(vmfp, execi.vmfd, FALSE /*may_suspend*/)) { printf("VFS: unexpected close fail of vm fd\n"); } } @@ -727,7 +727,7 @@ static void clo_exec(struct fproc *rfp) /* Check the file desriptors one by one for presence of FD_CLOEXEC. */ for (i = 0; i < OPEN_MAX; i++) if ( FD_ISSET(i, &rfp->fp_cloexec_set)) - (void) close_fd(rfp, i); + (void) close_fd(rfp, i, FALSE /*may_suspend*/); } /*===========================================================================* diff --git a/minix/servers/vfs/filedes.c b/minix/servers/vfs/filedes.c index 1bbeeb6ef..4ba4faf8a 100644 --- a/minix/servers/vfs/filedes.c +++ b/minix/servers/vfs/filedes.c @@ -410,12 +410,17 @@ unlock_filps(struct filp *filp1, struct filp *filp2) /*===========================================================================* * close_filp * *===========================================================================*/ -void -close_filp(struct filp *f) +int +close_filp(struct filp * f, int may_suspend) { -/* Close a file. Will also unlock filp when done */ - - int rw; +/* Close a file. Will also unlock filp when done. The 'may_suspend' flag + * indicates whether the current process may be suspended closing a socket. + * That is currently supported only when the user issued a close(2), and (only) + * in that case may this function return SUSPEND instead of OK. In all other + * cases, this function will always return OK. It will never return another + * error code, for reasons explained below. + */ + int r, rw; dev_t dev; struct vnode *vp; @@ -425,6 +430,8 @@ close_filp(struct filp *f) vp = f->filp_vno; + r = OK; + if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) { /* Check to see if the file is special. */ if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode) || @@ -446,19 +453,34 @@ close_filp(struct filp *f) (void) cdev_close(dev); /* Ignore errors */ } else { /* - * TODO: this should be completely redone. Sockets may - * take a while to be closed (SO_LINGER etc), and thus, - * we should be able to issue a suspending close to a + * Sockets may take a while to be closed (SO_LINGER), + * and thus, we may issue a suspending close to a * socket driver. Getting this working for close(2) is - * the easy case, but there's also eg dup2(2), which if - * interrupted by a signal should fail without closing - * the file descriptor. Then there are cases where the - * close should probably never block: close-on-exec, - * exit, and UDS closing in-flight FDs (currently just - * using close(2), but it could set the FD to non- - * blocking) for instance. There is much to do here. + * the easy case, and that is all we support for now. + * However, there is also eg dup2(2), which if + * interrupted by a signal should technically fail + * without closing the file descriptor. Then there are + * cases where the close should never block: process + * exit and close-on-exec for example. Getting all + * such cases right is left to future work; currently + * they all perform thread-blocking socket closes and + * thus cause the socket to perform lingering in the + * background if at all. */ - (void) sdev_close(dev); /* Ignore errors */ + assert(!may_suspend || job_call_nr == VFS_CLOSE); + + if (f->filp_flags & O_NONBLOCK) + may_suspend = FALSE; + + r = sdev_close(dev, may_suspend); + + /* + * Returning a non-OK error is a bad idea, because it + * will leave the application wondering whether closing + * the file descriptor actually succeeded. + */ + if (r != SUSPEND) + r = OK; } f->filp_mode = FILP_CLOSED; @@ -492,6 +514,8 @@ close_filp(struct filp *f) } mutex_unlock(&f->filp_lock); + + return r; } /*===========================================================================* diff --git a/minix/servers/vfs/misc.c b/minix/servers/vfs/misc.c index 4ea85f11c..b9336f3cc 100644 --- a/minix/servers/vfs/misc.c +++ b/minix/servers/vfs/misc.c @@ -452,7 +452,7 @@ int do_vm_call(void) } case VMVFSREQ_FDCLOSE: { - result = close_fd(fp, req_fd); + result = close_fd(fp, req_fd, FALSE /*may_suspend*/); if(result != OK) { printf("VFS: VM fd close for fd %d, %d (%d)\n", req_fd, fp->fp_endpoint, result); @@ -645,7 +645,7 @@ static void free_proc(int flags) /* Loop on file descriptors, closing any that are open. */ for (i = 0; i < OPEN_MAX; i++) { - (void) close_fd(fp, i); + (void) close_fd(fp, i, FALSE /*may_suspend*/); } /* Release root and working directories. */ diff --git a/minix/servers/vfs/open.c b/minix/servers/vfs/open.c index b5e3f8513..fc5b0c227 100644 --- a/minix/servers/vfs/open.c +++ b/minix/servers/vfs/open.c @@ -673,9 +673,13 @@ int do_lseek(void) *===========================================================================*/ int do_close(void) { -/* Perform the close(fd) system call. */ - int thefd = job_m_in.m_lc_vfs_close.fd; - return close_fd(fp, thefd); +/* Perform the close(fd) or closenb(fd) system call. */ + int fd, nblock; + + fd = job_m_in.m_lc_vfs_close.fd; + nblock = job_m_in.m_lc_vfs_close.nblock; + + return close_fd(fp, fd, !nblock /*may_suspend*/); } @@ -683,13 +687,13 @@ int do_close(void) * close_fd * *===========================================================================*/ int -close_fd(struct fproc *rfp, int fd_nr) +close_fd(struct fproc * rfp, int fd_nr, int may_suspend) { /* Perform the close(fd) system call. */ register struct filp *rfilp; register struct vnode *vp; struct file_lock *flp; - int lock_count; + int r, lock_count; /* First locate the vnode that belongs to the file descriptor. */ if ( (rfilp = get_filp2(rfp, fd_nr, VNODE_OPCL)) == NULL) return(err_code); @@ -701,7 +705,7 @@ close_fd(struct fproc *rfp, int fd_nr) */ rfp->fp_filp[fd_nr] = NULL; - close_filp(rfilp); + r = close_filp(rfilp, may_suspend); FD_CLR(fd_nr, &rfp->fp_cloexec_set); @@ -719,5 +723,5 @@ close_fd(struct fproc *rfp, int fd_nr) lock_revive(); /* one or more locks released */ } - return(OK); + return(r); } diff --git a/minix/servers/vfs/proto.h b/minix/servers/vfs/proto.h index fada70710..d9a3b2068 100644 --- a/minix/servers/vfs/proto.h +++ b/minix/servers/vfs/proto.h @@ -89,7 +89,7 @@ void invalidate_filp(struct filp *); void invalidate_filp_by_endpt(endpoint_t proc_e); void invalidate_filp_by_char_major(devmajor_t major); void invalidate_filp_by_sock_drv(unsigned int num); -void close_filp(struct filp *fp); +int close_filp(struct filp *fp, int may_suspend); int do_copyfd(void); /* fscall.c */ @@ -149,7 +149,7 @@ void unmount_all(int force); /* open.c */ int do_close(void); -int close_fd(struct fproc *rfp, int fd_nr); +int close_fd(struct fproc *rfp, int fd_nr, int may_suspend); int common_open(char path[PATH_MAX], int oflags, mode_t omode, int for_exec); int do_creat(void); int do_lseek(void); @@ -281,7 +281,7 @@ int sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr, int sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int *addr_len); int sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int *addr_len); int sdev_shutdown(dev_t dev, int how); -int sdev_close(dev_t dev); +int sdev_close(dev_t dev, int may_suspend); int sdev_select(dev_t dev, int ops); void sdev_stop(struct fproc *rfp); void sdev_cancel(void); diff --git a/minix/servers/vfs/sdev.c b/minix/servers/vfs/sdev.c index 7bc667292..cc3cd4e50 100644 --- a/minix/servers/vfs/sdev.c +++ b/minix/servers/vfs/sdev.c @@ -40,11 +40,6 @@ * thread is spawned for processing successful accept replies, unless the reply * was received from a worker thread already (as may be the case if the accept * request was being canceled). - * - * As a current shortcoming, close requests should be long-lived (in order to - * support SO_LINGER) but are modeled as short-lived in VFS. This is the - * result of implementation limitations that have to be resolved first; see the - * comments in sdev_close() and close_filp() for more information. */ #include "fs.h" @@ -166,7 +161,7 @@ sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair) if (sock_id2 < 0) { printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n", sp->smap_endpt, sock_id2); - (void)sdev_close(dev[0]); + (void)sdev_close(dev[0], FALSE /*may_suspend*/); return EIO; } @@ -606,22 +601,42 @@ sdev_shutdown(dev_t dev, int how) * Close the socket identified by the given socket device number. */ int -sdev_close(dev_t dev) +sdev_close(dev_t dev, int may_suspend) { + struct smap *sp; + sockid_t sock_id; + message m; + int r; /* - * TODO: for now, we generate only nonblocking requests, because VFS as - * a whole does not yet support blocking close operations. See also - * the comment in close_filp(). All callers of sdev_close() currently - * ignore the return value, so socket drivers can already implement - * support for blocking close requests if they want, although at some - * later point we may have to introduce an additional SDEV_ flag to - * indicate whether the close call should be interrupted (keeping the - * socket open and returning EINTR, for dup2(2)) or continue in the - * background (closing the socket later and returning EINPROGRESS, for - * close(2)). + * Originally, all close requests were blocking the calling thread, but + * the new support for SO_LINGER has changed that. In a very strictly + * limited subset of cases - namely, the user process calling close(2), + * we suspend the close request and handle it asynchronously. In all + * other cases, including close-on-exit, close-on-exec, and even dup2, + * the close is issued as a thread-synchronous request instead. */ - return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK); + if (may_suspend) { + if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) + return EIO; + + /* Prepare the request message. */ + memset(&m, 0, sizeof(m)); + m.m_type = SDEV_CLOSE; + m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e; + m.m_vfs_lsockdriver_simple.sock_id = sock_id; + m.m_vfs_lsockdriver_simple.param = 0; + + /* Send the request to the driver. */ + if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) + panic("VFS: asynsend in sdev_bindconn failed: %d", r); + + /* Suspend the process until the reply arrives. */ + return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID, + GRANT_INVALID, -1, 0); + } else + /* Block the calling thread until the socket is closed. */ + return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK); } /* @@ -774,12 +789,21 @@ sdev_finish(struct fproc * rfp, message * m_ptr) case VFS_SENDTO: case VFS_SENDMSG: case VFS_IOCTL: + case VFS_CLOSE: /* * These calls all use the same SDEV_REPLY reply type and only * need to reply an OK-or-error status code back to userland. */ if (m_ptr->m_type == SDEV_REPLY) { status = m_ptr->m_lsockdriver_vfs_reply.status; + + /* + * For close(2) calls, the return value must indicate + * that the file descriptor has been closed. + */ + if (callnr == VFS_CLOSE && + status != OK && status != EINPROGRESS) + status = OK; } else if (m_ptr->m_type < 0) { status = m_ptr->m_type; } else { diff --git a/minix/servers/vfs/socket.c b/minix/servers/vfs/socket.c index dd8af0a12..17ec25131 100644 --- a/minix/servers/vfs/socket.c +++ b/minix/servers/vfs/socket.c @@ -212,7 +212,7 @@ do_socket(void) return r; if ((r = make_sock_fd(dev, flags)) < 0) - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); return r; } @@ -250,14 +250,14 @@ do_socketpair(void) return r; if ((fd0 = make_sock_fd(dev[0], flags)) < 0) { - (void)sdev_close(dev[0]); - (void)sdev_close(dev[1]); + (void)sdev_close(dev[0], FALSE /*may_suspend*/); + (void)sdev_close(dev[1], FALSE /*may_suspend*/); return fd0; } if ((fd1 = make_sock_fd(dev[1], flags)) < 0) { - close_fd(fp, fd0); - (void)sdev_close(dev[1]); + close_fd(fp, fd0, FALSE /*may_suspend*/); + (void)sdev_close(dev[1], FALSE /*may_suspend*/); return fd1; } @@ -442,7 +442,7 @@ resume_accept(struct fproc * rfp, int status, dev_t dev, unsigned int addr_len, * handle address copy failures in the cleanest possible way. */ if (status != OK) { - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); replycode(rfp->fp_endpoint, status); @@ -459,7 +459,7 @@ resume_accept(struct fproc * rfp, int status, dev_t dev, unsigned int addr_len, flags &= O_CLOEXEC | O_NONBLOCK | O_NOSIGPIPE; if ((r = make_sock_fd(dev, flags)) < 0) { - (void)sdev_close(dev); + (void)sdev_close(dev, FALSE /*may_suspend*/); replycode(rfp->fp_endpoint, r);