/* * This file implements the lower socket layer of VFS: communication with * socket drivers. Socket driver communication evolved out of character driver * communication, and the two have many similarities between them. Most * importantly, socket driver communication also has the distinction between * short-lived and long-lived requests. * * Short-lived requests are expected to be replied to by the socket driver * immediately in all cases. For such requests, VFS keeps the worker thread * for the calling process alive until the reply arrives. In contrast, * long-lived requests may block. For such requests, VFS suspends the calling * process until a reply comes in, or until a signal interrupts the request. * Both short-lived and long-lived requests may be aborted if VFS finds that * the corresponding socket driver has died. Even though long-lived requests * may be marked as nonblocking, nonblocking calls are still handled as * long-lived in terms of VFS processing. * * For an overview of the socket driver requests and replies, message layouts, * and which requests are long-lived or short-lived (i.e. may suspend or not), * please refer to the corresponding table in the libsockdriver source code. * * For most long-lived socket requests, the main VFS thread processes the reply * from the socket driver. This typically consists of waking up the user * process that originally issued the system call on the socket by simply * relaying the call's result code. Some socket calls require a specific reply * message and/or additional post-call actions; for those, resume_*() calls are * made back into the upper socket layer. * * If a process is interrupted by a signal, any ongoing long-lived socket * request must be canceled. This is done by sending a one-way cancel request * to the socket driver, and waiting for it to reply to the original request. * In this case, the reply will be processed from the worker thread that is * handling the cancel operation. 
Canceling does not imply call failure: the * cancellation may result in a partial I/O reply, and a successful reply may * cross the cancel request. * * One main exception is the reply to an accept request. Once a connection has * been accepted, a new socket has to be created for it. This requires actions * that require the ability to block the current thread, and so, a worker * thread is spawned for processing successful accept replies, unless the reply * was received from a worker thread already (as may be the case if the accept * request was being canceled). */ #include "fs.h" #include #include /* * Send a short-lived request message to the given socket driver, and suspend * the current worker thread until a reply message has been received. On * success, the function will return OK, and the reply message will be stored * in the message structure pointed to by 'm_ptr'. The function may fail if * the socket driver dies before sending a reply. In that case, the function * will return a negative error code, and also store the same negative error * code in the m_type field of the 'm_ptr' message structure. */ static int sdev_sendrec(struct smap * sp, message * m_ptr) { int r; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, m_ptr, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_sendrec failed: %d", r); /* Suspend this thread until we have received the response. */ self->w_task = sp->smap_endpt; self->w_drv_sendrec = m_ptr; worker_wait(); self->w_task = NONE; assert(self->w_drv_sendrec == NULL); return (!IS_SDEV_RS(m_ptr->m_type)) ? m_ptr->m_type : OK; } /* * Suspend the current process for later completion of its system call. 
*/ int sdev_suspend(dev_t dev, cp_grant_id_t grant0, cp_grant_id_t grant1, cp_grant_id_t grant2, int fd, vir_bytes buf) { fp->fp_sdev.dev = dev; fp->fp_sdev.callnr = job_call_nr; fp->fp_sdev.grant[0] = grant0; fp->fp_sdev.grant[1] = grant1; fp->fp_sdev.grant[2] = grant2; if (job_call_nr == VFS_ACCEPT) { assert(fd != -1); assert(buf == 0); fp->fp_sdev.aux.fd = fd; } else if (job_call_nr == VFS_RECVMSG) { assert(fd == -1); /* * TODO: we are not yet consistent enough in dealing with * mapped NULL pages to have an assert(buf != 0) here.. */ fp->fp_sdev.aux.buf = buf; } else { assert(fd == -1); assert(buf == 0); } suspend(FP_BLOCKED_ON_SDEV); return SUSPEND; } /* * Create a socket or socket pair. Return OK on success, with the new socket * device identifier(s) stored in the 'dev' array. Return an error code upon * failure. */ int sdev_socket(int domain, int type, int protocol, dev_t * dev, int pair) { struct smap *sp; message m; sockid_t sock_id, sock_id2; int r; /* We could return EAFNOSUPPORT, but the caller should have checked. */ if ((sp = get_smap_by_domain(domain)) == NULL) panic("VFS: sdev_socket for unknown domain"); /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = pair ? SDEV_SOCKETPAIR : SDEV_SOCKET; m.m_vfs_lsockdriver_socket.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_socket.domain = domain; m.m_vfs_lsockdriver_socket.type = type; m.m_vfs_lsockdriver_socket.protocol = protocol; m.m_vfs_lsockdriver_socket.user_endpt = who_e; /* Send the request, and wait for the reply. */ if ((r = sdev_sendrec(sp, &m)) != OK) return r; /* socket driver died */ /* Parse the reply message, and check for protocol errors. */ if (m.m_type != SDEV_SOCKET_REPLY) { printf("VFS: %d sent bad reply type %d for call %d\n", sp->smap_endpt, m.m_type, job_call_nr); return EIO; } sock_id = m.m_lsockdriver_vfs_socket_reply.sock_id; sock_id2 = m.m_lsockdriver_vfs_socket_reply.sock_id2; /* Check for regular errors. Upon success, return the socket(s). 
*/ if (sock_id < 0) return sock_id; dev[0] = make_smap_dev(sp, sock_id); if (pair) { /* Okay, one more protocol error. */ if (sock_id2 < 0) { printf("VFS: %d sent bad SOCKETPAIR socket ID %d\n", sp->smap_endpt, sock_id2); (void)sdev_close(dev[0], FALSE /*may_suspend*/); return EIO; } dev[1] = make_smap_dev(sp, sock_id2); } return OK; } /* * Bind or connect a socket to a particular address. These calls may block, so * suspend the current process instead of making the thread wait for the reply. */ static int sdev_bindconn(dev_t dev, int type, vir_bytes addr, unsigned int addr_len, int filp_flags) { struct smap *sp; sockid_t sock_id; cp_grant_id_t grant; message m; int r; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len, CPF_READ); if (!GRANT_VALID(grant)) panic("VFS: cpf_grant_magic failed"); /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = type; m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_addr.sock_id = sock_id; m.m_vfs_lsockdriver_addr.grant = grant; m.m_vfs_lsockdriver_addr.len = addr_len; m.m_vfs_lsockdriver_addr.user_endpt = who_e; m.m_vfs_lsockdriver_addr.sflags = (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_bindconn failed: %d", r); /* Suspend the process until the reply arrives. */ return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0); } /* * Bind a socket to a local address. */ int sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags) { return sdev_bindconn(dev, SDEV_BIND, addr, addr_len, filp_flags); } /* * Connect a socket to a remote address. 
*/ int sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags) { return sdev_bindconn(dev, SDEV_CONNECT, addr, addr_len, filp_flags); } /* * Send and receive a "simple" request: listen, shutdown, or close. Note that * while cancel requests use the same request format, they require a different * way of handling their replies. */ static int sdev_simple(dev_t dev, int type, int param) { struct smap *sp; sockid_t sock_id; message m; int r; assert(type == SDEV_LISTEN || type == SDEV_SHUTDOWN || type == SDEV_CLOSE); if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = type; m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_simple.sock_id = sock_id; m.m_vfs_lsockdriver_simple.param = param; /* Send the request, and wait for the reply. */ if ((r = sdev_sendrec(sp, &m)) != OK) return r; /* socket driver died */ /* Parse and return the reply. */ if (m.m_type != SDEV_REPLY) { printf("VFS: %d sent bad reply type %d for call %d\n", sp->smap_endpt, m.m_type, job_call_nr); return EIO; } return m.m_lsockdriver_vfs_reply.status; } /* * Put a socket in listening mode. */ int sdev_listen(dev_t dev, int backlog) { assert(backlog >= 0); return sdev_simple(dev, SDEV_LISTEN, backlog); } /* * Accept a new connection on a socket. */ int sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len, int filp_flags, int listen_fd) { struct smap *sp; sockid_t sock_id; cp_grant_id_t grant; message m; int r; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ if (addr != 0) { grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, addr_len, CPF_WRITE); if (!GRANT_VALID(grant)) panic("VFS: cpf_grant_magic failed"); } else grant = GRANT_INVALID; /* Prepare the request message. 
*/ memset(&m, 0, sizeof(m)); m.m_type = SDEV_ACCEPT; m.m_vfs_lsockdriver_addr.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_addr.sock_id = sock_id; m.m_vfs_lsockdriver_addr.grant = grant; m.m_vfs_lsockdriver_addr.len = addr_len; m.m_vfs_lsockdriver_addr.user_endpt = who_e; m.m_vfs_lsockdriver_addr.sflags = (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_accept failed: %d", r); /* Suspend the process until the reply arrives. */ return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, listen_fd, 0); } /* * Send or receive a message on a socket. All read (read(2), recvfrom(2), and * recvmsg(2)) and write (write(2), sendto(2), sendmsg(2)) system calls on * sockets pass through this function. The function is named sdev_readwrite * rather than sdev_sendrecv to avoid confusion with sdev_sendrec. */ int sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len, vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf, unsigned int addr_len, int flags, int rw_flag, int filp_flags, vir_bytes user_buf) { struct smap *sp; sockid_t sock_id; cp_grant_id_t data_grant, ctl_grant, addr_grant; message m; int r, bits; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ data_grant = GRANT_INVALID; ctl_grant = GRANT_INVALID; addr_grant = GRANT_INVALID; bits = (rw_flag == WRITING) ? CPF_READ : CPF_WRITE; /* * Supposedly it is allowed to send or receive zero data bytes, even * though it is a bad idea as the return value will then be zero, which * may also indicate EOF (as per W. Richard Stevens). 
*/ if (data_buf != 0) { data_grant = cpf_grant_magic(sp->smap_endpt, who_e, data_buf, data_len, bits); if (!GRANT_VALID(data_grant)) panic("VFS: cpf_grant_magic failed"); } if (ctl_buf != 0) { ctl_grant = cpf_grant_magic(sp->smap_endpt, who_e, ctl_buf, ctl_len, bits); if (!GRANT_VALID(ctl_grant)) panic("VFS: cpf_grant_magic failed"); } if (addr_buf != 0) { addr_grant = cpf_grant_magic(sp->smap_endpt, who_e, addr_buf, addr_len, bits); if (!GRANT_VALID(addr_grant)) panic("VFS: cpf_grant_magic failed"); } /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = (rw_flag == WRITING) ? SDEV_SEND : SDEV_RECV; m.m_vfs_lsockdriver_sendrecv.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_sendrecv.sock_id = sock_id; m.m_vfs_lsockdriver_sendrecv.data_grant = data_grant; m.m_vfs_lsockdriver_sendrecv.data_len = data_len; m.m_vfs_lsockdriver_sendrecv.ctl_grant = ctl_grant; m.m_vfs_lsockdriver_sendrecv.ctl_len = ctl_len; m.m_vfs_lsockdriver_sendrecv.addr_grant = addr_grant; m.m_vfs_lsockdriver_sendrecv.addr_len = addr_len; m.m_vfs_lsockdriver_sendrecv.user_endpt = who_e; m.m_vfs_lsockdriver_sendrecv.flags = flags; if (filp_flags & O_NONBLOCK) m.m_vfs_lsockdriver_sendrecv.flags |= MSG_DONTWAIT; if (rw_flag == WRITING && (filp_flags & O_NOSIGPIPE)) m.m_vfs_lsockdriver_sendrecv.flags |= MSG_NOSIGNAL; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_readwrite failed: %d", r); /* Suspend the process until the reply arrives. */ return sdev_suspend(dev, data_grant, ctl_grant, addr_grant, -1, user_buf); } /* * Perform I/O control. */ int sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf, int filp_flags) { struct smap *sp; sockid_t sock_id; cp_grant_id_t grant; message m; int r; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ grant = make_ioctl_grant(sp->smap_endpt, who_e, buf, request); /* Prepare the request message. 
*/ memset(&m, 0, sizeof(m)); m.m_type = SDEV_IOCTL; m.m_vfs_lsockdriver_ioctl.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_ioctl.sock_id = sock_id; m.m_vfs_lsockdriver_ioctl.request = request; m.m_vfs_lsockdriver_ioctl.grant = grant; m.m_vfs_lsockdriver_ioctl.user_endpt = who_e; m.m_vfs_lsockdriver_ioctl.sflags = (filp_flags & O_NONBLOCK) ? SDEV_NONBLOCK : 0; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_ioctl failed: %d", r); /* Suspend the process until the reply arrives. */ return sdev_suspend(dev, grant, GRANT_INVALID, GRANT_INVALID, -1, 0); } /* * Set socket options. */ int sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr, unsigned int len) { struct smap *sp; sockid_t sock_id; cp_grant_id_t grant; message m; int r; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, len, CPF_READ); if (!GRANT_VALID(grant)) panic("VFS: cpf_grant_magic failed"); /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = SDEV_SETSOCKOPT; m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_getset.sock_id = sock_id; m.m_vfs_lsockdriver_getset.level = level; m.m_vfs_lsockdriver_getset.name = name; m.m_vfs_lsockdriver_getset.grant = grant; m.m_vfs_lsockdriver_getset.len = len; /* Send the request, and wait for the reply. */ r = sdev_sendrec(sp, &m); /* Free resources. */ (void)cpf_revoke(grant); if (r != OK) return r; /* socket driver died */ /* Parse and return the reply. */ if (m.m_type != SDEV_REPLY) { printf("VFS: %d sent bad reply type %d for call %d\n", sp->smap_endpt, m.m_type, job_call_nr); return EIO; } return m.m_lsockdriver_vfs_reply.status; } /* * Send and receive a "get" request: getsockopt, getsockname, or getpeername. 
*/ static int sdev_get(dev_t dev, int type, int level, int name, vir_bytes addr, unsigned int * len) { struct smap *sp; sockid_t sock_id; cp_grant_id_t grant; message m; int r; assert(type == SDEV_GETSOCKOPT || type == SDEV_GETSOCKNAME || type == SDEV_GETPEERNAME); if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Allocate resources. */ grant = cpf_grant_magic(sp->smap_endpt, who_e, addr, *len, CPF_WRITE); if (!GRANT_VALID(grant)) panic("VFS: cpf_grant_magic failed"); /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = type; m.m_vfs_lsockdriver_getset.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_getset.sock_id = sock_id; m.m_vfs_lsockdriver_getset.level = level; m.m_vfs_lsockdriver_getset.name = name; m.m_vfs_lsockdriver_getset.grant = grant; m.m_vfs_lsockdriver_getset.len = *len; /* Send the request, and wait for the reply. */ r = sdev_sendrec(sp, &m); /* Free resources. */ (void)cpf_revoke(grant); if (r != OK) return r; /* socket driver died */ /* Parse and return the reply. */ if (m.m_type != SDEV_REPLY) { printf("VFS: %d sent bad reply type %d for call %d\n", sp->smap_endpt, m.m_type, job_call_nr); return EIO; } if ((r = m.m_lsockdriver_vfs_reply.status) < 0) return r; *len = (unsigned int)r; return OK; } /* * Get socket options. */ int sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr, unsigned int * len) { return sdev_get(dev, SDEV_GETSOCKOPT, level, name, addr, len); } /* * Get the local address of a socket. */ int sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int * addr_len) { return sdev_get(dev, SDEV_GETSOCKNAME, 0, 0, addr, addr_len); } /* * Get the remote address of a socket. */ int sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int * addr_len) { return sdev_get(dev, SDEV_GETPEERNAME, 0, 0, addr, addr_len); } /* * Shut down socket send and receive operations. 
*/ int sdev_shutdown(dev_t dev, int how) { assert(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR); return sdev_simple(dev, SDEV_SHUTDOWN, how); } /* * Close the socket identified by the given socket device number. */ int sdev_close(dev_t dev, int may_suspend) { struct smap *sp; sockid_t sock_id; message m; int r; /* * Originally, all close requests were blocking the calling thread, but * the new support for SO_LINGER has changed that. In a very strictly * limited subset of cases - namely, the user process calling close(2), * we suspend the close request and handle it asynchronously. In all * other cases, including close-on-exit, close-on-exec, and even dup2, * the close is issued as a thread-synchronous request instead. */ if (may_suspend) { if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = SDEV_CLOSE; m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_simple.sock_id = sock_id; m.m_vfs_lsockdriver_simple.param = 0; /* Send the request to the driver. */ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_bindconn failed: %d", r); /* Suspend the process until the reply arrives. */ return sdev_suspend(dev, GRANT_INVALID, GRANT_INVALID, GRANT_INVALID, -1, 0); } else /* Block the calling thread until the socket is closed. */ return sdev_simple(dev, SDEV_CLOSE, SDEV_NONBLOCK); } /* * Initiate a select call on a socket device. Return OK iff the request was * sent, without suspending the process. */ int sdev_select(dev_t dev, int ops) { struct smap *sp; sockid_t sock_id; message m; int r; if ((sp = get_smap_by_dev(dev, &sock_id)) == NULL) return EIO; /* Prepare the request message. */ memset(&m, 0, sizeof(m)); m.m_type = SDEV_SELECT; m.m_vfs_lsockdriver_select.sock_id = sock_id; m.m_vfs_lsockdriver_select.ops = ops; /* Send the request to the driver. 
*/ if ((r = asynsend3(sp->smap_endpt, &m, AMF_NOREPLY)) != OK) panic("VFS: asynsend in sdev_select failed: %d", r); return OK; } /* * A reply has arrived for a previous socket accept request, and the reply * indicates that a socket has been accepted. A status is also returned; * usually, this status is OK, but if not, the newly accepted socket must be * closed immediately again. Process the low-level aspects of the reply, and * call resume_accept() to let the upper socket layer handle the rest. This * function is always called from a worker thread, and may thus block. */ static void sdev_finish_accept(struct fproc * rfp, message * m_ptr) { struct smap *sp; sockid_t sock_id; dev_t dev; unsigned int len; int status; assert(rfp->fp_sdev.callnr == VFS_ACCEPT); assert(m_ptr->m_type == SDEV_ACCEPT_REPLY); assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id >= 0); /* Free resources. Accept requests use up to one grant. */ if (GRANT_VALID(rfp->fp_sdev.grant[0])) cpf_revoke(rfp->fp_sdev.grant[0]); assert(!GRANT_VALID(rfp->fp_sdev.grant[1])); assert(!GRANT_VALID(rfp->fp_sdev.grant[2])); sock_id = m_ptr->m_lsockdriver_vfs_accept_reply.sock_id; status = m_ptr->m_lsockdriver_vfs_accept_reply.status; len = m_ptr->m_lsockdriver_vfs_accept_reply.len; /* * We do not want the upper socket layer (socket.c) to deal with smap * and socket ID details, so we construct the new socket device number * here. We won't use the saved listen FD to determine the smap entry * here, since that involves file pointers and other upper-layer-only * stuff. So we have to look it up by the source endpoint. As a * result, we detect some driver deaths here (but not all: see below). */ if ((sp = get_smap_by_endpt(m_ptr->m_source)) != NULL) { /* Leave 'status' as is, regardless of whether it is OK. */ dev = make_smap_dev(sp, sock_id); } else { /* * The driver must have died while the thread was blocked on * activation. Extremely rare, but theoretically possible. 
* Some driver deaths are indicated only by a driver-up * announcement though; resume_accept() will detect this by * checking that the listening socket has not been invalidated. */ status = EIO; dev = NO_DEV; } /* Let the upper socket layer handle the rest. */ resume_accept(rfp, status, dev, len, rfp->fp_sdev.aux.fd); } /* * Worker thread stub for finishing successful accept requests. */ static void do_accept_reply(void) { sdev_finish_accept(fp, &job_m_in); } /* * With the exception of successful accept requests, this function is called * whenever a reply is received for a socket driver request for which the * corresponding user process was suspended (as opposed to requests which just * suspend the worker thread), i.e., for long-lasting socket calls. This * function is also called if the socket driver has died during a long-lasting * socket call, in which case the given message's m_type is a negative error * code. * * The division between the upper socket layer (socket.c) and the lower socket * layer (this file) here is roughly: if resuming the system call involves no * more than a simple replycode() call, do that here; otherwise call into the * upper socket layer to handle the details. In any case, do not ever let the * upper socket layer deal with reply message parsing or suspension state. * * This function may or may not be called from a worker thread; as such, it * MUST NOT block its calling thread. This function is called for failed * accept requests; successful accept requests have their replies routed * through sdev_finish_accept() instead, because those require a worker thread. */ static void sdev_finish(struct fproc * rfp, message * m_ptr) { unsigned int ctl_len, addr_len; int callnr, status, flags; /* The suspension status must just have been cleared by the caller. */ assert(rfp->fp_blocked_on == FP_BLOCKED_ON_NONE); /* * Free resources. Every suspending call sets all grant fields, so we * can safely revoke all of them without testing the original call. 
*/ if (GRANT_VALID(rfp->fp_sdev.grant[0])) cpf_revoke(rfp->fp_sdev.grant[0]); if (GRANT_VALID(rfp->fp_sdev.grant[1])) cpf_revoke(rfp->fp_sdev.grant[1]); if (GRANT_VALID(rfp->fp_sdev.grant[2])) cpf_revoke(rfp->fp_sdev.grant[2]); /* * Now that the socket driver call has finished (or been stopped due to * driver death), we need to finish the corresponding system call from * the user process. The action to take depends on the system call. */ callnr = rfp->fp_sdev.callnr; switch (callnr) { case VFS_BIND: case VFS_CONNECT: case VFS_WRITE: case VFS_SENDTO: case VFS_SENDMSG: case VFS_IOCTL: case VFS_CLOSE: /* * These calls all use the same SDEV_REPLY reply type and only * need to reply an OK-or-error status code back to userland. */ if (m_ptr->m_type == SDEV_REPLY) { status = m_ptr->m_lsockdriver_vfs_reply.status; /* * For close(2) calls, the return value must indicate * that the file descriptor has been closed. */ if (callnr == VFS_CLOSE && status != OK && status != EINPROGRESS) status = OK; } else if (m_ptr->m_type < 0) { status = m_ptr->m_type; } else { printf("VFS: %d sent bad reply type %d for call %d\n", m_ptr->m_source, m_ptr->m_type, callnr); status = EIO; } replycode(rfp->fp_endpoint, status); break; case VFS_READ: case VFS_RECVFROM: case VFS_RECVMSG: /* * These calls use SDEV_RECV_REPLY. The action to take depends * on the exact call. 
*/ ctl_len = addr_len = 0; flags = 0; if (m_ptr->m_type == SDEV_RECV_REPLY) { status = m_ptr->m_lsockdriver_vfs_recv_reply.status; ctl_len = m_ptr->m_lsockdriver_vfs_recv_reply.ctl_len; addr_len = m_ptr->m_lsockdriver_vfs_recv_reply.addr_len; flags = m_ptr->m_lsockdriver_vfs_recv_reply.flags; } else if (m_ptr->m_type < 0) { status = m_ptr->m_type; } else { printf("VFS: %d sent bad reply type %d for call %d\n", m_ptr->m_source, m_ptr->m_type, callnr); status = EIO; } switch (callnr) { case VFS_READ: replycode(rfp->fp_endpoint, status); break; case VFS_RECVFROM: resume_recvfrom(rfp, status, addr_len); break; case VFS_RECVMSG: resume_recvmsg(rfp, status, ctl_len, addr_len, flags, rfp->fp_sdev.aux.buf); break; } break; case VFS_ACCEPT: /* * This call uses SDEV_ACCEPT_REPLY. We only get here if the * accept call has failed without creating a new socket, in * which case we can simply call replycode() with the error. * For nothing other than consistency, we let resume_accept() * handle this case too. */ addr_len = 0; if (m_ptr->m_type == SDEV_ACCEPT_REPLY) { assert(m_ptr->m_lsockdriver_vfs_accept_reply.sock_id < 0); status = m_ptr->m_lsockdriver_vfs_accept_reply.status; addr_len = m_ptr->m_lsockdriver_vfs_accept_reply.len; } else if (m_ptr->m_type < 0) { status = m_ptr->m_type; } else { printf("VFS: %d sent bad reply type %d for call %d\n", m_ptr->m_source, m_ptr->m_type, callnr); status = EIO; } /* * Quick rundown of m_lsockdriver_vfs_accept_reply cases: * * - sock_id >= 0, status == OK: new socket accepted * - sock_id >= 0, status != OK: new socket must be closed * - sock_id < 0, status != OK: failure accepting socket * - sock_id < 0, status == OK: invalid, covered right here * * See libsockdriver for why there are two reply fields at all. 
*/ if (status >= 0) { printf("VFS: %d sent bad status %d for call %d\n", m_ptr->m_source, status, callnr); status = EIO; } resume_accept(rfp, status, NO_DEV, addr_len, rfp->fp_sdev.aux.fd); break; default: /* * Ultimately, enumerating all system calls that may cause * socket I/O may prove too cumbersome. In that case, the * callnr field could be replaced by a field that stores the * combination of the expected reply type and the action to * take, for example. */ panic("VFS: socket reply %d for unknown call %d from %d", m_ptr->m_type, callnr, rfp->fp_endpoint); } } /* * Abort the suspended socket call for the given process, because the * corresponding socket driver has died. */ void sdev_stop(struct fproc * rfp) { message m; assert(rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV); rfp->fp_blocked_on = FP_BLOCKED_ON_NONE; /* * We use one single approach both here and when stopping worker * threads: the reply message's m_type is set to an error code (always * EIO for now) instead of an actual SDEV_ reply code. We test for * this case in non-suspending calls as well as in sdev_finish(). */ m.m_type = EIO; sdev_finish(rfp, &m); } /* * Cancel the ongoing long-lasting socket call, because the calling process has * received a caught or terminating signal. This function is always called * from a worker thread (as part of PM) work, with 'fp' set to the process that * issued the original system call. The calling function has just unsuspended * the process out of _SDEV blocking state. The job of this function is to * issue a cancel request and then block until a reply comes in; the reply may * indicate success, in which case it must be handled accordingly. */ void sdev_cancel(void) { struct smap *sp; message m; sockid_t sock_id; /* The suspension status must just have been cleared by the caller. */ assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE); if ((sp = get_smap_by_dev(fp->fp_sdev.dev, &sock_id)) != NULL) { /* Prepare the request message. 
*/ memset(&m, 0, sizeof(m)); m.m_type = SDEV_CANCEL; m.m_vfs_lsockdriver_simple.req_id = (sockid_t)who_e; m.m_vfs_lsockdriver_simple.sock_id = sock_id; /* * Send the cancel request, and wait for a reply. The reply * will be for the original request and must be processed * accordingly. It is possible that the original request * actually succeeded, because 1) the cancel request resulted * in partial success or 2) the original reply and the cancel * request crossed each other. It is because of the second * case that a socket driver must not respond at all to a * cancel operation for an unknown request. */ sdev_sendrec(sp, &m); } else m.m_type = EIO; /* * Successful accept requests require special processing, but since we * are already operating from a working thread here, we need not spawn * an additional worker thread for this case. */ if (m.m_type == SDEV_ACCEPT_REPLY && m.m_lsockdriver_vfs_accept_reply.sock_id >= 0) sdev_finish_accept(fp, &m); else sdev_finish(fp, &m); } /* * A socket driver has sent a reply to a socket request. Process it, by either * waking up an active worker thread, finishing the system call from here, or * (in the exceptional case of accept calls) spawning a new worker thread to * process the reply. This function MUST NOT block its calling thread. 
*/
void
sdev_reply(void)
{
	struct fproc *rfp;
	struct smap *sp;
	struct worker_thread *wp;
	sockid_t req_id = -1;
	dev_t dev;
	int slot;

	/* Only accept replies from endpoints known as socket drivers. */
	if ((sp = get_smap_by_endpt(who_e)) == NULL) {
		printf("VFS: ignoring sock dev reply from unknown driver %d\n",
		    who_e);
		return;
	}

	/*
	 * Obtain the request ID from the reply; its location depends on the
	 * reply type. Select replies have no request ID and are passed on
	 * to the select code right away.
	 */
	switch (call_nr) {
	case SDEV_REPLY:
		req_id = m_in.m_lsockdriver_vfs_reply.req_id;
		break;
	case SDEV_SOCKET_REPLY:
		req_id = m_in.m_lsockdriver_vfs_socket_reply.req_id;
		break;
	case SDEV_ACCEPT_REPLY:
		req_id = m_in.m_lsockdriver_vfs_accept_reply.req_id;
		break;
	case SDEV_RECV_REPLY:
		req_id = m_in.m_lsockdriver_vfs_recv_reply.req_id;
		break;
	case SDEV_SELECT1_REPLY:
		dev = make_smap_dev(sp,
		    m_in.m_lsockdriver_vfs_select_reply.sock_id);
		select_sdev_reply1(dev,
		    m_in.m_lsockdriver_vfs_select_reply.status);
		return;
	case SDEV_SELECT2_REPLY:
		dev = make_smap_dev(sp,
		    m_in.m_lsockdriver_vfs_select_reply.sock_id);
		select_sdev_reply2(dev,
		    m_in.m_lsockdriver_vfs_select_reply.status);
		return;
	default:
		printf("VFS: ignoring unknown sock dev reply %d from %d\n",
		    call_nr, who_e);
		return;
	}

	/* The request ID is the endpoint of the process that made the call. */
	if (isokendpt((endpoint_t)req_id, &slot) != OK) {
		printf("VFS: ignoring sock dev reply from %d for unknown %d\n",
		    who_e, req_id);
		return;
	}

	rfp = &fproc[slot];
	wp = rfp->fp_worker;
	if (wp != NULL && wp->w_task == who_e && wp->w_drv_sendrec != NULL) {
		/* A worker thread is blocked in sdev_sendrec(): wake it up. */
		assert(!fp_is_blocked(rfp));
		*wp->w_drv_sendrec = m_in;
		wp->w_drv_sendrec = NULL;
		worker_signal(wp);	/* resume suspended thread */
		/*
		 * It is up to the worker thread to 1) check that the reply is
		 * of the right type for the request, and 2) keep in mind that
		 * the reply type may be EIO in case the socket driver died.
		 */
	} else if (rfp->fp_blocked_on != FP_BLOCKED_ON_SDEV ||
	    get_smap_by_dev(rfp->fp_sdev.dev, NULL) != sp) {
		printf("VFS: ignoring sock dev reply, %d not blocked on %d\n",
		    rfp->fp_endpoint, who_e);
		return;
	} else if (call_nr == SDEV_ACCEPT_REPLY &&
	    m_in.m_lsockdriver_vfs_accept_reply.sock_id >= 0) {
		/*
		 * For accept replies that return a new socket, we need to
		 * spawn a worker thread, because accept calls may block (so
		 * there will no longer be a worker thread) and processing the
		 * reply requires additional blocking calls (which we cannot
		 * issue from the main thread). This is tricky. Under no
		 * circumstances may we "lose" a legitimate reply, because this
		 * would lead to resource leaks in the socket driver. To this
		 * end, we rely on the current worker thread model to
		 * prioritize regular work over PM work. Still, sdev_cancel()
		 * may end up receiving the accept reply if it was already
		 * blocked waiting for the reply message, and it must then
		 * perform the same tasks.
		 */
		/*
		 * It is possible that if all threads are in use, there is a
		 * "gap" between starting the thread and its activation. The
		 * main problem for this case is that the socket driver dies
		 * within that gap. For accepts, we address this with no less
		 * than two checks: 1) in this file, by looking up the smap
		 * entry by the reply source endpoint again - if the entry is
		 * no longer valid, the socket driver must have died; 2) in
		 * socket.c, by revalidating the original listening socket - if
		 * the listening socket has been invalidated, the driver died.
		 *
		 * Since we unsuspend the process now, a socket driver sending
		 * two accept replies in a row may never cause VFS to attempt
		 * spawning two threads; the second reply should be ignored.
		 *
		 * The check below must therefore be made on 'rfp', the process
		 * for which the thread is started, and not on 'fp'.
		 */
		assert(rfp->fp_func == NULL);

		worker_start(rfp, do_accept_reply, &m_in, FALSE /*use_spare*/);

		/*
		 * TODO: I just introduced the notion of not using the fp_u
		 * union across yields after unsuspension, but for socket calls
		 * we have a lot of socket state to carry over, so I'm now
		 * immediately violating my own rule again here. Possible
		 * solutions: 1) introduce another blocking state just to mark
		 * the fp_u union in use (this has side effects though), 2)
		 * introduce a pseudo message type which covers both the accept
		 * reply fields and the fp_u state (do_pending_pipe does this),
		 * or 3) add a fp_flags flag for this purpose. In any case,
		 * the whole point is that we catch any attempts to reuse fp_u
		 * for other purposes and thus cause state corruption. This
		 * should not happen anyway, but it's too dangerous to leave
		 * entirely unchecked. --dcvmoole
		 */
		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
	} else {
		/* Any other reply can be processed right here and now. */
		rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;

		sdev_finish(rfp, &m_in);
	}
}