/* UNIX Domain Sockets - io.c - sending and receiving */ #include "uds.h" #include /* * Our UDS sockets do not have a send buffer. They only have a receive buffer. * This receive buffer, when not empty, is split up in segments. Each segment * may contain regular data, ancillary data, both, or (for SOCK_SEQPACKET and * SOCK_DGRAM) neither. There are two types of ancillary data: in-flight file * descriptors and sender credentials. In addition, for SOCK_DGRAM sockets, * the segment may contain the sender's socket path (if the sender's socket is * bound). Each segment has a header, containing the full segment size, * the size of the actual data in the segment (if any), and a flags field that * states which ancillary data are associated with the segment (if any). For * SOCK_STREAM type sockets, new data may be merged into a previous segment, * but only if it has no ancillary data. For the other two socket types, each * packet has its own header. The resulting behavior should be in line with * the POSIX "Socket Receive Queue" specification. * * More specifically, each segment consists of the following parts: * - always a five-byte header, containing a two-byte segment length (including * the header, so always non-zero), a two-byte regular data length (zero or * more), and a one-byte flags field which is a bitwise combination of * UDS_HAS_{FDS,CRED,PATH} flags; * - next, if UDS_HAS_CRED is set in the segment header: a sockcred structure; * since this structure is variable-size, the structure is prepended by a * single byte that contains the length of the structure (excluding the byte * itself, thus ranging from sizeof(struct sockcred) to UDS_MAXCREDLEN); * - next, if UDS_HAS_PATH is set in the segment header: a single byte that * contains the length of the sender's socket path, followed by the path * itself, without null terminator; * - next, if the data length is non-zero, the actual regular data. * If the segment is not the last in the receive buffer, it is followed by the * next segment immediately afterward. There is no alignment. 
* * It is the sender's responsibility to merge new data into the last segment * whenever possible, so that the receiver side never needs to consider more * than one segment at once. In order to allow such merging, each receive * buffer has not only a tail and in-use length (pointing to the head when * combined) but also an offset from the tail to the last header, if any. Note * that the receiver may over time still look at multiple segments for a single * request: this happens when a MSG_WAITALL request empties the buffer and then * blocks - the next piece of arriving data can then obviously not be merged. * * If a segment has the UDS_HAS_FDS flag set, then one or more in-flight file * descriptors are associated with the segment. These are stored in a separate * data structure, mainly to simplify cleaning up when the socket is shut down * for reading or closed. That structure also contains the number of file * descriptors associated with the current segment, so this is not stored in * the segment itself. As mentioned later, this may be changed in the future. * * On the sender side, there is a trade-off between fully utilizing the receive * buffer, and not repeatedly performing expensive actions for the same call: * it may be costly to determine exactly how many in-flight file descriptors * there will be (if any) and/or how much space is needed to store credentials. * We currently use the policy that we would rather block/reject a send request * that may (just) have fit in the remaining part of the receive buffer, than * obtain the same information multiple times or keep state between callbacks. * In practice this is not expected to make a difference, especially since * transfer of ancillary data should be rare anyway. */ /* * The current layout of the segment header is as follows. * * The first byte contains the upper eight bits of the total segment length. * The second byte contains the lower eight bits of the total segment length. 
* The third byte contains the upper eight bits of the data length. * The fourth byte contains the lower eight bits of the data length. * The fifth byte is a bitmask for ancillary data associated with the segment. */ #define UDS_HDRLEN 5 #define UDS_HAS_FDS 0x01 /* segment has in-flight file descriptors */ #define UDS_HAS_CRED 0x02 /* segment has sender credentials */ #define UDS_HAS_PATH 0x04 /* segment has source socket path */ #define UDS_MAXCREDLEN SOCKCREDSIZE(NGROUPS_MAX) #define uds_get_head(uds) \ ((size_t)((uds)->uds_tail + (uds)->uds_len) % UDS_BUF) #define uds_get_last(uds) \ ((size_t)((uds)->uds_tail + (uds)->uds_last) % UDS_BUF) #define uds_advance(pos,add) (((pos) + (add)) % UDS_BUF) /* * All in-flight file descriptors are (co-)owned by the UDS driver itself, as * local open file descriptors. Like any other process, the UDS driver can not * have more than OPEN_MAX open file descriptors at any time. Thus, this is * also the inherent maximum number of in-flight file descriptors. Therefore, * we maintain a single pool of in-flight FD structures, and we associate these * structures with sockets as needed. */ static struct uds_fd uds_fds[OPEN_MAX]; static SIMPLEQ_HEAD(uds_freefds, uds_fd) uds_freefds; static char uds_ctlbuf[UDS_CTL_MAX]; static int uds_ctlfds[UDS_CTL_MAX / sizeof(int)]; /* * Initialize the input/output part of the UDS service. */ void uds_io_init(void) { unsigned int slot; SIMPLEQ_INIT(&uds_freefds); for (slot = 0; slot < __arraycount(uds_fds); slot++) SIMPLEQ_INSERT_TAIL(&uds_freefds, &uds_fds[slot], ufd_next); } /* * Set up all input/output state for the given socket, which has just been * allocated. As part of this, allocate memory for the receive buffer of the * socket. Return OK or a negative error code. */ int uds_io_setup(struct udssock * uds) { /* TODO: decide if we should preallocate the memory. 
*/ if ((uds->uds_buf = mmap(NULL, UDS_BUF, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0)) == MAP_FAILED) return ENOMEM; uds->uds_tail = 0; uds->uds_len = 0; uds->uds_last = 0; SIMPLEQ_INIT(&uds->uds_fds); return OK; } /* * Clean up the input/output state for the given socket, which is about to be * freed. As part of this, deallocate memory for the receive buffer and close * any file descriptors still in flight on the socket. */ void uds_io_cleanup(struct udssock * uds) { /* Close any in-flight file descriptors. */ uds_io_reset(uds); /* Free the receive buffer memory. */ if (munmap(uds->uds_buf, UDS_BUF) != 0) panic("UDS: munmap failed: %d", errno); } /* * The socket is being closed or shut down for reading. If there are still any * in-flight file descriptors, theey will never be received anymore, so close * them now. */ void uds_io_reset(struct udssock * uds) { struct uds_fd *ufd; /* * The UDS service may have the last and only reference to any of these * file descriptors here. For that reason, we currently disallow * transfer of UDS file descriptors, because the close(2) here could * block on a socket close operation back to us, leading to a deadlock. * Also, we use a non-blocking variant of close(2), to prevent that we * end up hanging on sockets with SO_LINGER turned on. */ SIMPLEQ_FOREACH(ufd, &uds->uds_fds, ufd_next) { dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd)); closenb(ufd->ufd_fd); } SIMPLEQ_CONCAT(&uds_freefds, &uds->uds_fds); /* * If this reset happens as part of a shutdown, it might be done * again on close, so ensure that it will find a clean state. The * receive buffer should never be looked at again either way, but reset * it too just to be sure. */ uds->uds_tail = 0; uds->uds_len = 0; uds->uds_last = 0; SIMPLEQ_INIT(&uds->uds_fds); } /* * Return the maximum usable part of the receive buffer, in bytes. The return * value is used for the SO_SNDBUF and SO_RCVBUF socket options. 
*/ size_t uds_io_buflen(void) { /* * TODO: it would be nicer if at least for SOCK_STREAM-type sockets, we * could use the full receive buffer for data. This would require that * we store up to one header in the socket object rather than in the * receive buffer. */ return UDS_BUF - UDS_HDRLEN; } /* * Fetch 'len' bytes starting from absolute position 'pos' into the receive * buffer of socket 'uds', and copy them into the buffer pointed to by 'ptr'. * Return the absolute position of the first byte after the fetched data in the * receive buffer. */ static size_t uds_fetch(struct udssock * uds, size_t off, void * ptr, size_t len) { size_t left; assert(off < UDS_BUF); left = UDS_BUF - off; if (len >= left) { memcpy(ptr, &uds->uds_buf[off], left); if ((len -= left) > 0) memcpy((char *)ptr + left, &uds->uds_buf[0], len); return len; } else { memcpy(ptr, &uds->uds_buf[off], len); return off + len; } } /* * Store 'len' bytes from the buffer pointed to by 'ptr' into the receive * buffer of socket 'uds', starting at absolute position 'pos' into the receive * buffer. Return the absolute position of the first byte after the stored * data in the receive buffer. */ static size_t uds_store(struct udssock * uds, size_t off, const void * ptr, size_t len) { size_t left; assert(off < UDS_BUF); left = UDS_BUF - off; if (len >= left) { memcpy(&uds->uds_buf[off], ptr, left); if ((len -= left) > 0) memcpy(&uds->uds_buf[0], (const char *)ptr + left, len); return len; } else { memcpy(&uds->uds_buf[off], ptr, len); return off + len; } } /* * Fetch a segment header previously stored in the receive buffer of socket * 'uds' at absolute position 'off'. Return the absolute position of the first * byte after the header, as well as the entire segment length in 'seglen', the * length of the data in the segment in 'datalen', and the segment flags in * 'segflags'. 
*/ static size_t uds_fetch_hdr(struct udssock * uds, size_t off, size_t * seglen, size_t * datalen, unsigned int * segflags) { unsigned char hdr[UDS_HDRLEN]; off = uds_fetch(uds, off, hdr, sizeof(hdr)); *seglen = ((size_t)hdr[0] << 8) | (size_t)hdr[1]; *datalen = ((size_t)hdr[2] << 8) | (size_t)hdr[3]; *segflags = hdr[4]; assert(*seglen >= UDS_HDRLEN); assert(*seglen <= uds->uds_len); assert(*datalen <= *seglen - UDS_HDRLEN); assert(*segflags != 0 || *datalen == *seglen - UDS_HDRLEN); assert(!(*segflags & ~(UDS_HAS_FDS | UDS_HAS_CRED | UDS_HAS_PATH))); return off; } /* * Store a segment header in the receive buffer of socket 'uds' at absolute * position 'off', with the segment length 'seglen', the segment data length * 'datalen', and the segment flags 'segflags'. Return the absolute receive * buffer position of the first data byte after the stored header. */ static size_t uds_store_hdr(struct udssock * uds, size_t off, size_t seglen, size_t datalen, unsigned int segflags) { unsigned char hdr[UDS_HDRLEN]; assert(seglen <= USHRT_MAX); assert(datalen <= seglen); assert(segflags <= UCHAR_MAX); assert(!(segflags & ~(UDS_HAS_FDS | UDS_HAS_CRED | UDS_HAS_PATH))); hdr[0] = (seglen >> 8) & 0xff; hdr[1] = seglen & 0xff; hdr[2] = (datalen >> 8) & 0xff; hdr[3] = datalen & 0xff; hdr[4] = segflags; return uds_store(uds, off, hdr, sizeof(hdr)); } /* * Perform initial checks on a send request, before it may potentially be * suspended. Return OK if this send request is valid, or a negative error * code if it is not. */ int uds_pre_send(struct sock * sock, size_t len, socklen_t ctl_len __unused, const struct sockaddr * addr, socklen_t addr_len __unused, endpoint_t user_endpt __unused, int flags) { struct udssock *uds = (struct udssock *)sock; size_t pathlen; /* * Reject calls with unknown flags. Besides the flags handled entirely * by libsockevent (which are not part of 'flags' here), that is all of * them. 
TODO: ensure that we should really reject all other flags * rather than ignore them. */ if (flags != 0) return EOPNOTSUPP; /* * Perform very basic address and message size checks on the send call. * For non-stream sockets, we must reject packets that may never fit in * the receive buffer, or otherwise (at least for SOCK_SEQPACKET) the * send call may end up being suspended indefinitely. Therefore, we * assume the worst-case scenario, which is that a full set of * credentials must be associated with the packet. As a result, we may * reject some large packets that could actually just fit. Checking * the peer's LOCAL_CREDS setting here is not safe: even if we know the * peer already at all (for SOCK_DGRAM we do not), the send may still * block and the option toggled before it unblocks. */ switch (uds_get_type(uds)) { case SOCK_STREAM: /* Nothing to check for this case. */ break; case SOCK_SEQPACKET: if (len > UDS_BUF - UDS_HDRLEN - 1 - UDS_MAXCREDLEN) return EMSGSIZE; break; case SOCK_DGRAM: if (!uds_has_link(uds) && addr == NULL) return EDESTADDRREQ; /* * The path is stored without null terminator, but with leading * byte containing the path length--if there is a path at all. */ pathlen = (size_t)uds->uds_pathlen; if (pathlen > 0) pathlen++; if (len > UDS_BUF - UDS_HDRLEN - pathlen - 1 - UDS_MAXCREDLEN) return EMSGSIZE; break; default: assert(0); } return OK; } /* * Determine whether the (real or pretend) send request should be processed * now, suspended until later, or rejected based on the current socket state. * Return OK if the send request should be processed now. Return SUSPEND if * the send request should be retried later. Return an appropriate negative * error code if the send request should fail. 
*/
static int
uds_send_test(struct udssock * uds, size_t len, socklen_t ctl_len, size_t min,
	int partial)
{
	struct udssock *conn;
	size_t avail, hdrlen, credlen;

	assert(!uds_is_shutdown(uds, SFL_SHUT_WR));

	/* Datagram sends are never suspended or rejected at this stage. */
	if (uds_get_type(uds) == SOCK_DGRAM)
		return OK;

	/* Check the connection state of this connection-type socket. */
	if (uds_is_connecting(uds))
		return SUSPEND;
	if (!uds_is_connected(uds) && !uds_is_disconnected(uds))
		return ENOTCONN;
	if (!uds_has_conn(uds))
		return EPIPE;

	conn = uds->uds_conn;

	if (uds_is_shutdown(conn, SFL_SHUT_RD))
		return EPIPE;

	/*
	 * What remains is to see whether the peer's receive buffer has enough
	 * room.  For SOCK_STREAM sockets, at least 'min' bytes must fit, as
	 * long as that is a value with which forward progress can be made at
	 * all.  For SOCK_SEQPACKET sockets, the whole packet of 'len' bytes
	 * must fit.  Either way, room is also needed for the metadata that
	 * accompanies the data.
	 *
	 * In contrast to uds_pre_send(), it is now safe to check whether the
	 * peer wants credentials.  Their actual size is still unknown, so we
	 * once more assume the maximum.  Something similar holds for file
	 * descriptors passed as control data: all we have is the control
	 * length, and we take a non-zero length to mean that there may be
	 * file descriptors.
	 *
	 * We overestimate because getting the exact sizes, which requires
	 * obtaining the credentials or copying in the control data, is very
	 * costly.  That should be done only once it is certain that the send
	 * call will not be suspended anyway.  Overestimating the required
	 * space is harmless.  Underestimating it is not: applications that
	 * combine select(2) with non-blocking sockets could then end up in a
	 * busy-wait loop.
	 */
	credlen = (!partial && (conn->uds_flags & UDSF_PASSCRED)) ?
	    1 + UDS_MAXCREDLEN : 0;

	avail = UDS_BUF - conn->uds_len;

	if (uds_get_type(uds) != SOCK_STREAM)
		hdrlen = UDS_HDRLEN;
	else {
		/* Cap the low threshold at what can ever be sent at once. */
		if (min > UDS_BUF - UDS_HDRLEN - credlen)
			min = UDS_BUF - UDS_HDRLEN - credlen;

		/*
		 * Only failing to meet the low threshold is a reason to
		 * suspend.  Beyond that, (partial) progress is possible.
		 */
		if (len > min)
			len = min;

		/*
		 * The new data can be merged into the previous segment, and
		 * thus needs no header space, if the receive buffer is not
		 * empty, there are certainly no file descriptors to transfer,
		 * and no credentials have to be stored.  This is always so
		 * when resuming a send that has already partially completed.
		 */
		if (avail != UDS_BUF && ctl_len == 0 && credlen == 0)
			hdrlen = 0;
		else
			hdrlen = UDS_HDRLEN;
	}

	return (avail < hdrlen + credlen + len) ? SUSPEND : OK;
}

/*
 * Find the socket that is to receive the data of a send request, after the
 * send test has passed.  On success, return OK and store the peer socket in
 * 'peerp'.  On failure, return an appropriate error code.
 */
static int
uds_send_peer(struct udssock * uds, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt, struct udssock ** peerp)
{
	struct udssock *peer;
	int r;

	/* Connection-type sockets simply send to their connected peer. */
	if (uds_get_type(uds) != SOCK_DGRAM) {
		assert(uds_has_conn(uds));

		*peerp = uds->uds_conn;

		return OK;
	}

	if (uds_has_link(uds))
		peer = uds->uds_link;
	else {
		/* uds_pre_send() has already verified this. */
		assert(addr != NULL);

		/*
		 * Look up the socket that the given address refers to.  If
		 * there is one, check whether it is a suitable target.
		 */
		r = uds_lookup(uds, addr, addr_len, user_endpt, &peer);
		if (r != OK)
			return r;

		/*
		 * A peer socket that is itself connected to a target accepts
		 * packets from that target only.  POSIX does not specify an
		 * error code for this case, so we borrow the one from Linux.
		 */
		if (uds_has_link(peer) && peer->uds_link != uds)
			return EPERM;
	}

	/*
	 * There is no point in sending a packet that the receiving end will
	 * never receive, so drop it immediately.  Tell the caller about it,
	 * using the error code that NetBSD chose for this.
	 */
	if (uds_is_shutdown(peer, SFL_SHUT_RD))
		return ENOBUFS;

	*peerp = peer;

	return OK;
}

/*
 * Generate a new segment for the current send request, or arrange things such
 * that new data can be merged with a previous segment.  As part of this,
 * decide whether we can merge data at all.  The segment will be merged if, and
 * only if, all of the following requirements are met:
 *
 *   1) the socket is of type SOCK_STREAM;
 *   2) there is a previous segment in the receive buffer;
 *   3) there is no ancillary data for the current send request.
 *
 * Also copy in regular data (if any), retrieve the sender's credentials (if
 * needed), and copy over the source path (if applicable).  However, do not yet
 * commit the segment (or the new part to be merged), because the send request
 * may still fail for other reasons.
 *
 * On success, return the length of the new segment (or, when merging, the
 * length to be added to the last segment), as well as a flag indicating
 * whether we are merging into the last segment in 'mergep', the length of the
 * (new) data in the segment in 'datalenp', and the new segment's flags in
 * 'segflagsp' (always zero when merging).  Note that a return value of zero
 * implies that we are merging zero extra bytes into the last segment, which
 * means that effectively nothing changes; in that case the send call will be
 * cut short and return zero to the caller as well.  On failure, return a
 * negative error code.
*/
static int
uds_send_data(struct udssock * uds, struct udssock * peer,
	const struct sockdriver_data * data, size_t len, size_t off,
	endpoint_t user_endpt, unsigned int nfds, int * __restrict mergep,
	size_t * __restrict datalenp, unsigned int * __restrict segflagsp)
{
	struct sockcred sockcred;
	gid_t groups[NGROUPS_MAX];
	iovec_t iov[2];
	unsigned int iovcnt, segflags;
	unsigned char lenbyte;
	size_t credlen, pathlen, datalen, seglen;
	size_t avail, pos, left;
	int r, merge;

	/*
	 * At this point we should add the data to the peer's receive buffer.
	 * In the case of SOCK_STREAM sockets, we should add as much of the
	 * data as possible and suspend the call to send the rest later, if
	 * applicable.  In the case of SOCK_DGRAM sockets, we should drop the
	 * packet if it does not fit in the buffer.
	 *
	 * Due to the checks in uds_send_test(), we know for sure that we no
	 * longer have to suspend without making any progress at this point.
	 */
	segflags = (nfds > 0) ? UDS_HAS_FDS : 0;

	/*
	 * Obtain the credentials now.  Doing so allows us to determine how
	 * much space we actually need for them.  Credentials are attached
	 * only to the first part of a send (off == 0), not on resumption.
	 */
	if (off == 0 && (peer->uds_flags & UDSF_PASSCRED)) {
		memset(&sockcred, 0, sizeof(sockcred));

		if ((r = getsockcred(user_endpt, &sockcred, groups,
		    __arraycount(groups))) != OK)
			return r;

		/* One length byte, followed by the variable-size structure. */
		credlen = 1 + SOCKCREDSIZE(sockcred.sc_ngroups);

		segflags |= UDS_HAS_CRED;
	} else
		credlen = 0;

	/* For bound source datagram sockets, include the source path. */
	if (uds_get_type(uds) == SOCK_DGRAM && uds->uds_pathlen != 0) {
		/* One length byte, followed by the path without terminator. */
		pathlen = (size_t)uds->uds_pathlen + 1;

		segflags |= UDS_HAS_PATH;
	} else
		pathlen = 0;

	avail = UDS_BUF - peer->uds_len;

	if (uds_get_type(uds) == SOCK_STREAM) {
		/*
		 * Determine whether we can merge data into the previous
		 * segment.  This is a more refined version of the test in
		 * uds_send_test(), as we now know whether there are actually
		 * any FDs to transfer.
		 */
		merge = (peer->uds_len != 0 && nfds == 0 && credlen == 0);

		/* Determine how much we can send at once. */
		if (!merge) {
			assert(avail > UDS_HDRLEN + credlen);
			datalen = avail - UDS_HDRLEN - credlen;
		} else
			datalen = avail;

		if (datalen > len)
			datalen = len;

		/* If we cannot make progress, we should have suspended.. */
		assert(datalen != 0 || len == 0);
	} else {
		merge = FALSE;

		datalen = len;
	}
	assert(datalen <= len);
	assert(datalen <= UDS_BUF);

	/*
	 * Compute the total amount of space we need for the segment in the
	 * receive buffer.  Given that we have done will-it-fit tests in
	 * uds_send_test() for SOCK_STREAM and SOCK_SEQPACKET, there is only
	 * one case left where the result may not fit, and that is for
	 * SOCK_DGRAM packets.  In that case, we drop the packet.  POSIX says
	 * we should throw an error in that case, and that is also what NetBSD
	 * does.
	 */
	if (!merge)
		seglen = UDS_HDRLEN + credlen + pathlen + datalen;
	else
		seglen = datalen;

	if (seglen > avail) {
		assert(uds_get_type(uds) == SOCK_DGRAM);

		/* Drop the packet, borrowing NetBSD's chosen error code. */
		return ENOBUFS;
	}

	/*
	 * Generate the full segment, but do not yet update the buffer head.
	 * We may still run into an error (copying in file descriptors) or even
	 * decide that nothing gets sent after all (if there are no data or
	 * file descriptors).  If we are merging the new data into the previous
	 * segment, do not generate a header.
	 */
	pos = uds_get_head(peer);

	/* Generate the header, if needed. */
	if (!merge)
		pos = uds_store_hdr(peer, pos, seglen, datalen, segflags);
	else
		assert(segflags == 0);

	/* Copy in and store the sender's credentials, if desired. */
	if (credlen > 0) {
		assert(credlen >= 1 + sizeof(sockcred));
		assert(credlen <= UCHAR_MAX);

		lenbyte = credlen - 1;
		pos = uds_store(peer, pos, &lenbyte, 1);

		if (sockcred.sc_ngroups > 0) {
			/* The fixed part first, then the separate group list. */
			pos = uds_store(peer, pos, &sockcred,
			    offsetof(struct sockcred, sc_groups));
			pos = uds_store(peer, pos, groups,
			    sockcred.sc_ngroups * sizeof(gid_t));
		} else
			pos = uds_store(peer, pos, &sockcred,
			    sizeof(sockcred));
	}

	/* Store the sender's address if any.  Datagram sockets only. */
	if (pathlen > 0) {
		assert(pathlen > 1);
		assert(pathlen <= UCHAR_MAX);

		lenbyte = uds->uds_pathlen;
		pos = uds_store(peer, pos, &lenbyte, 1);
		pos = uds_store(peer, pos, uds->uds_path, pathlen - 1);
	}

	/* Lastly, copy in the actual data (if any) from the caller. */
	if (datalen > 0) {
		iov[0].iov_addr = (vir_bytes)&peer->uds_buf[pos];
		left = UDS_BUF - pos;

		if (left < datalen) {
			/* The data wraps around the end of the buffer. */
			assert(left > 0);
			iov[0].iov_size = left;
			iov[1].iov_addr = (vir_bytes)&peer->uds_buf[0];
			iov[1].iov_size = datalen - left;
			iovcnt = 2;
		} else {
			iov[0].iov_size = datalen;
			iovcnt = 1;
		}

		if ((r = sockdriver_vcopyin(data, off, iov, iovcnt)) != OK)
			return r;
	}

	*mergep = merge;
	*datalenp = datalen;
	*segflagsp = segflags;
	return seglen;
}

/*
 * Copy in control data for the current send request, and extract any file
 * descriptors to be transferred.  Do not yet duplicate the file descriptors,
 * but rather store a list in a temporary buffer: the send request may still
 * fail in which case we want to avoid having to undo the duplication.
 *
 * On success, return the number of (zero or more) file descriptors extracted
 * from the request and stored in the temporary buffer.  On failure, return a
 * negative error code: ENOBUFS if the control data do not fit in our buffer,
 * the copy-in error if copying fails, or EINVAL if the control data are
 * malformed.
 */
static int
uds_send_ctl(const struct sockdriver_data * ctl, socklen_t ctl_len,
	endpoint_t user_endpt)
{
	struct msghdr msghdr;
	struct cmsghdr *cmsg;
	socklen_t left;
	unsigned int i, n, nfds;
	int r;

	/*
	 * Copy in the control data.  We can spend a lot of effort copying in
	 * the data in small chunks, and change the receiving side to do the
	 * same, but it is really not worth it: applications never send a whole
	 * lot of file descriptors at once, and the buffer size is currently
	 * such that the UDS service itself will exhaust its OPEN_MAX limit
	 * anyway if they do.
	 */
	if (ctl_len > sizeof(uds_ctlbuf))
		return ENOBUFS;

	if ((r = sockdriver_copyin(ctl, 0, uds_ctlbuf, ctl_len)) != OK)
		return r;

	/* Clear the unused tail, so that no stale data is ever parsed. */
	if (ctl_len < sizeof(uds_ctlbuf))
		memset(&uds_ctlbuf[ctl_len], 0, sizeof(uds_ctlbuf) - ctl_len);

	/*
	 * Look for any file descriptors, and store their remote file
	 * descriptor numbers into a temporary array.
	 */
	memset(&msghdr, 0, sizeof(msghdr));
	msghdr.msg_control = uds_ctlbuf;
	msghdr.msg_controllen = ctl_len;

	nfds = 0;

	/*
	 * The sender may provide file descriptors in multiple chunks.
	 * Currently we do not preserve these chunk boundaries, instead
	 * generating one single chunk with all file descriptors for the
	 * segment upon receipt.  If needed, we can fairly easily adapt this
	 * later.
	 */
	for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
		/*
		 * Check for bogus lengths.  There is no excuse for this;
		 * either the caller does not know what they are doing or we
		 * are looking at a hacking attempt.
		 */
		assert((socklen_t)((char *)cmsg - uds_ctlbuf) <= ctl_len);
		left = ctl_len - (socklen_t)((char *)cmsg - uds_ctlbuf);
		assert(left >= CMSG_LEN(0)); /* guaranteed by CMSG_xxHDR */

		if (cmsg->cmsg_len < CMSG_LEN(0) || cmsg->cmsg_len > left) {
			printf("UDS: malformed control data from %u\n",
			    user_endpt);

			/*
			 * Fail the request right here.  Merely breaking out
			 * of the loop would lose the error and report the
			 * file descriptors found so far as a success.
			 */
			return EINVAL;
		}

		/* Only SCM_RIGHTS messages are of interest to us. */
		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_RIGHTS)
			continue;

		n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);

		for (i = 0; i < n; i++) {
			/*
			 * Copy the file descriptor to the temporary buffer,
			 * whose size is based on the control data buffer, so
			 * it is always large enough to contain all FDs.
			 */
			assert(nfds < __arraycount(uds_ctlfds));

			memcpy(&uds_ctlfds[nfds],
			    &((int *)CMSG_DATA(cmsg))[i], sizeof(int));

			nfds++;
		}
	}

	return nfds;
}

/*
 * Actually duplicate any file descriptors that we extracted from the sender's
 * control data and stored in our temporary buffer.
On success, return OK, * with all file descriptors stored in file descriptor objects that are * appended to the socket's list of in-flight FD objects. Thus, on success, * the send request may no longer fail. On failure, return a negative error * code, with any partial duplication undone. */ static int uds_send_fds(struct udssock * peer, unsigned int nfds, endpoint_t user_endpt) { SIMPLEQ_HEAD(, uds_fd) fds; struct uds_fd *ufd; unsigned int i; int r; SIMPLEQ_INIT(&fds); for (i = 0; i < nfds; i++) { if (SIMPLEQ_EMPTY(&uds_freefds)) { /* UDS itself may already have OPEN_MAX FDs. */ r = ENFILE; break; } /* * The caller may have given an invalid FD, or UDS itself may * unexpectedly have run out of available file descriptors etc. */ if ((r = copyfd(user_endpt, uds_ctlfds[i], COPYFD_FROM)) < 0) break; ufd = SIMPLEQ_FIRST(&uds_freefds); SIMPLEQ_REMOVE_HEAD(&uds_freefds, ufd_next); ufd->ufd_fd = r; ufd->ufd_count = 0; SIMPLEQ_INSERT_TAIL(&fds, ufd, ufd_next); dprintf(("UDS: copied in fd %d -> %d\n", uds_ctlfds[i], r)); } /* Did we experience an error while copying in the file descriptors? */ if (r < 0) { /* Revert the successful copyfd() calls made so far. */ SIMPLEQ_FOREACH(ufd, &fds, ufd_next) { dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd)); closenb(ufd->ufd_fd); } SIMPLEQ_CONCAT(&uds_freefds, &fds); return r; } /* * Success. If there were any file descriptors at all, add them to the * peer's list of in-flight file descriptors. Assign the number of * file descriptors copied in to the first file descriptor object, so * that we know how many to copy out (or discard) for this segment. * Also set the UDS_HAS_FDS flag on the segment. */ ufd = SIMPLEQ_FIRST(&fds); ufd->ufd_count = nfds; SIMPLEQ_CONCAT(&peer->uds_fds, &fds); return OK; } /* * The current send request is successful or at least has made progress. * Commit the new segment or, if we decided to merge the new data into the last * segment, update the header of the last segment. 
Also wake up the receiving * side, because there will now be new data to receive. */
static void
uds_send_advance(struct udssock * uds, struct udssock * peer, size_t datalen,
	int merge, size_t seglen, unsigned int segflags)
{
	size_t pos, prevseglen, prevdatalen;

	/*
	 * For non-datagram sockets, credentials are sent only once after
	 * setting the LOCAL_CREDS option. After that, the option is unset.
	 */
	if ((segflags & UDS_HAS_CRED) && uds_get_type(uds) != SOCK_DGRAM)
		peer->uds_flags &= ~UDSF_PASSCRED;

	if (merge) {
		/* Only data without ancillary data may be merged. */
		assert(segflags == 0);

		pos = uds_get_last(peer);

		/*
		 * Fetch the header of the last segment. Note that 'segflags'
		 * is passed as an output here, so the header rewritten below
		 * carries whatever flags were fetched from the existing one.
		 */
		(void)uds_fetch_hdr(peer, pos, &prevseglen, &prevdatalen,
		    &segflags);

		/* The buffer grows by the newly added part only. */
		peer->uds_len += seglen;
		assert(peer->uds_len <= UDS_BUF);

		/* The merged segment covers both the old and the new part. */
		seglen += prevseglen;
		datalen += prevdatalen;
		assert(seglen <= UDS_BUF);

		uds_store_hdr(peer, pos, seglen, datalen, segflags);
	} else {
		/*
		 * The new segment starts at the current end of the used part
		 * of the buffer, so it becomes the new last segment.
		 */
		peer->uds_last = peer->uds_len;

		peer->uds_len += seglen;
		assert(peer->uds_len <= UDS_BUF);
	}

	/* Now that there are new data, wake up the receiver side. */
	sockevent_raise(&peer->uds_sock, SEV_RECV);
}

/*
 * Process a send request. Return OK if the send request has successfully
 * completed, SUSPEND if it should be tried again later, or a negative error
 * code on failure. In all cases, the values of 'off' and 'ctl_off' must be
 * updated if any progress has been made; if either is non-zero, libsockevent
 * will return the partial progress rather than an error code.
 */
int
uds_send(struct sock * sock, const struct sockdriver_data * data, size_t len,
	size_t * off, const struct sockdriver_data * ctl, socklen_t ctl_len,
	socklen_t * ctl_off, const struct sockaddr * addr, socklen_t addr_len,
	endpoint_t user_endpt, int flags __unused, size_t min)
{
	struct udssock *uds = (struct udssock *)sock;
	struct udssock *peer;
	size_t seglen, datalen = 0 /*gcc*/;
	unsigned int nfds, segflags = 0 /*gcc*/;
	int r, partial, merge = 0 /*gcc*/;

	dprintf(("UDS: send(%d,%zu,%zu,%u,%u,0x%x)\n",
	    uds_get_id(uds), len, (off != NULL) ? *off : 0, ctl_len,
	    (ctl_off != NULL) ? *ctl_off : 0, flags));

	/* The call is a continuation if some data were sent before. */
	partial = (off != NULL && *off > 0);

	/*
	 * First see whether we can process this send call at all right now.
	 * Most importantly, for connected sockets, if the peer's receive
	 * buffer is full, we may have to suspend the call until some space has
	 * been freed up.
	 */
	if ((r = uds_send_test(uds, len, ctl_len, min, partial)) != OK)
		return r;

	/*
	 * Then get the peer socket. For connected sockets, this is trivial.
	 * For unconnected sockets, it may involve a lookup of the given
	 * address.
	 */
	if ((r = uds_send_peer(uds, addr, addr_len, user_endpt, &peer)) != OK)
		return r;

	/*
	 * We now know for sure that we will not suspend this call without
	 * making any progress. However, the call may still fail. Copy in
	 * control data first now, so that we know whether there are any file
	 * descriptors to transfer. This aspect may determine whether or not
	 * we can merge data with a previous segment. Do not actually copy in
	 * the actual file descriptors yet, because that is much harder to undo
	 * in case of a failure later on.
	 */
	if (ctl_len > 0) {
		/* We process control data once, in full. */
		assert(*ctl_off == 0);

		/* A non-negative result is the number of file descriptors. */
		if ((r = uds_send_ctl(ctl, ctl_len, user_endpt)) < 0)
			return r;
		nfds = (unsigned int)r;
	} else
		nfds = 0;

	/*
	 * Now generate a new segment, or (if possible) merge new data into the
	 * last segment. Since the call may still fail, prepare the segment
	 * but do not update the buffer head yet. Note that the segment
	 * contains not just regular data (in fact it may contain no data at
	 * all) but (also) certain ancillary data.
	 */
	if ((r = uds_send_data(uds, peer, data, len, *off, user_endpt, nfds,
	    &merge, &datalen, &segflags)) <= 0)
		return r;
	seglen = (size_t)r;

	/*
	 * If we extracted any file descriptors from the control data earlier,
	 * copy them over to ourselves now. The resulting in-flight file
	 * descriptors are stored in a separate data structure. This is the
	 * last point where the send call may actually fail.
	 */
	if (nfds > 0) {
		if ((r = uds_send_fds(peer, nfds, user_endpt)) != OK)
			return r;
	}

	/*
	 * The transmission is now known to be (partially) successful. Commit
	 * the new work by moving the receive buffer head.
	 */
	uds_send_advance(uds, peer, datalen, merge, seglen, segflags);

	/*
	 * Register the result. For stream-type sockets, the expected behavior
	 * is that all data be sent, and so we may still have to suspend the
	 * call after partial progress. Otherwise, we are now done. Either
	 * way, we are done with the control data, so mark it as consumed.
	 */
	*off += datalen;
	*ctl_off += ctl_len;
	if (uds_get_type(uds) == SOCK_STREAM && datalen < len)
		return SUSPEND;
	else
		return OK;
}

/*
 * Test whether a send request would block. The given 'min' parameter contains
 * the minimum number of bytes that should be possible to send without blocking
 * (the low send watermark). Return SUSPEND if the send request would block,
 * or any other error code if it would not.
 */
int
uds_test_send(struct sock * sock, size_t min)
{
	struct udssock *uds = (struct udssock *)sock;

	/*
	 * Pretend to send 'min' bytes of regular data without any control
	 * data, as a fresh (non-partial) call.
	 */
	return uds_send_test(uds, min, 0, min, FALSE /*partial*/);
}

/*
 * Perform initial checks on a receive request, before it may potentially be
 * suspended. Return OK if this receive request is valid, or a negative error
 * code if it is not.
 */
int
uds_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused,
	int flags)
{
	/*
	 * Reject calls with unknown flags. TODO: ensure that we should really
	 * reject all other flags rather than ignore them.
	 */
	if ((flags & ~(MSG_PEEK | MSG_WAITALL | MSG_CMSG_CLOEXEC)) != 0)
		return EOPNOTSUPP;

	return OK;
}

/*
 * Determine whether the (real or pretend) receive request should be processed
 * now, suspended until later, or rejected based on the current socket state.
 * Return OK if the receive request should be processed now, along with a first
 * indication whether the call may still be suspended later in 'may_block'
 * (which may be NULL if the caller is not interested in that indication).
 * Return SUSPEND if the receive request should be retried later. Return
 * SOCKEVENT_EOF if the receive buffer is empty and no more data can arrive
 * from the peer. Return an appropriate negative error code if the receive
 * request should fail.
 */
static int
uds_recv_test(struct udssock * uds, size_t len, size_t min, int partial,
	int * may_block)
{
	size_t seglen, datalen;
	unsigned int segflags;
	int r;

	/*
	 * If there are any pending data, those should always be received
	 * first. However, if there is nothing to receive, then whether we
	 * should suspend the receive call or fail immediately depends on other
	 * conditions. We first look at these other conditions.
	 */
	r = OK;

	if (uds_get_type(uds) != SOCK_DGRAM) {
		if (uds_is_connecting(uds))
			r = SUSPEND;
		else if (!uds_is_connected(uds) && !uds_is_disconnected(uds))
			r = ENOTCONN;
		else if (!uds_has_conn(uds) ||
		    uds_is_shutdown(uds->uds_conn, SFL_SHUT_WR))
			r = SOCKEVENT_EOF;
	}

	if (uds->uds_len == 0) {
		/*
		 * For stream-type sockets, we use the policy: if no regular
		 * data is requested, then end the call without receiving
		 * anything. For packet-type sockets, the request should block
		 * until there is a packet to discard, though.
		 */
		if (r != OK || (uds_get_type(uds) == SOCK_STREAM && len == 0))
			return r;

		return SUSPEND;
	}

	/*
	 * For stream-type sockets, we should still suspend the call if fewer
	 * than 'min' bytes are available right now, and there is a possibility
	 * that more data may arrive later. More may arrive later iff 'r' is
	 * OK (i.e., no EOF or error will follow) and, in case we already
	 * received some partial results, there is not already a next segment
	 * with ancillary data (i.e., nonzero segment flags), or in any case
	 * there isn't more than one segment in the buffer. Limit 'min' to the
	 * maximum that can ever be received, though. Since that is difficult
	 * in our case, we check whether the buffer is entirely full instead.
	 */
	if (r == OK && uds_get_type(uds) == SOCK_STREAM && min > 0 &&
	    uds->uds_len < UDS_BUF) {
		assert(uds->uds_len >= UDS_HDRLEN);

		(void)uds_fetch_hdr(uds, uds->uds_tail, &seglen, &datalen,
		    &segflags);

		/*
		 * The tail segment spanning the entire used part of the buffer
		 * means that it is the only segment.
		 */
		if (datalen < min && seglen == uds->uds_len &&
		    (!partial || segflags == 0))
			return SUSPEND;
	}

	/*
	 * Also start the decision process as to whether we should suspend the
	 * current call if MSG_WAITALL is given. Unfortunately there is no one
	 * place where we can conveniently do all the required checks.
	 */
	if (may_block != NULL)
		*may_block = (r == OK && uds_get_type(uds) == SOCK_STREAM);
	return OK;
}

/*
 * Receive regular data, and possibly the source path, from the tail segment in
 * the receive buffer. On success, return the positive non-zero length of the
 * tail segment, with 'addr' and 'addr_len' modified to store the source
 * address if applicable, the result flags in 'rflags' updated as appropriate,
 * the tail segment's data length stored in 'datalen', the number of received
 * regular data bytes stored in 'reslen', the segment flags stored in
 * 'segflags', and the absolute receive buffer position of the credentials in
 * the segment stored in 'credpos' if applicable. Since the receive call may
 * still fail, this function must not yet update the tail or any other aspect
 * of the receive buffer. Return zero if the current receive call was already
 * partially successful (due to MSG_WAITALL) and can no longer make progress,
 * and thus should be ended. Return a negative error code on failure.
 */
static int
uds_recv_data(struct udssock * uds, const struct sockdriver_data * data,
	size_t len, size_t off, struct sockaddr * addr, socklen_t * addr_len,
	int * __restrict rflags, size_t * __restrict datalen,
	size_t * __restrict reslen, unsigned int * __restrict segflags,
	size_t * __restrict credpos)
{
	iovec_t iov[2];
	unsigned char lenbyte;
	unsigned int iovcnt;
	size_t pos, seglen, left;
	int r;

	/* After this, 'pos' is the buffer position right after the header. */
	pos = uds_fetch_hdr(uds, uds->uds_tail, &seglen, datalen, segflags);

	/*
	 * If a partially completed receive now runs into a segment that cannot
	 * be logically merged with the previous one (because it has at least
	 * one segment flag set, meaning it has ancillary data), then we must
	 * shortcut the receive now.
	 */
	if (off != 0 && *segflags != 0)
		return OK;

	/*
	 * As stated, for stream-type sockets, we choose to ignore zero-size
	 * receive calls. This has the consequence that reading a zero-sized
	 * segment (with ancillary data) requires a receive request for at
	 * least one regular data byte. Such a receive call would then return
	 * zero. The problem with handling zero-data receive requests is that
	 * we need to know whether the current segment is terminated (i.e., no
	 * more data can possibly be merged into it later), which is a test
	 * that we rather not perform, not in the least because we do not know
	 * whether there is an error pending on the socket.
	 *
	 * For datagrams, we currently allow a zero-size receive call to
	 * discard the next datagram.
	 *
	 * TODO: compare this against policies on other platforms.
	 */
	if (len == 0 && uds_get_type(uds) == SOCK_STREAM)
		return OK;

	/*
	 * We have to skip the credentials for now: these are copied out as
	 * control data, and thus will (well, may) be looked at when dealing
	 * with the control data. For the same reason, we do not even look at
	 * UDS_HAS_FDS here.
	 */
	if (*segflags & UDS_HAS_CRED) {
		/* Remember where the credentials' length byte is stored. */
		*credpos = pos;

		pos = uds_fetch(uds, pos, &lenbyte, 1);
		pos = uds_advance(pos, (size_t)lenbyte);
	}

	/*
	 * Copy out the source address, but only if the (datagram) socket is
	 * not connected. TODO: even when it is connected, it may still
	 * receive packets sent to it from other sockets *before* being
	 * connected, and the receiver has no way of knowing that those packets
	 * did not come from its new peer. Ideally, the older packets should
	 * be dropped..
	 */
	if (*segflags & UDS_HAS_PATH) {
		pos = uds_fetch(uds, pos, &lenbyte, 1);

		if (uds_get_type(uds) == SOCK_DGRAM && !uds_has_link(uds))
			uds_make_addr((const char *)&uds->uds_buf[pos],
			    (size_t)lenbyte, addr, addr_len);

		/* The path is skipped regardless of whether it was used. */
		pos = uds_advance(pos, (size_t)lenbyte);
	}

	/*
	 * We can receive no more data than those that are present in the
	 * segment, obviously. For stream-type sockets, any more data that
	 * could have been received along with the current data would have been
	 * merged in the current segment, so we need not search for any next
	 * segments.
	 *
	 * For non-stream sockets, the caller may receive less than a whole
	 * packet if it supplied a small buffer. In that case, the rest of the
	 * packet will be discarded (but not here yet!) and the caller gets
	 * the MSG_TRUNC flag in its result, if it was using recvmsg(2) anyway.
	 */
	if (len > *datalen)
		len = *datalen;
	else if (len < *datalen && uds_get_type(uds) != SOCK_STREAM)
		*rflags |= MSG_TRUNC;

	/* Copy out the data to the caller. */
	if (len > 0) {
		iov[0].iov_addr = (vir_bytes)&uds->uds_buf[pos];
		left = UDS_BUF - pos;

		/*
		 * If the data run past the end of the buffer array, use a
		 * second vector element that continues at the buffer start.
		 */
		if (left < len) {
			iov[0].iov_size = left;
			iov[1].iov_addr = (vir_bytes)&uds->uds_buf[0];
			iov[1].iov_size = len - left;
			iovcnt = 2;
		} else {
			iov[0].iov_size = len;
			iovcnt = 1;
		}

		if ((r = sockdriver_vcopyout(data, off, iov, iovcnt)) != OK)
			return r;
	}

	*reslen = len;
	assert(seglen > 0 && seglen <= INT_MAX);
	return (int)seglen;
}

/*
 * The current segment has associated file descriptors. If possible, copy out
 * all file descriptors to the receiver, and generate and copy out a chunk of
 * control data that contains their file descriptor numbers. If not all
 * file descriptors fit in the receiver's buffer, or if any error occurs, no
 * file descriptors are copied out. Return the size of the produced chunk
 * (aligned where the given 'ctl_len' permits it) on success, zero if the chunk
 * does not fit in the remaining 'ctl_len' bytes, or a negative error code on
 * failure.
 */
static int
uds_recv_fds(struct udssock * uds, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t ctl_off, endpoint_t user_endpt, int flags)
{
	struct msghdr msghdr;
	struct cmsghdr *cmsg;
	struct uds_fd *ufd;
	unsigned int i, nfds;
	socklen_t chunklen, chunkspace;
	int r, fd, what;

	/* See how many file descriptors should be part of this chunk. */
	assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
	ufd = SIMPLEQ_FIRST(&uds->uds_fds);
	nfds = ufd->ufd_count;
	assert(nfds > 0);

	/*
	 * We produce and copy out potentially unaligned chunks, using
	 * CMSG_LEN, but return the aligned size at the end, using CMSG_SPACE.
	 * This may leave "gap" bytes unchanged in userland, but that should
	 * not be a problem. By producing unaligned chunks, we eliminate a
	 * potential boundary case where the unaligned chunk passed in (by the
	 * sender) no longer fits in the same buffer after being aligned here.
	 */
	chunklen = CMSG_LEN(sizeof(int) * nfds);
	chunkspace = CMSG_SPACE(sizeof(int) * nfds);
	assert(chunklen <= sizeof(uds_ctlbuf));
	if (chunklen > ctl_len)
		return 0; /* chunk would not fit, so produce nothing instead */
	/* Never report more bytes than the caller's buffer has left. */
	if (chunkspace > ctl_len)
		chunkspace = ctl_len;

	memset(&msghdr, 0, sizeof(msghdr));
	msghdr.msg_control = uds_ctlbuf;
	msghdr.msg_controllen = sizeof(uds_ctlbuf);

	memset(uds_ctlbuf, 0, chunklen);
	cmsg = CMSG_FIRSTHDR(&msghdr);
	cmsg->cmsg_len = chunklen;
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;

	/*
	 * Copy the group's local file descriptors to the target endpoint, and
	 * store the resulting remote file descriptors in the chunk buffer.
	 */
	r = OK;

	for (i = 0; i < nfds; i++) {
		assert(ufd != SIMPLEQ_END(&uds->uds_fds));
		/* Only the first entry of a group carries the group count. */
		assert(i == 0 || ufd->ufd_count == 0);

		what = COPYFD_TO;
		if (flags & MSG_CMSG_CLOEXEC)
			what |= COPYFD_CLOEXEC;

		/* Failure may happen legitimately here (e.g., EMFILE). */
		if ((r = copyfd(user_endpt, ufd->ufd_fd, what)) < 0)
			break; /* we keep our progress so far in 'i' */

		fd = r;

		dprintf(("UDS: copied out fd %d -> %d\n", ufd->ufd_fd, fd));

		memcpy(&((int *)CMSG_DATA(cmsg))[i], &fd, sizeof(int));

		ufd = SIMPLEQ_NEXT(ufd, ufd_next);
	}

	/* If everything went well so far, copy out the produced chunk. */
	if (r >= 0)
		r = sockdriver_copyout(ctl, ctl_off, uds_ctlbuf, chunklen);

	/*
	 * Handle errors. At this point, the 'i' variable contains the number
	 * of file descriptors that have already been successfully copied out.
	 */
	if (r < 0) {
		/* Revert the successful copyfd() calls made so far. */
		while (i-- > 0) {
			memcpy(&fd, &((int *)CMSG_DATA(cmsg))[i], sizeof(int));

			(void)copyfd(user_endpt, fd, COPYFD_CLOSE);
		}

		return r;
	}

	/*
	 * Success. Return the aligned size of the produced chunk, if the
	 * given length permits it. From here on, the receive call may no
	 * longer fail, as that would result in lost file descriptors.
	 */
	return chunkspace;
}

/*
 * Generate and copy out a chunk of control data with the sender's credentials.
 * Return the size of the produced chunk (aligned where the given 'ctl_len'
 * permits it) on success, zero if the chunk does not fit in the remaining
 * 'ctl_len' bytes, or a negative error code on failure.
 */
static int
uds_recv_cred(struct udssock * uds, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t ctl_off, size_t credpos)
{
	struct msghdr msghdr;
	struct cmsghdr *cmsg;
	socklen_t chunklen, chunkspace;
	unsigned char lenbyte;
	size_t credlen;
	int r;

	/*
	 * Since the sender side already did the hard work of producing the
	 * (variable-size) sockcred structure as it should be received, there
	 * is relatively little work to be done here.
	 */
	/* The structure is preceded by a single byte with its length. */
	credpos = uds_fetch(uds, credpos, &lenbyte, 1);
	credlen = (size_t)lenbyte;

	chunklen = CMSG_LEN(credlen);
	chunkspace = CMSG_SPACE(credlen);
	assert(chunklen <= sizeof(uds_ctlbuf));
	if (chunklen > ctl_len)
		return 0; /* chunk would not fit, so produce nothing instead */
	/* Never report more bytes than the caller's buffer has left. */
	if (chunkspace > ctl_len)
		chunkspace = ctl_len;

	memset(&msghdr, 0, sizeof(msghdr));
	msghdr.msg_control = uds_ctlbuf;
	msghdr.msg_controllen = sizeof(uds_ctlbuf);

	memset(uds_ctlbuf, 0, chunklen);
	cmsg = CMSG_FIRSTHDR(&msghdr);
	cmsg->cmsg_len = chunklen;
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDS;

	/* Copy the stored structure into the chunk's data area as is. */
	uds_fetch(uds, credpos, CMSG_DATA(cmsg), credlen);

	if ((r = sockdriver_copyout(ctl, ctl_off, uds_ctlbuf, chunklen)) != OK)
		return r;

	return chunkspace;
}

/*
 * Copy out control data for the ancillary data associated with the current
 * segment, if any. Return OK on success, at which point the current receive
 * call may no longer fail. 'rflags' may be updated with additional result
 * flags. Return a negative error code on failure.
 */
static int
uds_recv_ctl(struct udssock * uds, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t * ctl_off, endpoint_t user_endpt,
	int flags, unsigned int segflags, size_t credpos, int * rflags)
{
	int r;

	/*
	 * We first copy out all file descriptors, if any. We put them in one
	 * SCM_RIGHTS chunk, even if the sender put them in separate SCM_RIGHTS
	 * chunks. We believe that this should not cause application-level
	 * issues, but if it does, we can change that later with some effort.
	 * We then copy out credentials, if any.
	 *
	 * We copy out each control chunk independently of the others, and also
	 * perform error recovery on a per-chunk basis. This implies the
	 * following. If producing or copying out the first chunk fails, the
	 * entire recvmsg(2) call will fail with an appropriate error. If
	 * producing or copying out any subsequent chunk fails, the recvmsg(2)
	 * call will still return the previously generated chunks (a "short
	 * control read" if you will) as well as the MSG_CTRUNC flag. This
	 * approach is simple and clean, and it guarantees that we can always
	 * copy out at least as many file descriptors as we copied in for this
	 * segment, even if credentials are present as well. However, the
	 * approach does cause slightly more overhead when there are multiple
	 * chunks per call, as those are copied out separately.
	 *
	 * Since the generated SCM_RIGHTS chunk is never larger than the
	 * originally received SCM_RIGHTS chunk, the temporary "uds_ctlbuf"
	 * buffer is always large enough to contain the chunk in its entirety.
	 * SCM_CREDS chunks should always fit easily as well.
	 *
	 * The MSG_CTRUNC flag will be returned iff not the entire user-given
	 * control buffer was filled and not all control chunks were delivered.
	 * Our current implementation does not deliver partial chunks. NetBSD
	 * does, except for SCM_RIGHTS chunks.
	 *
	 * TODO: get rid of the redundancy in processing return values.
	 */
	if (segflags & UDS_HAS_FDS) {
		r = uds_recv_fds(uds, ctl, ctl_len, *ctl_off, user_endpt,
		    flags);

		/*
		 * At this point, 'r' contains one of the following:
		 *
		 *   r > 0	a chunk of 'r' bytes was added successfully.
		 *   r == 0	not enough space left; the chunk was not added.
		 *   r < 0	an error occurred; the chunk was not added.
		 */
		if (r < 0 && *ctl_off == 0)
			return r;

		if (r > 0) {
			ctl_len -= r;
			*ctl_off += r;
		} else
			*rflags |= MSG_CTRUNC;
	}

	if (segflags & UDS_HAS_CRED) {
		r = uds_recv_cred(uds, ctl, ctl_len, *ctl_off, credpos);

		/* As above. */
		if (r < 0 && *ctl_off == 0)
			return r;

		if (r > 0) {
			ctl_len -= r;
			*ctl_off += r;
		} else
			*rflags |= MSG_CTRUNC;
	}

	return OK;
}

/*
 * The current receive request is successful or, in the case of MSG_WAITALL,
 * has made progress. Advance the receive buffer tail, either by discarding
 * the entire tail segment or by generating a new, smaller tail segment that
 * contains only the regular data left to be received from the original tail
 * segment. Also wake up the sending side for connection-oriented sockets if
 * applicable, because there may now be room for more data to be sent. Update
 * 'may_block' if we are now sure that the call may not block on MSG_WAITALL
 * after all.
 */
static void
uds_recv_advance(struct udssock * uds, size_t seglen, size_t datalen,
	size_t reslen, unsigned int segflags, int * may_block)
{
	struct udssock *conn;
	struct uds_fd *ufd;
	size_t delta, nseglen, advance;
	unsigned int nfds;

	/* Note that 'reslen' may be legitimately zero. */
	assert(reslen <= datalen);

	/*
	 * For non-stream sockets, the part of the packet that was not received
	 * is discarded, so treat all of the data as consumed.
	 */
	if (uds_get_type(uds) != SOCK_STREAM && reslen < datalen)
		reslen = datalen;

	/* The number of regular data bytes left in the tail segment. */
	delta = datalen - reslen;

	if (delta == 0) {
		/*
		 * Fully consume the tail segment. We advance the tail by the
		 * full segment length, thus moving up to either the next
		 * segment in the receive buffer, or an empty receive buffer.
		 */
		advance = seglen;

		uds->uds_tail = uds_advance(uds->uds_tail, advance);
	} else {
		/*
		 * Partially consume the tail segment. We put a new segment
		 * header right in front of the remaining data, which obviously
		 * always fits. Since any ancillary data was consumed along
		 * with the first data byte of the segment, the new segment has
		 * no ancillary data anymore (and thus a zero flags field).
		 */
		nseglen = UDS_HDRLEN + delta;
		assert(nseglen < seglen);

		advance = seglen - nseglen;

		uds->uds_tail = uds_advance(uds->uds_tail, advance);

		uds_store_hdr(uds, uds->uds_tail, nseglen, delta, 0);
	}

	/*
	 * For datagram-oriented sockets, we always consume at least a header.
	 * For stream-type sockets, we either consume a zero-data segment along
	 * with its ancillary data, or we consume at least one byte from a
	 * segment that does have regular data. In all other cases, the
	 * receive call has already been ended by now. Thus, we always advance
	 * the tail of the receive buffer here.
	 */
	assert(advance > 0);

	/*
	 * The receive buffer's used length (uds_len) and pointer to the
	 * previous segment header (uds_last) are offsets from the tail. Now
	 * that we have moved the tail, we need to adjust these accordingly.
	 * If the buffer is now empty, reset the tail to the buffer start so as
	 * to avoid splitting inter-process copies whenever possible.
	 */
	assert(uds->uds_len >= advance);
	uds->uds_len -= advance;

	if (uds->uds_len == 0)
		uds->uds_tail = 0;

	/*
	 * If uds_last is zero here, it was pointing to the segment we just
	 * (partially) consumed. By leaving it zero, it will still point to
	 * the new or next segment.
	 */
	if (uds->uds_last > 0) {
		assert(uds->uds_len > 0);
		assert(uds->uds_last >= advance);
		uds->uds_last -= advance;
	}

	/*
	 * If there were any file descriptors associated with this segment,
	 * close and free them now.
	 */
	if (segflags & UDS_HAS_FDS) {
		assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
		ufd = SIMPLEQ_FIRST(&uds->uds_fds);
		nfds = ufd->ufd_count;
		assert(nfds > 0);

		/*
		 * Close each of the group's local descriptors, and move its
		 * entry from the socket's queue to the free list.
		 */
		while (nfds-- > 0) {
			assert(!SIMPLEQ_EMPTY(&uds->uds_fds));
			ufd = SIMPLEQ_FIRST(&uds->uds_fds);
			SIMPLEQ_REMOVE_HEAD(&uds->uds_fds, ufd_next);

			dprintf(("UDS: closing local fd %d\n", ufd->ufd_fd));

			closenb(ufd->ufd_fd);

			SIMPLEQ_INSERT_TAIL(&uds_freefds, ufd, ufd_next);
		}
	}

	/*
	 * If there is now any data left in the receive buffer, then there has
	 * been a reason that we haven't received it. For stream sockets, that
	 * reason is that the next segment has ancillary data. In any case,
	 * this means we should never block the current receive operation
	 * waiting for more data. Otherwise, we may block on MSG_WAITALL.
	 */
	if (uds->uds_len > 0)
		*may_block = FALSE;

	/*
	 * If the (non-datagram) socket has a peer that is not shut down for
	 * writing, see if it can be woken up to send more data. Note that
	 * the event will never be processed immediately.
	 */
	if (uds_is_connected(uds)) {
		assert(uds_get_type(uds) != SOCK_DGRAM);

		conn = uds->uds_conn;

		if (!uds_is_shutdown(conn, SFL_SHUT_WR))
			sockevent_raise(&conn->uds_sock, SEV_SEND);
	}
}

/*
 * Process a receive request. Return OK if the receive request has completed
 * successfully, SUSPEND if it should be tried again later, SOCKEVENT_EOF if an
 * end-of-file condition is reached, or a negative error code on failure. In
 * all cases, the values of 'off' and 'ctl_off' must be updated if any progress
 * has been made; if either is non-zero, libsockevent will return the partial
 * progress rather than an error code or EOF.
 */
int
uds_recv(struct sock * sock, const struct sockdriver_data * data, size_t len,
	size_t * off, const struct sockdriver_data * ctl, socklen_t ctl_len,
	socklen_t * ctl_off, struct sockaddr * addr, socklen_t * addr_len,
	endpoint_t user_endpt, int flags, size_t min, int * rflags)
{
	struct udssock *uds = (struct udssock *)sock;
	size_t seglen, datalen, reslen = 0 /*gcc*/, credpos = 0 /*gcc*/;
	unsigned int segflags;
	int r, partial, may_block;

	dprintf(("UDS: recv(%d,%zu,%zu,%u,%u,0x%x)\n",
	    uds_get_id(uds), len, (off != NULL) ? *off : 0, ctl_len,
	    (ctl_off != NULL) ? *ctl_off : 0, flags));

	/*
	 * Start by testing whether anything can be received at all, or whether
	 * an error or EOF should be returned instead, or whether the receive
	 * call should be suspended until later otherwise. If no (regular or
	 * control) data can be received, or if this was a test for select,
	 * we bail out right after.
	 */
	partial = (off != NULL && *off > 0);

	if ((r = uds_recv_test(uds, len, min, partial, &may_block)) != OK)
		return r;

	/*
	 * Copy out regular data, if any. Do this before copying out control
	 * data, because the latter is harder to undo on failure. This data
	 * copy function returns OK (0) if we are to return a result of
	 * zero bytes (which is *not* EOF) to the caller without doing anything
	 * else. The function returns a nonzero positive segment length if we
	 * should carry on with the receive call (as it happens, all its other
	 * returned values may in fact be zero).
	 */
	if ((r = uds_recv_data(uds, data, len, *off, addr, addr_len, rflags,
	    &datalen, &reslen, &segflags, &credpos)) <= 0)
		return r;
	seglen = (size_t)r;

	/*
	 * Copy out control data, if any: transfer and copy out records of file
	 * descriptors, and/or copy out sender credentials. This is the last
	 * part of the call that may fail.
	 */
	if ((r = uds_recv_ctl(uds, ctl, ctl_len, ctl_off, user_endpt, flags,
	    segflags, credpos, rflags)) != OK)
		return r;

	/*
	 * Now that the call has succeeded, move the tail of the receive
	 * buffer, unless we were merely peeking.
	 */
	if (!(flags & MSG_PEEK))
		uds_recv_advance(uds, seglen, datalen, reslen, segflags,
		    &may_block);
	else
		may_block = FALSE;

	/*
	 * If the MSG_WAITALL flag was given, we may still have to suspend the
	 * call after partial success. In particular, the receive call may
	 * suspend after partial success if all of these conditions are met:
	 *
	 * 1) the socket is a stream-type socket;
	 * 2) MSG_WAITALL is set;
	 * 3) MSG_PEEK is not set;
	 * 4) MSG_DONTWAIT is not set (tested upon return);
	 * 5) the socket must not have a pending error (tested upon return);
	 * 6) the socket must not be shut down for reading (tested later);
	 * 7) the socket must still be connected to a peer (no EOF);
	 * 8) the peer must not have been shut down for writing (no EOF);
	 * 9) the next segment, if any, contains no ancillary data.
	 *
	 * Together, these points guarantee that the call could conceivably
	 * receive more after being resumed. Points 4 to 6 are covered by
	 * libsockevent, which will end the call even if we return SUSPEND
	 * here. Due to segment merging, we cover point 9 by checking that
	 * there is currently no next segment at all. Once a new segment
	 * arrives, the ancillary-data test is done then.
	 */
	*off += reslen;
	if ((flags & MSG_WAITALL) && reslen < len && may_block)
		return SUSPEND;
	else
		return OK;
}

/*
 * Test whether a receive request would block. The given 'min' parameter
 * contains the minimum number of bytes that should be possible to receive
 * without blocking (the low receive watermark). Return SUSPEND if the receive
 * request would block. Otherwise, return any other error code (including OK
 * or SOCKEVENT_EOF), and if 'size' is not a NULL pointer, it should be filled
 * with the number of bytes available for receipt right now (if not zero).
 * Note that if 'size' is not NULL, 'min' will always be zero.
 */
int
uds_test_recv(struct sock * sock, size_t min, size_t * size)
{
	struct udssock *uds = (struct udssock *)sock;
	size_t seglen;
	unsigned int segflags;
	int r;

	/* Pretend to receive 'min' bytes, as a fresh (non-partial) call. */
	if ((r = uds_recv_test(uds, min, min, FALSE /*partial*/,
	    NULL /*may_block*/)) == SUSPEND)
		return r;

	/*
	 * Report the regular data length of the tail segment only. If the
	 * buffer is empty, 'size' is left untouched.
	 */
	if (size != NULL && uds->uds_len > 0)
		(void)uds_fetch_hdr(uds, uds->uds_tail, &seglen, size,
		    &segflags);

	return r;
}