David van Moolenbroek 27852ebe53 UDS: full rewrite
This new implementation of the UDS service is built on top of the
libsockevent library.  It thereby inherits all the advantages that
libsockevent brings.  However, the fundamental restructuring
required for that change also paved the way for resolution of a
number of other important open issues with the old UDS code.  Most
importantly, the rewrite brings the behavior of the service much
closer to POSIX compliance and NetBSD compatibility.  These are the
most important changes:

- due to the use of libsockevent, UDS now supports multiple suspending
  calls per socket and a large number of standard socket flags and
  options;
- socket address matching is now based on <device,inode> lookups
  instead of canonized path names, and socket addresses are no longer
  altered either due to canonization or at connect time;
- the socket state machine is now well defined, most importantly
  resolving the erroneous reset-on-EOF semantics of the old UDS, but
  also allowing socket reuse;
- sockets are now connected before being accepted instead of being
  held in connecting state, unless the LOCAL_CONNWAIT option is set
  on either the connecting or the listening socket;
- connect(2) on datagram sockets is now supported (needed by syslog),
  and proper datagram socket disconnect notification is provided;
- the receive queue now supports segmentation, associating ancillary
  data (in-flight file descriptors and credentials) with each segment
  instead of being kept fully separately; this is a POSIX requirement
  (and needed by tmux);
- as part of the segmentation support, the receive queue can now hold
  as many packets as can fit, instead of one;
- in addition to the flags supported by libsockevent, the MSG_PEEK,
  MSG_WAITALL, MSG_CMSG_CLOEXEC, MSG_TRUNC, and MSG_CTRUNC send and
  receive flags are now supported;
- the SO_PASSCRED and SO_PEERCRED socket options are replaced by
  LOCAL_CREDS and LOCAL_PEEREID respectively, now following NetBSD
  semantics and allowing use of NetBSD libc's getpeereid(3);
- memory usage is reduced by about 250 KB due to centralized in-flight
  file descriptor tracking, with a limit of OPEN_MAX total rather than
  of OPEN_MAX per socket;
- memory usage is reduced by another ~50 KB due to removal of state
  redundancy, despite the fact that socket path names may now be up to
  253 bytes rather than the previous 104 bytes;
- compared to the old UDS, there is now very little direct indexing on
  the static array of sockets, thus allowing dynamic allocation of
  sockets more easily in the future;
- the UDS service now has RMIB support for the net.local sysctl tree,
  implementing preliminary support for NetBSD netstat(1).

Change-Id: I4a9b6fe4aaeef0edf2547eee894e6c14403fcb32
2017-03-09 23:39:56 +00:00

1418 lines
37 KiB
C

/* UNIX Domain Sockets - uds.c - socket management */
#include "uds.h"
/* Static pool of all UDS socket objects; a socket's ID is its index here. */
static struct udssock uds_array[NR_UDSSOCK];
/* List of socket objects that are currently available for allocation. */
static TAILQ_HEAD(uds_freelist, udssock) uds_freelist;
/* Number of socket objects currently allocated (i.e., off the free list). */
static unsigned int uds_in_use;
/* While FALSE, freeing the last in-use socket results in sef_cancel(). */
static int uds_running;
/* Operations table returned to the caller of uds_socket() through 'ops'. */
static const struct sockevent_ops uds_ops;
/* File-to-socket hash table: one list of hashed sockets per slot. */
static SLIST_HEAD(udshash, udssock) udshash[UDSHASH_SLOTS];
/*
 * Set up the file-to-socket hash table by turning every slot into an empty
 * list.
 */
static void
udshash_init(void)
{
	unsigned int i;

	i = 0;
	while (i < __arraycount(udshash)) {
		SLIST_INIT(&udshash[i]);
		i++;
	}
}
/*
 * Map a <dev,ino> pair to the index of a hash table slot.  Neither value may
 * be its "unset" marker (NO_DEV and zero, respectively).
 */
static unsigned int
udshash_slot(dev_t dev, ino_t ino)
{
	unsigned int mixed;

	assert(dev != NO_DEV);
	assert(ino != 0);

	/*
	 * Folding two wide numbers into a handful of bits is hard to do well.
	 * A plain XOR, truncated to an unsigned int, is a rather poor hash,
	 * but that hardly matters: the number of bound UDS sockets in the
	 * system is expected to be small.
	 */
	mixed = (unsigned int)(dev ^ ino);

	return mixed % UDSHASH_SLOTS;
}
/*
* Look for a socket that is bound to the given <dev,ino> pair. Return a
* pointer to the socket if found, or NULL otherwise.
*/
static struct udssock *
udshash_get(dev_t dev, ino_t ino)
{
struct udssock *uds;
unsigned int slot;
slot = udshash_slot(dev, ino);
SLIST_FOREACH(uds, &udshash[slot], uds_hash) {
if (uds->uds_dev == dev && uds->uds_ino == ino)
return uds;
}
return NULL;
}
/*
 * Insert a socket into the file-to-socket hash table.  The caller must have
 * filled in the socket's device and inode fields, and the socket must not be
 * part of the hash table yet.
 */
static void
udshash_add(struct udssock * uds)
{
	const unsigned int bucket = udshash_slot(uds->uds_dev, uds->uds_ino);

	SLIST_INSERT_HEAD(&udshash[bucket], uds, uds_hash);
}
/*
 * Take a socket out of the file-to-socket hash table.  The socket must
 * currently be part of the hash table, with its device and inode fields still
 * set to the values it was added with.
 */
static void
udshash_del(struct udssock * uds)
{
	const unsigned int bucket = udshash_slot(uds->uds_dev, uds->uds_ino);

	/* Removal from a singly linked list is a linear-time operation. */
	SLIST_REMOVE(&udshash[bucket], uds, udssock, uds_hash);
}
/*
 * Return the identifier of the given UDS socket object, which is the index of
 * the object within the static socket array.
 */
sockid_t
uds_get_id(struct udssock * uds)
{
	return (sockid_t)(uds - &uds_array[0]);
}
/*
* Given either NULL or a previously returned socket, return the next in-use
* UDS socket of the given socket type, or NULL if there are no more matches.
* The sockets are returned in random order, but each matching socket is
* returned exactly once (until any socket is allocated or freed).
*/
struct udssock *
uds_enum(struct udssock * prev, int type)
{
sockid_t id;
if (prev != NULL)
id = uds_get_id(prev) + 1;
else
id = 0;
for (; id < NR_UDSSOCK; id++)
if ((uds_array[id].uds_flags & UDSF_IN_USE) &&
uds_get_type(&uds_array[id]) == type)
return &uds_array[id];
return NULL;
}
/*
 * Mark the credentials stored on the socket as absent, by setting the process,
 * user, and group ID fields to -1.
 */
static void
uds_clear_cred(struct udssock * uds)
{
	uds->uds_cred.unp_egid = -1;
	uds->uds_cred.unp_euid = -1;
	uds->uds_cred.unp_pid = -1;
}
/*
 * Look up the credentials (process, user, and group ID) of the given user
 * endpoint and store them on the socket for later retrieval.  If the lookup
 * fails, a message is printed and the socket's credentials are invalidated.
 * Note that the stored credentials are a snapshot: they are obtained once,
 * when this function is called, and never refreshed afterwards.  Whoever
 * receives them must take that into account.
 */
static void
uds_get_cred(struct udssock * uds, endpoint_t user_endpt)
{
	int r;

	/* The call's result doubles as the process ID on success. */
	r = getepinfo(user_endpt, &uds->uds_cred.unp_euid,
	    &uds->uds_cred.unp_egid);
	uds->uds_cred.unp_pid = r;

	if (uds->uds_cred.unp_pid < 0) {
		printf("UDS: failed obtaining credentials of %d (%d)\n",
		    user_endpt, r);

		uds_clear_cred(uds);
	}
}
/*
 * Allocate and initialize a UDS socket.  On success, return OK with a pointer
 * to the new socket in 'udsp'.  On failure, return a negative error code.  In
 * the failure case, 'udsp' is left untouched, and the socket object that was
 * being set up remains on the free list and is not marked as in use.
 */
static int
uds_alloc(struct udssock ** udsp)
{
	struct udssock *uds;
	int r;

	/* Without any free socket objects left, there is nothing to hand out. */
	if (TAILQ_EMPTY(&uds_freelist))
		return ENOBUFS;

	/*
	 * Initialize the first free object in place.  It is taken off the free
	 * list only once all steps that may fail have succeeded.
	 */
	uds = TAILQ_FIRST(&uds_freelist);

	uds->uds_conn = NULL;		/* not connected */
	uds->uds_link = NULL;		/* not connecting or linked */
	uds->uds_queued = 0;
	uds->uds_flags = UDSF_IN_USE;	/* may be found through enumeration */
	uds->uds_pathlen = 0;		/* not bound: no path */
	uds->uds_dev = NO_DEV;		/* not hashed: no socket file device */
	uds->uds_ino = 0;		/* not hashed: no socket file inode */
	uds_clear_cred(uds);		/* no bind/connect-time credentials */
	TAILQ_INIT(&uds->uds_queue);	/* an empty queue */

	if ((r = uds_io_setup(uds)) != OK) {
		/*
		 * The object stays on the free list, so it must not keep the
		 * in-use flag: otherwise uds_enum() would report a socket
		 * that was never actually allocated.
		 */
		uds->uds_flags = 0;

		return r;
	}

	TAILQ_REMOVE(&uds_freelist, uds, uds_next);

	assert(uds_in_use < NR_UDSSOCK);
	uds_in_use++;

	*udsp = uds;
	return OK;
}
/*
 * Release a previously allocated socket: clean up its I/O state, clear its
 * flags so that it is no longer seen as in use, and put it back on the free
 * list.  If this was the last socket in use while 'uds_running' is FALSE,
 * sef_cancel() is called.
 */
static void
uds_free(struct sock * sock)
{
	struct udssock *uds;

	uds = (struct udssock *)sock;

	uds_io_cleanup(uds);

	uds->uds_flags = 0;	/* no longer in use */

	TAILQ_INSERT_HEAD(&uds_freelist, uds, uds_next);

	assert(uds_in_use > 0);
	uds_in_use--;

	if (uds_in_use == 0 && uds_running == FALSE)
		sef_cancel();
}
/*
 * Create a new socket.  On success, return the identifier of the new socket,
 * with its sock object stored in 'sockp' and the UDS operations table stored
 * in 'ops'.  On failure, return an error code.
 */
static sockid_t
uds_socket(int domain, int type, int protocol, endpoint_t user_endpt __unused,
	struct sock ** sockp, const struct sockevent_ops ** ops)
{
	struct udssock *uds;
	int r;

	dprintf(("UDS: socket(%d,%d,%d)\n", domain, type, protocol));

	/* Any other domain means the service was configured incorrectly. */
	if (domain != PF_UNIX) {
		printf("UDS: got request for domain %d\n", domain);

		return EAFNOSUPPORT;
	}

	/* Stream, sequenced-packet, and datagram sockets are supported. */
	if (type != SOCK_STREAM && type != SOCK_SEQPACKET &&
	    type != SOCK_DGRAM)
		return EPROTOTYPE;

	/*
	 * There are no particular protocols within the PF_UNIX domain, so the
	 * only acceptable protocol value is zero (= anything that matches).
	 */
	if (protocol != UDSPROTO_UDS)
		return EPROTONOSUPPORT;

	r = uds_alloc(&uds);
	if (r != OK)
		return r;

	dprintf(("UDS: socket returns %d\n", uds_get_id(uds)));

	*ops = &uds_ops;
	*sockp = &uds->uds_sock;

	return uds_get_id(uds);
}
/*
 * Connect two sockets to each other, as a socket pair.  Datagram sockets are
 * refused with EOPNOTSUPP.  Both sockets end up with the credentials of the
 * given user endpoint.
 */
static int
uds_pair(struct sock * sock1, struct sock * sock2, endpoint_t user_endpt)
{
	struct udssock *first = (struct udssock *)sock1;
	struct udssock *second = (struct udssock *)sock2;

	dprintf(("UDS: pair(%d,%d)\n", uds_get_id(first), uds_get_id(second)));

	/* Pairing is for connection-oriented socket types only. */
	if (uds_get_type(first) == SOCK_DGRAM)
		return EOPNOTSUPP;

	/* Point each socket at the other and mark both as connected. */
	first->uds_conn = second;
	first->uds_flags |= UDSF_CONNECTED;

	second->uds_conn = first;
	second->uds_flags |= UDSF_CONNECTED;

	/* Look up the credentials once, then duplicate them to the peer. */
	uds_get_cred(first, user_endpt);

	memcpy(&second->uds_cred, &first->uds_cred, sizeof(second->uds_cred));

	return OK;
}
/*
 * Break the connection between the given (connected) UDS socket and its peer.
 * What happens to the peer depends on 'was_linked', which tells whether the
 * given socket was linked, that is, on the accept queue of a listening socket:
 * if so, SEV_CLOSE is raised on the peer; if not, the peer is notified of the
 * disconnect.
 */
static void
uds_disconnect(struct udssock * uds, int was_linked)
{
	struct udssock *peer;

	assert(uds_is_connected(uds));
	assert(uds_has_conn(uds));

	peer = uds->uds_conn;

	assert(uds_is_connected(peer));
	assert(uds_has_conn(peer));
	assert(!uds_has_link(peer));
	assert(peer->uds_conn == uds);

	/* Sever the connection in both directions. */
	uds->uds_conn = NULL;
	peer->uds_conn = NULL;

	/*
	 * A linked socket is connected to a peer that was created for it but
	 * that has not been accepted yet.  Such a peer has no further use, so
	 * it is closed.
	 */
	if (was_linked) {
		sockevent_raise(&peer->uds_sock, SEV_CLOSE);

		return;
	}

	/*
	 * Otherwise, the peer is a regular user-created socket, which must be
	 * properly moved into the disconnected state.  Wake up its pending
	 * send and receive calls first.
	 */
	sockevent_raise(&peer->uds_sock, SEV_SEND | SEV_RECV);

	/*
	 * Then drop the peer's credentials, so that they cannot be mistaken
	 * for credentials obtained at bind time later on.
	 */
	uds_clear_cred(peer);
}
/*
 * Append the socket 'link' to the queue of the socket 'uds', and make 'uds'
 * the link socket of 'link'.  The two may be the same socket.
 */
static void
uds_add_queue(struct udssock * uds, struct udssock * link)
{
	dprintf(("UDS: add_queue(%d,%d)\n",
	    uds_get_id(uds), uds_get_id(link)));

	TAILQ_INSERT_TAIL(&uds->uds_queue, link, uds_next);

	link->uds_link = uds;

	/* The queue length counter must not wrap around to zero. */
	uds->uds_queued++;
	assert(uds->uds_queued != 0);
}
/*
 * Take the socket 'link' off the queue of the socket 'uds'.  This also resets
 * the link socket of 'link' to NULL.  The socket 'link' must currently be
 * linked to 'uds'.
 */
static void
uds_del_queue(struct udssock * uds, struct udssock * link)
{
	dprintf(("UDS: del_queue(%d,%d)\n",
	    uds_get_id(uds), uds_get_id(link)));

	assert(link->uds_link == uds);

	TAILQ_REMOVE(&uds->uds_queue, link, uds_next);

	assert(uds->uds_queued > 0);

	link->uds_link = NULL;
	uds->uds_queued--;
}
/*
* Remove all sockets from the queue of the socket 'uds', with the exception of
* 'except' if non-NULL. Raise an ECONNRESET error on all removed sockets that
* are not equal to 'uds'. Removed connection-oriented sockets are additionally
* disconnected (if connected) or stripped of their credentials (if not).
*/
static void
uds_clear_queue(struct udssock * uds, struct udssock * except)
{
struct udssock *link, *tmp;
int found;
dprintf(("UDS: clear_queue(%d,%d)\n",
uds_get_id(uds), (except != NULL) ? uds_get_id(except) : -1));
/* Number of queue entries that were skipped, i.e., left on the queue. */
found = 0;
/*
* Abort all connecting sockets queued on this socket, except for the
* given exception, which may be NULL. The safe iteration variant is
* used because entries are removed from the queue while iterating.
*/
TAILQ_FOREACH_SAFE(link, &uds->uds_queue, uds_next, tmp) {
if (link == except) {
found++;
continue;
}
dprintf(("UDS: clear_queue removes %d\n", uds_get_id(link)));
assert(uds_get_type(link) == SOCK_DGRAM ||
uds_is_connecting(link) || uds_is_connected(link));
uds_del_queue(uds, link);
/*
* Generate an error only if the socket was not linked to
* itself (only datagram sockets can be linked to themselves).
* The error is not helpful for applications in that case.
*/
if (uds != link)
sockevent_set_error(&link->uds_sock, ECONNRESET);
/*
* If this is a listening socket, disconnect the connecting or
* connected end. If a connected peer was already created for
* the queued socket, dispose of that peer.
*
* Clear credentials obtained when starting to connect (in
* which case the socket is always a connection-oriented
* socket), so that they will not be mistaken for credentials
* obtained at bind time.
*/
if (uds_get_type(link) != SOCK_DGRAM) {
if (uds_is_connected(link))
uds_disconnect(link, TRUE /*was_linked*/);
else
uds_clear_cred(link);
}
}
/* Only the skipped entries, if any, may be left on the queue now. */
assert(uds->uds_queued == found);
}
/*
 * Validate the socket address 'addr' of length 'addr_len' as a UNIX domain
 * socket address that carries a path to a socket file.  On success, store a
 * pointer to the path, as it is stored within 'addr', in 'pathp', and return
 * the (non-zero) length of that path.  The returned length excludes a null
 * terminator, which may not be present in the address at all; copying and
 * terminating the path is up to the caller.  On failure, return an error code.
 */
static int
uds_check_addr(const struct sockaddr * addr, socklen_t addr_len,
	const char ** pathp)
{
	const char *nul;
	size_t path_len;

	/*
	 * The generic sa_data field is used as the equivalent of sun_path
	 * here.  Casting to a sockaddr_un structure pointer first would not
	 * buy us anything.
	 */
	if (addr_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;

	if (addr->sa_family != AF_UNIX)
		return EAFNOSUPPORT;

	/* The path ends at the first null byte, or else at the address end. */
	path_len = (size_t)addr_len - offsetof(struct sockaddr, sa_data);

	if (path_len > 0) {
		nul = memchr(addr->sa_data, '\0', path_len);

		if (nul != NULL)
			path_len = (size_t)(nul - addr->sa_data);
	}

	/* An empty path name is not acceptable. */
	if (path_len == 0)
		return ENOENT;

	/* Better safe than sorry, even though this check should be redundant. */
	if (path_len >= UDS_PATH_MAX)
		return EINVAL;

	*pathp = (const char *)addr->sa_data;

	return path_len;
}
/*
 * Generate a socket address in 'addr' for the socket file path 'path', which
 * has a length of 'len' bytes and need not be null terminated.  The resulting
 * socket address length is returned in 'addr_len'.  The buffer behind 'addr'
 * must be large enough for any address generated here; according to the
 * original authors, the calling libraries (libsockdriver, libsockevent) and a
 * static assert in uds.h guarantee this.  The libraries may end up copying out
 * only part of the result to the user process.  This function cannot fail.
 */
void
uds_make_addr(const char * path, size_t len, struct sockaddr * addr,
	socklen_t * addr_len)
{
	char *dst;
	size_t stored;

	/*
	 * The length field within the address (sa_len/sun_len) never counts a
	 * null terminator.
	 */
	addr->sa_len = offsetof(struct sockaddr, sa_data) + len;
	addr->sa_family = AF_UNIX;

	stored = len;

	if (len > 0) {
		/*
		 * A non-empty path means that the socket is bound.  Copy in
		 * the path, which may (intentionally) run past the declared
		 * size of sa_data, and add a null terminator.  The terminator
		 * does count towards the overall address length.
		 */
		dst = (char *)addr->sa_data;

		memcpy(dst, path, len);
		dst[len] = '\0';

		stored = len + 1;
		assert(stored <= UDS_PATH_MAX);
	}

	/* As a result, this length may differ from sa_len/sun_len. */
	*addr_len = offsetof(struct sockaddr, sa_data) + stored;
}
/*
 * Bind a socket to a local address, creating a socket file for it.  Return OK
 * on success, or an error code on failure.
 */
static int
uds_bind(struct sock * sock, const struct sockaddr * addr, socklen_t addr_len,
	endpoint_t user_endpt)
{
	struct udssock *uds = (struct udssock *)sock;
	struct udssock *stale;
	const char *path;
	size_t len;
	dev_t dev;
	ino_t ino;
	int r;

	dprintf(("UDS: bind(%d)\n", uds_get_id(uds)));

	/* Binding is possible at any time, but no more than once. */
	if (uds_is_bound(uds))
		return EINVAL;

	/* See whether the address given by the user is acceptable. */
	r = uds_check_addr(addr, addr_len, &path);
	if (r < 0)
		return r;

	len = (size_t)r;

	/* Try to create the socket file on the file system. */
	r = socketpath(user_endpt, path, len, SPATH_CREATE, &dev, &ino);
	if (r != OK)
		return r;

	assert(dev != NO_DEV && ino != 0);

	/*
	 * The socket file of a previously bound socket may have been unlinked,
	 * after which inode number reuse may have caused the new socket file
	 * to get the very same <dev,ino> pair.  The old socket must no longer
	 * be found in that case, so unhash it.  It keeps its path (and keeps
	 * behaving as though it is bound), but hash lookups will no longer
	 * yield it.
	 */
	stale = udshash_get(dev, ino);
	if (stale != NULL) {
		udshash_del(stale);

		stale->uds_dev = NO_DEV;
		stale->uds_ino = 0;
	}

	/*
	 * Obtain bind-time credentials, but only for connection-oriented
	 * sockets that are neither connecting nor connected: otherwise we
	 * would overwrite the credentials obtained at connect time.  Those
	 * are cleared upon connection failure or disconnect, so that if the
	 * socket is put in listening mode afterwards, it is known to have no
	 * bind-time credentials.  Getting this fully right would take two
	 * separate sets of credentials per socket, which is a waste of memory
	 * given that no sane application would rely on credential passing
	 * after recycling a socket.
	 */
	if (!(uds_get_type(uds) == SOCK_DGRAM || uds_is_connecting(uds) ||
	    uds_is_connected(uds)))
		uds_get_cred(uds, user_endpt);

	/* Assign the address to the socket, and make the socket findable. */
	uds->uds_pathlen = len;
	memcpy(&uds->uds_path, path, len);

	uds->uds_dev = dev;
	uds->uds_ino = ino;

	udshash_add(uds);

	return OK;
}
/*
 * Find the UDS socket that is bound to the address given by the user.  If
 * there is such a socket, also verify that its type is the same as that of
 * the given socket 'uds'.  On success, return OK, with the socket that was
 * found stored in 'peerp'.  On failure, return a negative error code.
 */
int
uds_lookup(struct udssock * uds, const struct sockaddr * addr,
	socklen_t addr_len, endpoint_t user_endpt, struct udssock ** peerp)
{
	struct udssock *found;
	const char *path;
	size_t len;
	dev_t dev;
	ino_t ino;
	int r;

	/* See whether the address given by the user is acceptable. */
	r = uds_check_addr(addr, addr_len, &path);
	if (r < 0)
		return r;

	len = (size_t)r;

	/* Try to resolve the socket file on the file system. */
	r = socketpath(user_endpt, path, len, SPATH_CHECK, &dev, &ino);
	if (r != OK)
		return r;

	assert(dev != NO_DEV && ino != 0);

	/* The file must belong to a socket that is currently hashed. */
	found = udshash_get(dev, ino);
	if (found == NULL)
		return ECONNREFUSED;

	if (uds_get_type(found) != uds_get_type(uds))
		return EPROTOTYPE;

	*peerp = found;

	return OK;
}
/*
 * Given the listening socket 'uds', and the socket 'link' that is calling or
 * has called connect(2) and is or will be linked to the listening socket's
 * queue, create a new socket and connect it to 'link', putting both sockets in
 * the connected state.  The given link socket may be in unconnected,
 * connecting, or disconnected state prior to the call.  Return OK, or the
 * error code from allocating the new socket, in which case nothing has been
 * changed.  The link state of the link socket remains unchanged in any case.
 */
static int
uds_attach(struct udssock * uds, struct udssock * link)
{
	struct udssock *conn;
	int r;

	/*
	 * Allocate a new socket to use as peer socket for the connection that
	 * is about to be established.  The new socket is not yet known by
	 * libsockevent.
	 */
	if ((r = uds_alloc(&conn)) != OK)
		return r;

	/*
	 * Ask libsockevent to clone the sock object in the new UDS socket from
	 * the listening socket.  This adds the sock object to libsockevent's
	 * data structures and ensures that we can safely use the socket
	 * despite the fact that it has not yet been accepted (and thus
	 * returned to libsockevent).  From this moment on, we must either
	 * return the socket's ID (but not a pointer to it!) from uds_accept()
	 * or raise SEV_CLOSE on it.
	 */
	sockevent_clone(&uds->uds_sock, &conn->uds_sock, uds_get_id(conn));

	/* Connect the link socket to the new socket. */
	link->uds_conn = conn;
	link->uds_flags |= UDSF_CONNECTED;

	/*
	 * Connect the new socket to the link socket as well.  The child
	 * socket should also inherit pretty much all settings from the
	 * listening socket, including the bind path and the listening socket's
	 * bind-time credentials.
	 *
	 * The inherited flags are added to the flags set by uds_alloc() rather
	 * than replacing them.  A plain assignment would drop UDSF_IN_USE, and
	 * with that hide the new socket from enumeration through uds_enum().
	 */
	conn->uds_conn = link;
	conn->uds_flags |= UDSF_CONNECTED |
	    (uds->uds_flags & (UDSF_PASSCRED | UDSF_CONNWAIT));
	conn->uds_pathlen = uds->uds_pathlen;
	memcpy(conn->uds_path, uds->uds_path, (size_t)uds->uds_pathlen);
	memcpy(&conn->uds_cred, &uds->uds_cred, sizeof(conn->uds_cred));

	return OK;
}
/*
* Connect a socket to a remote address.
*
* For datagram sockets, this links the socket to its target (or unlinks it,
* if the address family is AF_UNSPEC) and returns OK or an error code right
* away. For connection-oriented sockets, the socket is queued on the
* listening socket it is connecting to. In that case, the result is OK if a
* peer socket could be attached immediately, SUSPEND if the connection is left
* to be completed by an accept call, or an error code.
*/
static int
uds_connect(struct sock * sock, const struct sockaddr * addr,
socklen_t addr_len, endpoint_t user_endpt)
{
struct udssock *uds = (struct udssock *)sock;
struct udssock *link;
int r;
dprintf(("UDS: connect(%d)\n", uds_get_id(uds)));
/* For connection-oriented sockets, several state checks apply. */
if (uds_get_type(uds) != SOCK_DGRAM) {
if (uds_is_listening(uds))
return EOPNOTSUPP;
if (uds_is_connecting(uds))
return EALREADY;
if (uds_is_connected(uds))
return EISCONN;
/* Disconnected sockets may be reconnected, see below. */
} else {
/*
* Connectionless sockets may be unconnected by providing an
* address with family AF_UNSPEC. Handle this case first here.
*/
if (addr_len >= offsetof(struct sockaddr, sa_data) &&
addr->sa_family == AF_UNSPEC) {
/*
* Reset this socket's previous connection to another
* socket, if any. Unconnecting has no effect on other
* sockets connected to this socket, though.
*/
if (uds_has_link(uds))
uds_del_queue(uds->uds_link, uds);
return OK;
}
}
/*
* Find the socket identified by the given address. If it exists at
* all, see if it is a proper match.
*/
if ((r = uds_lookup(uds, addr, addr_len, user_endpt, &link)) != OK)
return r;
/*
* Handle connectionless sockets first, in which case a connect links
* the socket to a send target and limits receipt to datagrams from
* that target. We actually point the socket to the peer socket,
* through uds_link. That also means that if the target socket
* disappears, we have to reset any sockets connected to it, in which
* case we return them to the unconnected state. In order to allow
* finding all sockets connected to a particular socket, we put all
* those sockets on their target's queue, hence why we use uds_link and
* not uds_conn. As mentioned before, we allow reconnecting without
* restrictions.
* TODO: see if reconnecting should clear a pending ECONNRESET.
*
* An important note: 'uds' and 'link' may actually be the same socket,
* if the caller chooses to connect a socket with itself!
*/
if (uds_get_type(uds) == SOCK_DGRAM) {
/* Reconnecting to the same socket has no effect. */
if (uds_has_link(uds) && uds->uds_link == link)
return OK;
/*
* If the intended target is linked to another socket, we
* refuse linking to it. Sending or receiving would never work
* anyway. Do allow a socket to link to itself after being
* linked to another socket. The error code is the same as in
* the sending code, borrowed from Linux.
*/
if (uds != link && uds_has_link(link) && link->uds_link != uds)
return EPERM;
/*
* Reset this socket's previous link to another socket, if any.
*/
if (uds_has_link(uds))
uds_del_queue(uds->uds_link, uds);
/*
* Reset any links to this socket, except for the one by
* the intended target. Sending or receiving would no longer
* work anyway. If the socket was linked to itself, clear its
* self-link without generating an ECONNRESET. If the socket
* is relinking to itself, reestablish the link after first
* clearing it.
*/
uds_clear_queue(uds, (uds != link) ? link : NULL);
uds_add_queue(link, uds);
return OK;
}
/*
* For connection-oriented sockets there is more to do. First, make
* sure that the peer is a listening socket, that it has not been shut
* down, and that its backlog is not already at the configured maximum.
*/
if (!uds_is_listening(link))
return ECONNREFUSED;
if (uds_is_shutdown(link, SFL_SHUT_RD | SFL_SHUT_WR))
return ECONNREFUSED;
if (link->uds_queued >= link->uds_backlog)
return ECONNREFUSED;
/*
* The behavior of connect(2) now depends on whether LOCAL_CONNWAIT is
* set on either the connecting or the listening socket. If it is not,
* the socket will be connected to a new as-yet invisible socket, which
* will be the one returned from accept(2) later. If it was, the
* socket will be put in the connecting state.
*/
if (!((uds->uds_flags | link->uds_flags) & UDSF_CONNWAIT)) {
/* On success, 'r' is left set to OK, the eventual result. */
if ((r = uds_attach(link, uds)) != OK)
return r;
assert(uds_is_connected(uds));
} else {
/*
* Disconnected sockets now stop being connected. Any pending
* data can still be received, though.
*/
uds->uds_flags &= ~UDSF_CONNECTED;
r = SUSPEND;
}
/* Obtain credentials for the socket. */
uds_get_cred(uds, user_endpt);
/* Add the socket at the end of the listening socket's queue. */
uds_add_queue(link, uds);
assert(r != SUSPEND || uds_is_connecting(uds));
/*
* Let an accept call handle the rest, which will in turn resume this
* connect call. The sockevent library ensures that this works even if
* the call is non-blocking.
*/
sockevent_raise(&link->uds_sock, SEV_ACCEPT);
return r;
}
/*
 * Put a socket in listening mode, with the given backlog.  Return OK on
 * success, or an error code if the socket is not in a state that allows it
 * to listen.
 */
static int
uds_listen(struct sock * sock, int backlog)
{
	struct udssock *uds;

	uds = (struct udssock *)sock;

	/* The backlog field must be able to hold the maximum backlog value. */
	assert(SOMAXCONN <= USHRT_MAX);

	dprintf(("UDS: listen(%d)\n", uds_get_id(uds)));

	/* Listening is for connection-oriented socket types only. */
	if (uds_get_type(uds) == SOCK_DGRAM)
		return EOPNOTSUPP;

	/* Sockets that are connecting or connected may not listen. */
	if (uds_is_connecting(uds))
		return EINVAL;
	if (uds_is_connected(uds))
		return EINVAL;

	/* According to POSIX, this is the right error code these days. */
	if (!uds_is_bound(uds))
		return EDESTADDRREQ;

	/*
	 * A backlog that is dropped below the current number of queued sockets
	 * does not cause any sockets to be removed from the queue.  It only
	 * results in newly connecting sockets being refused.
	 */
	uds->uds_backlog = backlog;

	/*
	 * The socket may have been disconnected before.  It is entering the
	 * listening state now, so the connection flag has to go.
	 */
	uds->uds_flags &= ~UDSF_CONNECTED;

	return OK;
}
/*
 * Check what an accept request on the socket would do right now.  Return OK
 * if there is a socket to accept, SUSPEND if the request would block waiting
 * for a connection, or an appropriate error code if the request would fail
 * instantly.
 */
static int
uds_test_accept(struct sock * sock)
{
	struct udssock *uds;

	uds = (struct udssock *)sock;

	/*
	 * The socket has to be in listening mode.  The error code for when it
	 * is not depends on the socket type.
	 */
	if (uds_get_type(uds) == SOCK_DGRAM)
		return EOPNOTSUPP;

	if (!uds_is_listening(uds))
		return EINVAL;

	/* Anything on the queue can be accepted immediately. */
	if (uds->uds_queued != 0)
		return OK;

	/*
	 * With an empty queue, the call would normally block.  However, once
	 * the socket has been shut down, no new connections are accepted, and
	 * accept calls fail rather than block.  POSIX does not require this;
	 * it is a convenience feature for applications.
	 */
	if (uds_is_shutdown(uds, SFL_SHUT_RD | SFL_SHUT_WR))
		return ECONNABORTED;

	return SUSPEND;
}
/*
 * Accept a connection on a listening socket.  On success, return the
 * identifier of the socket for the accepting side of the connection, and
 * store the address of the connecting socket in 'addr' and 'addr_len'.  In
 * that case, 'newsockp' is set to NULL, since the sock object of the returned
 * socket has been set up already.  Otherwise, return an error code, or SUSPEND
 * if there is nothing to accept yet.
 */
static sockid_t
uds_accept(struct sock * sock, struct sockaddr * addr, socklen_t * addr_len,
	endpoint_t user_endpt __unused, struct sock ** newsockp)
{
	struct udssock *uds = (struct udssock *)sock;
	struct udssock *pending, *child;
	sockid_t r;

	dprintf(("UDS: accept(%d)\n", uds_get_id(uds)));

	r = uds_test_accept(sock);
	if (r != OK)
		return r;

	/* The socket at the head of the listening queue is up next. */
	assert(!TAILQ_EMPTY(&uds->uds_queue));

	pending = TAILQ_FIRST(&uds->uds_queue);

	/*
	 * The LOCAL_CONNWAIT setting at the time of connect(2) determines
	 * whether the queued socket is still connecting or connected already.
	 * A connected socket already has the socket attached that is to be
	 * returned now.  A connecting socket needs to get one first.
	 */
	assert(uds_is_connecting(pending) || uds_is_connected(pending));

	if (uds_is_connecting(pending)) {
		/*
		 * If attaching a new socket fails, pass on the error.  The
		 * connecting socket stays on the listening queue.
		 */
		r = uds_attach(uds, pending);
		if (r != OK)
			return r;

		assert(uds_is_connected(pending));

		/*
		 * Resume any calls (connect, send, select) that are blocked
		 * on the connecting socket.
		 */
		sockevent_raise(&pending->uds_sock, SEV_CONNECT);
	}

	uds_del_queue(uds, pending);

	child = pending->uds_conn;

	/* The caller gets the address of the connecting socket. */
	uds_make_addr(pending->uds_path, pending->uds_pathlen, addr, addr_len);

	dprintf(("UDS: accept returns %d\n", uds_get_id(child)));

	/*
	 * The sock object was cloned when the socket was attached.  Returning
	 * the socket ID without a pointer to the object tells libsockevent
	 * not to initialize the object again.
	 */
	*newsockp = NULL;

	return uds_get_id(child);
}
/*
 * Set a socket option.  Return OK on success, ENOPROTOOPT if the option is
 * not one that can be set here, or another error code if the given option
 * value could not be copied in or is not acceptable.
 */
static int
uds_setsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t len)
{
	struct udssock *uds = (struct udssock *)sock;
	int r, val;

	dprintf(("UDS: setsockopt(%d,%d,%d)\n", uds_get_id(uds), level, name));

	if (level == SOL_SOCKET) {
		if (name != SO_SNDBUF && name != SO_RCVBUF)
			return ENOPROTOOPT;

		/*
		 * Neither buffer size can actually be changed.  The send
		 * buffer is really the receive buffer of the other side, and
		 * which socket is the other side may differ between send
		 * calls.  If receive buffers could be resized, getsockopt
		 * calls could not even produce an accurate guess of the send
		 * buffer size anymore.  Both sizes are therefore hardcoded.
		 * For the benefit of applications that ask for a certain
		 * minimum size, requests to shrink either buffer are accepted,
		 * but the given size is ignored.
		 */
		r = sockdriver_copyin_opt(data, &val, sizeof(val), len);
		if (r != OK)
			return r;

		if (val <= 0 || (size_t)val > uds_io_buflen())
			return EINVAL;

		return OK;	/* ignore new value */
	}

	if (level != UDSPROTO_UDS)
		return ENOPROTOOPT;

	switch (name) {
	case LOCAL_CREDS:
		r = sockdriver_copyin_opt(data, &val, sizeof(val), len);
		if (r != OK)
			return r;

		if (val) {
			uds->uds_flags |= UDSF_PASSCRED;
		} else {
			uds->uds_flags &= ~UDSF_PASSCRED;

			/*
			 * With the flag disabled, the receive buffer no longer
			 * needs room for credentials.  In incredibly rare
			 * cases, that is enough for blocked sends to be
			 * resumed.
			 */
			sockevent_raise(&uds->uds_sock, SEV_SEND);
		}

		return OK;

	case LOCAL_CONNWAIT:
		r = sockdriver_copyin_opt(data, &val, sizeof(val), len);
		if (r != OK)
			return r;

		/*
		 * The new setting has no effect on sockets that are already
		 * waiting to be accepted, which is why uds_accept() may have
		 * to deal with both cases, on a socket-by-socket basis.
		 */
		if (val)
			uds->uds_flags |= UDSF_CONNWAIT;
		else
			uds->uds_flags &= ~UDSF_CONNWAIT;

		return OK;

	case LOCAL_PEEREID:
		/* This option can be retrieved only, not set. */
	default:
		return ENOPROTOOPT;
	}
}
/*
 * Retrieve a socket option.  Return the result of copying out the option
 * value, ENOPROTOOPT if the option is not known here, or another error code
 * if the option cannot be retrieved in the socket's current state.
 */
static int
uds_getsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t * len)
{
	struct udssock *uds = (struct udssock *)sock;
	int val;

	dprintf(("UDS: getsockopt(%d,%d,%d)\n", uds_get_id(uds), level, name));

	if (level == SOL_SOCKET) {
		if (name != SO_SNDBUF && name != SO_RCVBUF)
			return ENOPROTOOPT;

		/* Both sizes are fixed; uds_setsockopt() explains why. */
		val = (int)uds_io_buflen();

		return sockdriver_copyout_opt(data, &val, sizeof(val), len);
	}

	if (level != UDSPROTO_UDS)
		return ENOPROTOOPT;

	switch (name) {
	case LOCAL_CREDS:
		val = (uds->uds_flags & UDSF_PASSCRED) ? 1 : 0;
		break;

	case LOCAL_CONNWAIT:
		val = (uds->uds_flags & UDSF_CONNWAIT) ? 1 : 0;
		break;

	case LOCAL_PEEREID:
		/* These error codes are documented by getpeereid(3). */
		if (uds_get_type(uds) == SOCK_DGRAM)
			return EINVAL;

		if (!uds_is_connected(uds))
			return ENOTCONN;

		/*
		 * The following error is specific to MINIX3.  It says that
		 * the peer has no credentials to return.  Either obtaining
		 * them failed (which *should* not happen), or the peer was
		 * bound while connected, then disconnected, and then reused
		 * as listening socket.
		 */
		if (uds->uds_conn->uds_cred.unp_pid == -1)
			return EINVAL;

		return sockdriver_copyout_opt(data, &uds->uds_conn->uds_cred,
		    sizeof(uds->uds_conn->uds_cred), len);

	default:
		return ENOPROTOOPT;
	}

	/* The boolean options share the same way of being copied out. */
	return sockdriver_copyout_opt(data, &val, sizeof(val), len);
}
/*
 * Retrieve the local address of a socket, generated from the path that is
 * stored on the socket.  This call always returns OK.
 */
static int
uds_getsockname(struct sock * sock, struct sockaddr * addr,
	socklen_t * addr_len)
{
	struct udssock *uds;

	uds = (struct udssock *)sock;

	dprintf(("UDS: getsockname(%d)\n", uds_get_id(uds)));

	uds_make_addr(uds->uds_path, uds->uds_pathlen, addr, addr_len);

	return OK;
}
/*
 * Return the remote address of a socket, that is, the address of its peer.
 * The call fails with ENOTCONN if there is no peer socket or if the socket is
 * still connecting.
 */
static int
uds_getpeername(struct sock * sock, struct sockaddr * addr,
    socklen_t * addr_len)
{
	struct udssock *uds = (struct udssock *)sock;
	struct udssock *peer;

	dprintf(("UDS: getpeername(%d)\n", uds_get_id(uds)));

	/*
	 * Once a socket has been disconnected, its peer socket is gone, and
	 * the peer address along with it.  Too bad, but NetBSD does the same.
	 */
	if ((peer = uds_get_peer(uds)) == NULL)
		return ENOTCONN;

	/*
	 * A connecting socket does have a peer address that we could return,
	 * but POSIX says (and other platforms agree) that we should deny the
	 * call.
	 */
	if (uds_is_connecting(uds))
		return ENOTCONN;

	uds_make_addr(peer->uds_path, peer->uds_pathlen, addr, addr_len);

	return OK;
}
/*
 * Shut down sending and/or receiving on a socket.  The 'flags' value is a
 * bitwise combination of libsockevent's SFL_SHUT_{RD,WR} flags, rather than
 * one of the SHUT_{RD,WR,RDWR} values used by userland.
 */
static int
uds_shutdown(struct sock * sock, unsigned int flags)
{
	struct udssock *uds = (struct udssock *)sock;
	struct udssock *peer;
	unsigned int events;

	dprintf(("UDS: shutdown(%d,0x%x)\n", uds_get_id(uds), flags));

	/*
	 * When shutting down for reading, any in-flight file descriptors
	 * associated with this socket can be closed right away.
	 */
	if (flags & SFL_SHUT_RD)
		uds_io_reset(uds);

	/* Without a connection, there is no other side to notify. */
	if (!uds_is_connected(uds))
		return OK;

	assert(uds_get_type(uds) != SOCK_DGRAM);

	peer = uds->uds_conn;

	/*
	 * The shutdown may affect ongoing operations on the other side of the
	 * connection: a read shutdown here raises a send event there, and a
	 * write shutdown here raises a receive event there.
	 */
	events = ((flags & SFL_SHUT_RD) ? SEV_SEND : 0) |
	    ((flags & SFL_SHUT_WR) ? SEV_RECV : 0);

	sockevent_raise(&peer->uds_sock, events);

	return OK;
}
/*
 * Close a socket, detaching it from any sockets it is still associated with.
 *
 * The 'force' flag goes unused: all in-flight data is kept on the receiver
 * side, so there is never a need to wait for data to be sent.
 */
static int
uds_close(struct sock * sock, int force __unused)
{
	struct udssock *uds = (struct udssock *)sock;
	int linked;

	dprintf(("UDS: close(%d)\n", uds_get_id(uds)));

	if (uds_get_type(uds) == SOCK_DGRAM) {
		/* Disconnect this socket from its target, if it has one. */
		if (uds_has_link(uds))
			uds_del_queue(uds->uds_link, uds);

		/* Reset all sockets that have this socket as their target. */
		uds_clear_queue(uds, NULL);
	} else if (uds_is_listening(uds)) {
		/*
		 * For all sockets queued on this socket: abort the ones that
		 * are connecting, and break the connections of the ones that
		 * are connected, freeing their peers.
		 */
		uds_clear_queue(uds, NULL);
	} else {
		/* Remember whether the socket was linked before unlinking. */
		linked = uds_has_link(uds);

		if (linked) {
			/*
			 * This socket is connecting or connected while the
			 * other side has not been accepted yet.  Take the
			 * socket off the queue of the listening socket.
			 */
			assert(uds_is_listening(uds->uds_link));

			uds_del_queue(uds->uds_link, uds);
		}

		/*
		 * For a connected socket that was still linked, get rid of its
		 * peer socket altogether.  Otherwise, decouple the peer socket
		 * from this socket, and possibly wake up any pending
		 * operations on it; the peer remains marked as connected, but
		 * will now be disconnected.
		 */
		if (uds_is_connected(uds))
			uds_disconnect(uds, linked ? TRUE : FALSE);
	}

	/* Remove the socket from the hash table if it is in there. */
	if (uds_is_hashed(uds))
		udshash_del(uds);

	return OK;
}
static const struct sockevent_ops uds_ops = {
.sop_pair = uds_pair,
.sop_bind = uds_bind,
.sop_connect = uds_connect,
.sop_listen = uds_listen,
.sop_accept = uds_accept,
.sop_test_accept = uds_test_accept,
.sop_pre_send = uds_pre_send,
.sop_send = uds_send,
.sop_test_send = uds_test_send,
.sop_pre_recv = uds_pre_recv,
.sop_recv = uds_recv,
.sop_test_recv = uds_test_recv,
.sop_setsockopt = uds_setsockopt,
.sop_getsockopt = uds_getsockopt,
.sop_getsockname = uds_getsockname,
.sop_getpeername = uds_getpeername,
.sop_shutdown = uds_shutdown,
.sop_close = uds_close,
.sop_free = uds_free
};
/*
 * Initialize the service: set up the list of free sockets, initialize all
 * modules and the sockevent library, and mark the service as running.
 */
static int
uds_init(int type __unused, sef_init_info_t * info __unused)
{
	struct udssock *slot;

	/*
	 * Build the list of free sockets by clearing the flags of each socket
	 * object and appending it to the list, in array order.
	 */
	TAILQ_INIT(&uds_freelist);

	for (slot = &uds_array[0];
	    slot < &uds_array[__arraycount(uds_array)]; slot++) {
		slot->uds_flags = 0;

		TAILQ_INSERT_TAIL(&uds_freelist, slot, uds_next);
	}

	/* Set up the file-to-socket hash table. */
	udshash_init();

	/* Set up the input/output module. */
	uds_io_init();

	/* Set up the status module. */
	uds_stat_init();

	/* Set up the sockevent library. */
	sockevent_init(uds_socket);

	/* There are no sockets in use yet, and the service is now running. */
	uds_in_use = 0;
	uds_running = TRUE;

	return OK;
}
/*
 * Perform cleanup right before the service shuts down.
 */
static void
uds_cleanup(void)
{

	/* The status module is the only module that needs to clean up. */
	uds_stat_cleanup();
}
/*
 * Handle a signal sent to the service.  The termination signal is the only
 * signal that is acted upon; all other signals are ignored.
 */
static void
uds_signal(int signo)
{

	if (signo == SIGTERM) {
		/*
		 * Stop running, but exit only once all sockets have been
		 * closed.  If no sockets are in use right now, cancel the
		 * receive call so that the main loop gets to re-check its
		 * exit condition.
		 */
		uds_running = FALSE;

		if (uds_in_use == 0)
			sef_cancel();
	}
}
/*
 * Start up the service through the System Event Framework (SEF): register the
 * callbacks of this service, and then hand over control to SEF.
 */
static void
uds_startup(void)
{

	/* The callback for a fresh initialization of the service. */
	sef_setcb_init_fresh(uds_init);

	/* The callback for signals sent to the service. */
	sef_setcb_signal_handler(uds_signal);

	/* With the callbacks in place, have SEF perform the startup. */
	sef_startup();
}
/*
 * The UNIX Domain Sockets driver.  After initialization, receive and dispatch
 * messages until the service has been told to stop and no sockets are in use
 * anymore.
 */
int
main(void)
{
	message m;
	int r, ipc_status;

	/* Initialize the service. */
	uds_startup();

	/* Keep receiving and processing messages until instructed to stop. */
	while (uds_running || uds_in_use > 0) {
		r = sef_receive_status(ANY, &m, &ipc_status);

		/* A call to sef_cancel() interrupts the receive call. */
		if (r == EINTR)
			continue;

		if (r != OK)
			panic("UDS: sef_receive_status failed: %d", r);

		/*
		 * Anything not sent by the MIB service is assumed to be a
		 * socket request, and handed to libsockevent, which will
		 * ignore anything it does not recognize.  Messages from the
		 * MIB service are (ultimately) for the status module.
		 */
		if (m.m_source != MIB_PROC_NR)
			sockevent_process(&m, ipc_status);
		else
			rmib_process(&m, ipc_status);
	}

	/* Clean up before graceful shutdown. */
	uds_cleanup();

	return EXIT_SUCCESS;
}