/* Socket event dispatching library - by D.C. van Moolenbroek */

#include <minix/drivers.h>
#include <minix/sockdriver.h>
#include <minix/sockevent.h>
#include <sys/ioctl.h>

#include "sockevent_proc.h"

#define US		1000000UL	/* microseconds per second */

#define SOCKHASH_SLOTS	256		/* # slots in ID-to-sock hash table */

static SLIST_HEAD(, sock) sockhash[SOCKHASH_SLOTS];

static SLIST_HEAD(, sock) socktimer;

static minix_timer_t sockevent_timer;

static SIMPLEQ_HEAD(, sock) sockevent_pending;

static sockevent_socket_cb_t sockevent_socket_cb = NULL;

static int sockevent_working;

static void socktimer_del(struct sock * sock);
static void sockevent_cancel_send(struct sock * sock,
	struct sockevent_proc * spr, int err);
static void sockevent_cancel_recv(struct sock * sock,
	struct sockevent_proc * spr, int err);

/*
 * Initialize the hash table of sock objects.
 */
static void
sockhash_init(void)
{
	unsigned int slot;

	for (slot = 0; slot < __arraycount(sockhash); slot++)
		SLIST_INIT(&sockhash[slot]);
}

/*
 * Given a socket identifier, return a hash table slot number.
 */
static unsigned int
sockhash_slot(sockid_t id)
{

	/*
	 * The idea of the shift is that a socket driver may offer multiple
	 * classes of sockets, and put the class in the higher bits. The shift
	 * aims to prevent that all classes' first sockets end up in the same
	 * hash slot.
	 */
	return (id + (id >> 16)) % SOCKHASH_SLOTS;
}
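
/*
 * For illustration: with SOCKHASH_SLOTS at 256, a (hypothetical) driver that
 * keeps a socket class in the upper 16 bits of the ID maps ID 0x00010000 to
 * slot (0x00010000 + 0x0001) % 256 = 1 and ID 0x00020000 to slot
 * (0x00020000 + 0x0002) % 256 = 2; without the shift, the first socket of
 * every class would land in slot 0.
 */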

/*
 * Obtain a sock object from the hash table using its unique identifier.
 * Return a pointer to the object if found, or NULL otherwise.
 */
static struct sock *
sockhash_get(sockid_t id)
{
	struct sock *sock;
	unsigned int slot;

	slot = sockhash_slot(id);

	SLIST_FOREACH(sock, &sockhash[slot], sock_hash) {
		if (sock->sock_id == id)
			return sock;
	}

	return NULL;
}

/*
 * Add a sock object to the hash table. The sock object must have a valid ID
 * in its 'sock_id' field, and must not be in the hash table already.
 */
static void
sockhash_add(struct sock * sock)
{
	unsigned int slot;

	slot = sockhash_slot(sock->sock_id);

	SLIST_INSERT_HEAD(&sockhash[slot], sock, sock_hash);
}

/*
 * Remove a sock object from the hash table. The sock object must be in the
 * hash table.
 */
static void
sockhash_del(struct sock * sock)
{
	unsigned int slot;

	slot = sockhash_slot(sock->sock_id);

	/* This macro is O(n). */
	SLIST_REMOVE(&sockhash[slot], sock, sock, sock_hash);
}

/*
|
|
* Reset a socket object to a proper initial state, with a particular socket
|
|
* identifier, a SOCK_ type, and a socket operations table. The socket is
|
|
* added to the ID-to-object hash table. This function always succeeds.
|
|
*/
|
|
static void
|
|
sockevent_reset(struct sock * sock, sockid_t id, int domain, int type,
|
|
const struct sockevent_ops * ops)
|
|
{
|
|
|
|
assert(sock != NULL);
|
|
|
|
memset(sock, 0, sizeof(*sock));
|
|
|
|
sock->sock_id = id;
|
|
sock->sock_domain = domain;
|
|
sock->sock_type = type;
|
|
|
|
sock->sock_slowat = 1;
|
|
sock->sock_rlowat = 1;
|
|
|
|
sock->sock_ops = ops;
|
|
sock->sock_proc = NULL;
|
|
sock->sock_select.ss_endpt = NONE;
|
|
|
|
sockhash_add(sock);
|
|
}
|
|
|
|
/*
|
|
* Initialize a new socket that will serve as an accepted socket on the given
|
|
* listening socket 'sock'. The new socket is given as 'newsock', and its new
|
|
* socket identifier is given as 'newid'. This function always succeeds.
|
|
*/
|
|
void
|
|
sockevent_clone(struct sock * sock, struct sock * newsock, sockid_t newid)
|
|
{
|
|
|
|
sockevent_reset(newsock, newid, (int)sock->sock_domain,
|
|
sock->sock_type, sock->sock_ops);
|
|
|
|
/* These are the settings that are currently inherited. */
|
|
newsock->sock_opt = sock->sock_opt & ~SO_ACCEPTCONN;
|
|
newsock->sock_linger = sock->sock_linger;
|
|
newsock->sock_stimeo = sock->sock_stimeo;
|
|
newsock->sock_rtimeo = sock->sock_rtimeo;
|
|
newsock->sock_slowat = sock->sock_slowat;
|
|
newsock->sock_rlowat = sock->sock_rlowat;
|
|
|
|
newsock->sock_flags |= SFL_CLONED;
|
|
}
|
|
|
|
/*
|
|
* A new socket has just been accepted. The corresponding listening socket is
|
|
* given as 'sock'. The new socket has ID 'newid', and if it had not already
|
|
* been added to the hash table through sockevent_clone() before, 'newsock' is
|
|
* a non-NULL pointer which identifies the socket object to clone into.
|
|
*/
|
|
static void
|
|
sockevent_accepted(struct sock * sock, struct sock * newsock, sockid_t newid)
|
|
{
|
|
|
|
if (newsock == NULL) {
|
|
if ((newsock = sockhash_get(newid)) == NULL)
|
|
panic("libsockdriver: socket driver returned unknown "
|
|
"ID %d from accept callback", newid);
|
|
} else
|
|
sockevent_clone(sock, newsock, newid);
|
|
|
|
assert(newsock->sock_flags & SFL_CLONED);
|
|
newsock->sock_flags &= ~SFL_CLONED;
|
|
}
|
|
|
|
/*
|
|
* Allocate a sock object, by asking the socket driver for one. On success,
|
|
* return OK, with a pointer to the new object stored in 'sockp'. This new
|
|
* object has all its fields set to initial values, in part based on the given
|
|
* parameters. On failure, return an error code. Failure has two typical
|
|
 * causes: either the given domain, type, protocol combination is not supported,
|
|
* or the socket driver is out of sockets (globally or for this combination).
|
|
*/
|
|
static int
|
|
sockevent_alloc(int domain, int type, int protocol, endpoint_t user_endpt,
|
|
struct sock ** sockp)
|
|
{
|
|
struct sock *sock;
|
|
const struct sockevent_ops *ops;
|
|
sockid_t r;
|
|
|
|
/*
|
|
* Verify that the given domain is sane. Unlike the type and protocol,
|
|
* the domain is already verified by VFS, so we do not limit ourselves
|
|
* here. The result is that we can store the domain in just a byte.
|
|
*/
|
|
if (domain < 0 || domain > UINT8_MAX)
|
|
return EAFNOSUPPORT;
|
|
|
|
/* Make sure that the library has actually been initialized. */
|
|
if (sockevent_socket_cb == NULL)
|
|
panic("libsockevent: not initialized");
|
|
|
|
sock = NULL;
|
|
ops = NULL;
|
|
|
|
/*
|
|
* Ask the socket driver to create a socket for the given combination
|
|
* of domain, type, and protocol. If so, let it return a new sock
|
|
* object, a unique socket identifier for that object, and an
|
|
* operations table for it.
|
|
*/
|
|
if ((r = sockevent_socket_cb(domain, type, protocol, user_endpt, &sock,
|
|
&ops)) < 0)
|
|
return r;
|
|
|
|
assert(sock != NULL);
|
|
assert(ops != NULL);
|
|
|
|
sockevent_reset(sock, r, domain, type, ops);
|
|
|
|
*sockp = sock;
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Free a previously allocated sock object.
|
|
*/
|
|
static void
|
|
sockevent_free(struct sock * sock)
|
|
{
|
|
const struct sockevent_ops *ops;
|
|
|
|
assert(sock->sock_proc == NULL);
|
|
|
|
socktimer_del(sock);
|
|
|
|
sockhash_del(sock);
|
|
|
|
/*
|
|
* Invalidate the operations table on the socket, before freeing the
|
|
* socket. This allows us to detect cases where sockevent functions
|
|
* are called on sockets that have already been freed.
|
|
*/
|
|
ops = sock->sock_ops;
|
|
sock->sock_ops = NULL;
|
|
|
|
assert(ops != NULL);
|
|
assert(ops->sop_free != NULL);
|
|
|
|
ops->sop_free(sock);
|
|
}
|
|
|
|
/*
|
|
* Create a new socket.
|
|
*/
|
|
static sockid_t
|
|
sockevent_socket(int domain, int type, int protocol, endpoint_t user_endpt)
|
|
{
|
|
struct sock *sock;
|
|
int r;
|
|
|
|
if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
|
|
&sock)) != OK)
|
|
return r;
|
|
|
|
return sock->sock_id;
|
|
}
|
|
|
|
/*
|
|
* Create a pair of connected sockets.
|
|
*/
|
|
static int
|
|
sockevent_socketpair(int domain, int type, int protocol, endpoint_t user_endpt,
|
|
sockid_t id[2])
|
|
{
|
|
struct sock *sock1, *sock2;
|
|
int r;
|
|
|
|
if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
|
|
&sock1)) != OK)
|
|
return r;
|
|
|
|
/* Creating socket pairs is not always supported. */
|
|
if (sock1->sock_ops->sop_pair == NULL) {
|
|
sockevent_free(sock1);
|
|
|
|
return EOPNOTSUPP;
|
|
}
|
|
|
|
if ((r = sockevent_alloc(domain, type, protocol, user_endpt,
|
|
&sock2)) != OK) {
|
|
sockevent_free(sock1);
|
|
|
|
return r;
|
|
}
|
|
|
|
assert(sock1->sock_ops == sock2->sock_ops);
|
|
|
|
r = sock1->sock_ops->sop_pair(sock1, sock2, user_endpt);
|
|
|
|
if (r != OK) {
|
|
sockevent_free(sock2);
|
|
sockevent_free(sock1);
|
|
|
|
return r;
|
|
}
|
|
|
|
id[0] = sock1->sock_id;
|
|
id[1] = sock2->sock_id;
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* A send request returned EPIPE. If desired, send a SIGPIPE signal to the
|
|
* user process that issued the request.
|
|
*/
|
|
static void
|
|
sockevent_sigpipe(struct sock * sock, endpoint_t user_endpt, int flags)
|
|
{
|
|
|
|
/*
|
|
* POSIX says that pipe signals should be generated for SOCK_STREAM
|
|
* sockets. Linux does just this, NetBSD raises signals for all socket
|
|
* types.
|
|
*/
|
|
if (sock->sock_type != SOCK_STREAM)
|
|
return;
|
|
|
|
/*
|
|
* Why would there be fewer than four ways to do the same thing?
|
|
* O_NOSIGPIPE, MSG_NOSIGNAL, SO_NOSIGPIPE, and of course blocking
|
|
* SIGPIPE. VFS already sets MSG_NOSIGNAL for calls on sockets with
|
|
* O_NOSIGPIPE. The fact that SO_NOSIGPIPE is a thing, is also the
|
|
* reason why we cannot let VFS handle signal generation altogether.
|
|
*/
|
|
if (flags & MSG_NOSIGNAL)
|
|
return;
|
|
if (sock->sock_opt & SO_NOSIGPIPE)
|
|
return;
|
|
|
|
/*
|
|
* Send a SIGPIPE signal to the user process. Unfortunately we cannot
|
|
* guarantee that the SIGPIPE reaches the user process before the send
|
|
* call returns. Usually, the scheduling priorities of system services
|
|
* are such that the signal is likely to arrive first anyway, but if
|
|
* timely arrival of the signal is required, a more fundamental change
|
|
* to the system would be needed.
|
|
*/
|
|
sys_kill(user_endpt, SIGPIPE);
|
|
}
|
|
|
|
/*
|
|
* Suspend a request without data, that is, a bind, connect, accept, or close
|
|
* request.
|
|
*/
|
|
static void
|
|
sockevent_suspend(struct sock * sock, unsigned int event,
|
|
const struct sockdriver_call * __restrict call, endpoint_t user_endpt)
|
|
{
|
|
struct sockevent_proc *spr, **sprp;
|
|
|
|
/* There is one slot for each process, so this should never fail. */
|
|
if ((spr = sockevent_proc_alloc()) == NULL)
|
|
panic("libsockevent: too many suspended processes");
|
|
|
|
spr->spr_next = NULL;
|
|
spr->spr_event = event;
|
|
spr->spr_timer = FALSE;
|
|
spr->spr_call = *call;
|
|
spr->spr_endpt = user_endpt;
|
|
|
|
/*
|
|
* Add the request to the tail of the queue. This operation is O(n),
|
|
* but the number of suspended requests per socket is expected to be
|
|
* low at all times.
|
|
*/
|
|
for (sprp = &sock->sock_proc; *sprp != NULL;
|
|
sprp = &(*sprp)->spr_next);
|
|
*sprp = spr;
|
|
}
|
|
|
|
/*
|
|
* Suspend a request with data, that is, a send or receive request.
|
|
*/
|
|
static void
|
|
sockevent_suspend_data(struct sock * sock, unsigned int event, int timer,
|
|
const struct sockdriver_call * __restrict call, endpoint_t user_endpt,
|
|
const struct sockdriver_data * __restrict data, size_t len, size_t off,
|
|
const struct sockdriver_data * __restrict ctl, socklen_t ctl_len,
|
|
socklen_t ctl_off, int flags, int rflags, clock_t time)
|
|
{
|
|
struct sockevent_proc *spr, **sprp;
|
|
|
|
/* There is one slot for each process, so this should never fail. */
|
|
if ((spr = sockevent_proc_alloc()) == NULL)
|
|
panic("libsockevent: too many suspended processes");
|
|
|
|
spr->spr_next = NULL;
|
|
spr->spr_event = event;
|
|
spr->spr_timer = timer;
|
|
spr->spr_call = *call;
|
|
spr->spr_endpt = user_endpt;
|
|
sockdriver_pack_data(&spr->spr_data, call, data, len);
|
|
spr->spr_datalen = len;
|
|
spr->spr_dataoff = off;
|
|
sockdriver_pack_data(&spr->spr_ctl, call, ctl, ctl_len);
|
|
spr->spr_ctllen = ctl_len;
|
|
spr->spr_ctloff = ctl_off;
|
|
spr->spr_flags = flags;
|
|
spr->spr_rflags = rflags;
|
|
spr->spr_time = time;
|
|
|
|
/*
|
|
* Add the request to the tail of the queue. This operation is O(n),
|
|
* but the number of suspended requests per socket is expected to be
|
|
* low at all times.
|
|
*/
|
|
for (sprp = &sock->sock_proc; *sprp != NULL;
|
|
sprp = &(*sprp)->spr_next);
|
|
*sprp = spr;
|
|
}
|
|
|
|
/*
|
|
* Return TRUE if there are any suspended requests on the given socket's queue
|
|
* that match any of the events in the given event mask, or FALSE otherwise.
|
|
*/
|
|
static int
|
|
sockevent_has_suspended(struct sock * sock, unsigned int mask)
|
|
{
|
|
struct sockevent_proc *spr;
|
|
|
|
for (spr = sock->sock_proc; spr != NULL; spr = spr->spr_next)
|
|
if (spr->spr_event & mask)
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
* Check whether the given call is on the given socket's queue of suspended
|
|
* requests. If so, remove it from the queue and return a pointer to the
|
|
* suspension data structure. The caller is then responsible for freeing that
|
|
* data structure using sockevent_proc_free(). If the call was not found, the
|
|
* function returns NULL.
|
|
*/
|
|
static struct sockevent_proc *
|
|
sockevent_unsuspend(struct sock * sock, const struct sockdriver_call * call)
|
|
{
|
|
struct sockevent_proc *spr, **sprp;
|
|
|
|
/* Find the suspended request being canceled. */
|
|
for (sprp = &sock->sock_proc; (spr = *sprp) != NULL;
|
|
sprp = &spr->spr_next) {
|
|
if (spr->spr_call.sc_endpt == call->sc_endpt &&
|
|
spr->spr_call.sc_req == call->sc_req) {
|
|
/* Found; remove and return it. */
|
|
*sprp = spr->spr_next;
|
|
|
|
return spr;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Attempt to resume the given suspended request for the given socket object.
|
|
* Return TRUE if the suspended request has been fully resumed and can be
|
|
* removed from the queue of suspended requests, or FALSE if it has not been
|
|
* fully resumed and should stay on the queue. In the latter case, no
|
|
* resumption will be attempted for other suspended requests of the same type.
|
|
*/
|
|
static int
|
|
sockevent_resume(struct sock * sock, struct sockevent_proc * spr)
|
|
{
|
|
struct sock *newsock;
|
|
struct sockdriver_data data, ctl;
|
|
char addr[SOCKADDR_MAX];
|
|
socklen_t addr_len;
|
|
size_t len, min;
|
|
sockid_t r;
|
|
|
|
switch (spr->spr_event) {
|
|
case SEV_CONNECT:
|
|
/*
|
|
* If the connect call was suspended for the purpose of
|
|
* intercepting resumption, simply remove it from the queue.
|
|
*/
|
|
if (spr->spr_call.sc_endpt == NONE)
|
|
return TRUE;
|
|
|
|
/* FALLTHROUGH */
|
|
case SEV_BIND:
|
|
if ((r = sock->sock_err) != OK)
|
|
sock->sock_err = OK;
|
|
|
|
sockdriver_reply_generic(&spr->spr_call, r);
|
|
|
|
return TRUE;
|
|
|
|
case SEV_ACCEPT:
|
|
/*
|
|
* A previous accept call may not have blocked on a socket that
|
|
* was not in listening mode.
|
|
*/
|
|
assert(sock->sock_opt & SO_ACCEPTCONN);
|
|
|
|
addr_len = 0;
|
|
newsock = NULL;
|
|
|
|
/*
|
|
* This call is suspended, which implies that the call table
|
|
		 * pointer has already been tested to be non-NULL.
|
|
*/
|
|
if ((r = sock->sock_ops->sop_accept(sock,
|
|
(struct sockaddr *)&addr, &addr_len, spr->spr_endpt,
|
|
&newsock)) == SUSPEND)
|
|
return FALSE;
|
|
|
|
if (r >= 0) {
|
|
assert(addr_len <= sizeof(addr));
|
|
|
|
sockevent_accepted(sock, newsock, r);
|
|
}
|
|
|
|
sockdriver_reply_accept(&spr->spr_call, r,
|
|
(struct sockaddr *)&addr, addr_len);
|
|
|
|
return TRUE;
|
|
|
|
case SEV_SEND:
|
|
if (sock->sock_err != OK || (sock->sock_flags & SFL_SHUT_WR)) {
|
|
if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
|
|
r = (int)spr->spr_dataoff;
|
|
else if ((r = sock->sock_err) != OK)
|
|
sock->sock_err = OK;
|
|
else
|
|
r = EPIPE;
|
|
} else {
|
|
sockdriver_unpack_data(&data, &spr->spr_call,
|
|
&spr->spr_data, spr->spr_datalen);
|
|
sockdriver_unpack_data(&ctl, &spr->spr_call,
|
|
&spr->spr_ctl, spr->spr_ctllen);
|
|
|
|
len = spr->spr_datalen - spr->spr_dataoff;
|
|
|
|
min = sock->sock_slowat;
|
|
if (min > len)
|
|
min = len;
|
|
|
|
/*
|
|
* As mentioned elsewhere, we do not save the address
|
|
* upon suspension so we cannot supply it anymore here.
|
|
*/
|
|
r = sock->sock_ops->sop_send(sock, &data, len,
|
|
&spr->spr_dataoff, &ctl,
|
|
spr->spr_ctllen - spr->spr_ctloff,
|
|
&spr->spr_ctloff, NULL, 0, spr->spr_endpt,
|
|
spr->spr_flags, min);
|
|
|
|
assert(r <= 0);
|
|
|
|
if (r == SUSPEND)
|
|
return FALSE;
|
|
|
|
/*
|
|
* If an error occurred but some data were already
|
|
* sent, return the progress rather than the error.
|
|
* Note that if the socket driver detects an
|
|
* asynchronous error during the send, it itself must
|
|
* perform this check and call sockevent_set_error() as
|
|
* needed, to make sure the error does not get lost.
|
|
*/
|
|
if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
|
|
r = spr->spr_dataoff;
|
|
}
|
|
|
|
if (r == EPIPE)
|
|
sockevent_sigpipe(sock, spr->spr_endpt,
|
|
spr->spr_flags);
|
|
|
|
sockdriver_reply_generic(&spr->spr_call, r);
|
|
|
|
return TRUE;
|
|
|
|
case SEV_RECV:
|
|
addr_len = 0;
|
|
|
|
if (sock->sock_flags & SFL_SHUT_RD)
|
|
r = SOCKEVENT_EOF;
|
|
else {
|
|
len = spr->spr_datalen - spr->spr_dataoff;
|
|
|
|
if (sock->sock_err == OK) {
|
|
min = sock->sock_rlowat;
|
|
if (min > len)
|
|
min = len;
|
|
} else
|
|
min = 0;
|
|
|
|
sockdriver_unpack_data(&data, &spr->spr_call,
|
|
&spr->spr_data, spr->spr_datalen);
|
|
sockdriver_unpack_data(&ctl, &spr->spr_call,
|
|
&spr->spr_ctl, spr->spr_ctllen);
|
|
|
|
r = sock->sock_ops->sop_recv(sock, &data, len,
|
|
&spr->spr_dataoff, &ctl,
|
|
spr->spr_ctllen - spr->spr_ctloff,
|
|
&spr->spr_ctloff, (struct sockaddr *)&addr,
|
|
&addr_len, spr->spr_endpt, spr->spr_flags, min,
|
|
&spr->spr_rflags);
|
|
|
|
/*
|
|
* If the call remains suspended but a socket error is
|
|
* pending, return the pending socket error instead.
|
|
*/
|
|
if (r == SUSPEND) {
|
|
if (sock->sock_err == OK)
|
|
return FALSE;
|
|
|
|
r = SOCKEVENT_EOF;
|
|
}
|
|
|
|
assert(addr_len <= sizeof(addr));
|
|
}
|
|
|
|
/*
|
|
* If the receive call reported success, or if some data were
|
|
* already received, return the (partial) result. Otherwise,
|
|
* return a pending error if any, or otherwise a regular error
|
|
* or 0 for EOF.
|
|
*/
|
|
if (r == OK || spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
|
|
r = (int)spr->spr_dataoff;
|
|
else if (sock->sock_err != OK) {
|
|
r = sock->sock_err;
|
|
|
|
sock->sock_err = OK;
|
|
} else if (r == SOCKEVENT_EOF)
|
|
r = 0; /* EOF */
|
|
|
|
sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff,
|
|
(struct sockaddr *)&addr, addr_len, spr->spr_rflags);
|
|
|
|
return TRUE;
|
|
|
|
case SEV_CLOSE:
|
|
sockdriver_reply_generic(&spr->spr_call, OK);
|
|
|
|
return TRUE;
|
|
|
|
default:
|
|
panic("libsockevent: process suspended on unknown event 0x%x",
|
|
spr->spr_event);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Return TRUE if the given socket is ready for reading for a select call, or
|
|
* FALSE otherwise.
|
|
*/
|
|
static int
|
|
sockevent_test_readable(struct sock * sock)
|
|
{
|
|
int r;
|
|
|
|
/*
|
|
* The meaning of "ready-to-read" depends on whether the socket is a
|
|
* listening socket or not. For the former, it is a test on whether
|
|
* there are any new sockets to accept. However, shutdown flags take
|
|
* precedence in both cases.
|
|
*/
|
|
if (sock->sock_flags & SFL_SHUT_RD)
|
|
return TRUE;
|
|
|
|
if (sock->sock_err != OK)
|
|
return TRUE;
|
|
|
|
/*
|
|
* Depending on whether this is a listening-mode socket, test whether
|
|
* either accepts or receives would block.
|
|
*/
|
|
if (sock->sock_opt & SO_ACCEPTCONN) {
|
|
if (sock->sock_ops->sop_test_accept == NULL)
|
|
return TRUE;
|
|
|
|
r = sock->sock_ops->sop_test_accept(sock);
|
|
} else {
|
|
if (sock->sock_ops->sop_test_recv == NULL)
|
|
return TRUE;
|
|
|
|
r = sock->sock_ops->sop_test_recv(sock, sock->sock_rlowat,
|
|
NULL);
|
|
}
|
|
|
|
return (r != SUSPEND);
|
|
}
|
|
|
|
/*
|
|
* Return TRUE if the given socket is ready for writing for a select call, or
|
|
* FALSE otherwise.
|
|
*/
|
|
static int
|
|
sockevent_test_writable(struct sock * sock)
|
|
{
|
|
int r;
|
|
|
|
if (sock->sock_err != OK)
|
|
return TRUE;
|
|
|
|
if (sock->sock_flags & SFL_SHUT_WR)
|
|
return TRUE;
|
|
|
|
if (sock->sock_ops->sop_test_send == NULL)
|
|
return TRUE;
|
|
|
|
/*
|
|
* Test whether sends would block. The low send watermark is relevant
|
|
* for stream-type sockets only.
|
|
*/
|
|
r = sock->sock_ops->sop_test_send(sock, sock->sock_slowat);
|
|
|
|
return (r != SUSPEND);
|
|
}
|
|
|
|
/*
|
|
* Test whether any of the given select operations are ready on the given
|
|
* socket. Return the subset of ready operations; zero if none.
|
|
*/
|
|
static unsigned int
|
|
sockevent_test_select(struct sock * sock, unsigned int ops)
|
|
{
|
|
unsigned int ready_ops;
|
|
|
|
assert(!(ops & ~(SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR)));
|
|
|
|
/*
|
|
* We do not support the "bind in progress" case here. If a blocking
|
|
* bind call is in progress, the file descriptor should not be ready
|
|
* for either reading or writing. Currently, socket drivers will have
|
|
* to cover this case themselves. Otherwise we would have to check the
|
|
* queue of suspended calls, or create a custom flag for this.
|
|
*/
|
|
|
|
ready_ops = 0;
|
|
|
|
if ((ops & SDEV_OP_RD) && sockevent_test_readable(sock))
|
|
ready_ops |= SDEV_OP_RD;
|
|
|
|
if ((ops & SDEV_OP_WR) && sockevent_test_writable(sock))
|
|
ready_ops |= SDEV_OP_WR;
|
|
|
|
/* TODO: OOB receive support. */
|
|
|
|
return ready_ops;
|
|
}
|
|
|
|
/*
|
|
* Fire the given mask of events on the given socket object now.
|
|
*/
|
|
static void
|
|
sockevent_fire(struct sock * sock, unsigned int mask)
|
|
{
|
|
struct sockevent_proc *spr, **sprp;
|
|
unsigned int r, flag, ops;
|
|
|
|
/*
|
|
* A completed connection attempt (successful or not) also always
|
|
* implies that the socket becomes writable. For convenience we
|
|
* enforce this rule here, because it is easy to forget. Note that in
|
|
* any case, a suspended connect request should be the first in the
|
|
* list, so we do not risk returning 0 from a connect call as a result
|
|
* of sock_err getting eaten by another resumed call.
|
|
*/
|
|
if (mask & SEV_CONNECT)
|
|
mask |= SEV_SEND;
|
|
|
|
/*
|
|
* First try resuming regular system calls.
|
|
*/
|
|
for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
|
|
flag = spr->spr_event;
|
|
|
|
if ((mask & flag) && sockevent_resume(sock, spr)) {
|
|
*sprp = spr->spr_next;
|
|
|
|
sockevent_proc_free(spr);
|
|
} else {
|
|
mask &= ~flag;
|
|
|
|
sprp = &spr->spr_next;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Then see if we can satisfy pending select queries.
|
|
*/
|
|
if ((mask & (SEV_ACCEPT | SEV_SEND | SEV_RECV)) &&
|
|
sock->sock_select.ss_endpt != NONE) {
|
|
assert(sock->sock_selops != 0);
|
|
|
|
/*
|
|
* Only retest select operations that, based on the given event
|
|
* mask, could possibly be satisfied now.
|
|
*/
|
|
ops = sock->sock_selops;
|
|
if (!(mask & (SEV_ACCEPT | SEV_RECV)))
|
|
ops &= ~SDEV_OP_RD;
|
|
if (!(mask & SEV_SEND))
|
|
ops &= ~SDEV_OP_WR;
|
|
if (!(0)) /* TODO: OOB receive support */
|
|
ops &= ~SDEV_OP_ERR;
|
|
|
|
/* Are there any operations to test? */
|
|
if (ops != 0) {
|
|
/* Test those operations. */
|
|
r = sockevent_test_select(sock, ops);
|
|
|
|
/* Were any satisfied? */
|
|
if (r != 0) {
|
|
/* Let the caller know. */
|
|
sockdriver_reply_select(&sock->sock_select,
|
|
sock->sock_id, r);
|
|
|
|
sock->sock_selops &= ~r;
|
|
|
|
/* Are there any saved operations left now? */
|
|
if (sock->sock_selops == 0)
|
|
sock->sock_select.ss_endpt = NONE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Finally, a SEV_CLOSE event unconditionally frees the sock object.
|
|
* This event should be fired only for sockets that are either not yet,
|
|
* or not anymore, in use by userland.
|
|
*/
|
|
if (mask & SEV_CLOSE) {
|
|
assert(sock->sock_flags & (SFL_CLONED | SFL_CLOSING));
|
|
|
|
sockevent_free(sock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Process all pending events. Events must still be blocked, so that if
|
|
* handling one event generates a new event, that event is handled from here
|
|
* rather than immediately.
|
|
*/
|
|
static void
|
|
sockevent_pump(void)
|
|
{
|
|
struct sock *sock;
|
|
unsigned int mask;
|
|
|
|
assert(sockevent_working);
|
|
|
|
while (!SIMPLEQ_EMPTY(&sockevent_pending)) {
|
|
sock = SIMPLEQ_FIRST(&sockevent_pending);
|
|
SIMPLEQ_REMOVE_HEAD(&sockevent_pending, sock_next);
|
|
|
|
mask = sock->sock_events;
|
|
assert(mask != 0);
|
|
sock->sock_events = 0;
|
|
|
|
sockevent_fire(sock, mask);
|
|
/*
|
|
		 * At this point, the sock object may already have been re-added
|
|
* to the event list, or even be deallocated altogether.
|
|
*/
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Return TRUE if any events are pending on any sockets, or FALSE otherwise.
|
|
*/
|
|
static int
|
|
sockevent_has_events(void)
|
|
{
|
|
|
|
return (!SIMPLEQ_EMPTY(&sockevent_pending));
|
|
}
|
|
|
|
/*
|
|
* Raise the given bitwise-OR'ed set of events on the given socket object.
|
|
 * Depending on the context of the call, the events may or may not be
|
|
* processed immediately.
|
|
*/
|
|
void
|
|
sockevent_raise(struct sock * sock, unsigned int mask)
|
|
{
|
|
|
|
assert(sock->sock_ops != NULL);
|
|
|
|
/*
|
|
* Handle SEV_CLOSE first. This event must not be deferred, so as to
|
|
* let socket drivers recycle sock objects as they are needed. For
|
|
* example, a user-closed TCP socket may stay open to transmit the
|
|
* remainder of its send buffer, until the TCP driver runs out of
|
|
* sockets, in which case the connection is aborted. The driver would
|
|
* then raise SEV_CLOSE on the sock object so as to clean it up, and
|
|
* immediately reuse it afterward. If the close event were to be
|
|
* deferred, this immediate reuse would not be possible.
|
|
*
|
|
* The sop_free() callback routine may not raise new events, and thus,
|
|
* the state of 'sockevent_working' need not be checked or set here.
|
|
*/
|
|
if (mask & SEV_CLOSE) {
|
|
assert(mask == SEV_CLOSE);
|
|
|
|
sockevent_fire(sock, mask);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* If we are currently processing a socket message, store the event for
|
|
* later. If not, this call is not coming from inside libsockevent,
|
|
* and we must handle the event immediately.
|
|
*/
|
|
if (sockevent_working) {
|
|
assert(mask != 0);
|
|
assert(mask <= UCHAR_MAX); /* sock_events field size check */
|
|
|
|
if (sock->sock_events == 0)
|
|
SIMPLEQ_INSERT_TAIL(&sockevent_pending, sock,
|
|
sock_next);
|
|
|
|
sock->sock_events |= mask;
|
|
} else {
|
|
sockevent_working = TRUE;
|
|
|
|
sockevent_fire(sock, mask);
|
|
|
|
if (sockevent_has_events())
|
|
sockevent_pump();
|
|
|
|
sockevent_working = FALSE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set a pending error on the socket object, and wake up any suspended
|
|
* operations that are affected by this.
|
|
*/
|
|
void
|
|
sockevent_set_error(struct sock * sock, int err)
|
|
{
|
|
|
|
assert(err < 0);
|
|
assert(sock->sock_ops != NULL);
|
|
|
|
/* If an error was set already, it will be overridden. */
|
|
sock->sock_err = err;
|
|
|
|
sockevent_raise(sock, SEV_BIND | SEV_CONNECT | SEV_SEND | SEV_RECV);
|
|
}
|
|
|
|
/*
|
|
* Initialize timer-related data structures.
|
|
*/
|
|
static void
|
|
socktimer_init(void)
|
|
{
|
|
|
|
SLIST_INIT(&socktimer);
|
|
|
|
init_timer(&sockevent_timer);
|
|
}
|
|
|
|
/*
|
|
* Check whether the given socket object has any suspended requests that have
|
|
* now expired. If so, cancel them. Also, if the socket object has any
|
|
* suspended requests with a timeout that has not yet expired, return the
|
|
* earliest (relative) timeout of all of them, or TMR_NEVER if no such requests
|
|
* are present.
|
|
*/
|
|
static clock_t
|
|
sockevent_expire(struct sock * sock, clock_t now)
|
|
{
|
|
struct sockevent_proc *spr, **sprp;
|
|
clock_t lowest, left;
|
|
int r;
|
|
|
|
/*
|
|
* First handle the case that the socket is closed. In this case,
|
|
* there may be a linger timer, although the socket may also simply
|
|
* still be on the timer list because of a request that did not time
|
|
* out right before the socket was closed.
|
|
*/
|
|
if (sock->sock_flags & SFL_CLOSING) {
|
|
/* Was there a linger timer and has it expired? */
|
|
if ((sock->sock_opt & SO_LINGER) &&
|
|
tmr_is_first(sock->sock_linger, now)) {
|
|
assert(sock->sock_ops->sop_close != NULL);
|
|
|
|
/*
|
|
* Whatever happens next, we must now resume the
|
|
* pending close operation, if it was not canceled
|
|
* earlier. As before, we return OK rather than the
|
|
* standardized EWOULDBLOCK, to ensure that the user
|
|
* process knows the file descriptor has been closed.
|
|
*/
|
|
if ((spr = sock->sock_proc) != NULL) {
|
|
assert(spr->spr_event == SEV_CLOSE);
|
|
assert(spr->spr_next == NULL);
|
|
|
|
sock->sock_proc = NULL;
|
|
|
|
sockdriver_reply_generic(&spr->spr_call, OK);
|
|
|
|
sockevent_proc_free(spr);
|
|
}
|
|
|
|
/*
|
|
* Tell the socket driver that closing the socket is
|
|
* now a bit more desired than the last time we asked.
|
|
*/
|
|
r = sock->sock_ops->sop_close(sock, TRUE /*force*/);
|
|
|
|
assert(r == OK || r == SUSPEND);
|
|
|
|
/*
|
|
* The linger timer fires once. After that, the socket
|
|
* driver is free to decide that it still will not
|
|
* close the socket. If it does, do not fire the
|
|
* linger timer again.
|
|
*/
|
|
if (r == SUSPEND)
|
|
sock->sock_opt &= ~SO_LINGER;
|
|
else
|
|
sockevent_free(sock);
|
|
}
|
|
|
|
return TMR_NEVER;
|
|
}
|
|
|
|
/*
|
|
* Then see if any send and/or receive requests have expired. Also see
|
|
* if there are any send and/or receive requests left that have not yet
|
|
* expired but do have a timeout, so that we can return the lowest of
|
|
* those timeouts.
|
|
*/
|
|
lowest = TMR_NEVER;
|
|
|
|
for (sprp = &sock->sock_proc; (spr = *sprp) != NULL; ) {
|
|
/* Skip requests without a timeout. */
|
|
if (spr->spr_timer == 0) {
|
|
sprp = &spr->spr_next;
|
|
|
|
continue;
|
|
}
|
|
|
|
assert(spr->spr_event == SEV_SEND ||
|
|
spr->spr_event == SEV_RECV);
|
|
|
|
/*
|
|
* If the request has expired, cancel it and remove it from the
|
|
* list. Otherwise, see if the request has the lowest number
|
|
* of ticks until its timeout so far.
|
|
*/
|
|
if (tmr_is_first(spr->spr_time, now)) {
|
|
*sprp = spr->spr_next;
|
|
|
|
if (spr->spr_event == SEV_SEND)
|
|
sockevent_cancel_send(sock, spr, EWOULDBLOCK);
|
|
else
|
|
sockevent_cancel_recv(sock, spr, EWOULDBLOCK);
|
|
|
|
sockevent_proc_free(spr);
|
|
} else {
|
|
left = spr->spr_time - now;
|
|
|
|
if (lowest == TMR_NEVER || lowest > left)
|
|
lowest = left;
|
|
|
|
sprp = &spr->spr_next;
|
|
}
|
|
}
|
|
|
|
return lowest;
|
|
}
|
|
|
|
/*
|
|
* The socket event alarm went off. Go through the set of socket objects with
|
|
* timers, and see if any of their requests have now expired. Set a new alarm
|
|
* as necessary.
|
|
*/
|
|
static void
|
|
socktimer_expire(int arg __unused)
|
|
{
|
|
SLIST_HEAD(, sock) oldtimer;
|
|
struct sock *sock, *tsock;
|
|
clock_t now, lowest, left;
|
|
int working;
|
|
|
|
/*
|
|
* This function may or may not be called from a context where we are
|
|
* already deferring events, so we have to cover both cases here.
|
|
*/
|
|
if ((working = sockevent_working) == FALSE)
|
|
sockevent_working = TRUE;
|
|
|
|
/* Start a new list. */
|
|
memcpy(&oldtimer, &socktimer, sizeof(oldtimer));
|
|
SLIST_INIT(&socktimer);
|
|
|
|
now = getticks();
|
|
lowest = TMR_NEVER;
|
|
|
|
/*
|
|
* Go through all sockets that have or had a request with a timeout,
|
|
* canceling any expired requests and building a new list of sockets
|
|
* that still have requests with timeouts as we go.
|
|
*/
|
|
SLIST_FOREACH_SAFE(sock, &oldtimer, sock_timer, tsock) {
|
|
assert(sock->sock_flags & SFL_TIMER);
|
|
sock->sock_flags &= ~SFL_TIMER;
|
|
|
|
left = sockevent_expire(sock, now);
|
|
/*
|
|
* The sock object may already have been deallocated now.
|
|
		 * If 'left' is TMR_NEVER, do not touch 'sock' anymore.
|
|
*/
|
|
|
|
if (left != TMR_NEVER) {
|
|
if (lowest == TMR_NEVER || lowest > left)
|
|
lowest = left;
|
|
|
|
SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);
|
|
|
|
sock->sock_flags |= SFL_TIMER;
|
|
}
|
|
}
|
|
|
|
/* If there is a new lowest timeout at all, set a new timer. */
|
|
if (lowest != TMR_NEVER)
|
|
set_timer(&sockevent_timer, lowest, socktimer_expire, 0);
|
|
|
|
if (!working) {
|
|
/* If any new events were raised, process them now. */
|
|
if (sockevent_has_events())
|
|
sockevent_pump();
|
|
|
|
sockevent_working = FALSE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set a timer for the given (relative) number of clock ticks, adding the
|
|
* associated socket object to the set of socket objects with timers, if it was
|
|
* not already in that set. Set a new alarm if necessary, and return the
|
|
* absolute timeout for the timer. Since the timers list is maintained lazily,
|
|
* the caller need not take the object off the set if the call was canceled
|
|
* later; see also socktimer_del().
|
|
*/
|
|
static clock_t
|
|
socktimer_add(struct sock * sock, clock_t ticks)
|
|
{
|
|
clock_t now;
|
|
|
|
/*
|
|
* Relative time comparisons require that any two times are no more
|
|
* than half the comparison space (clock_t, unsigned long) apart.
|
|
*/
|
|
assert(ticks <= TMRDIFF_MAX);
|
|
|
|
/* If the socket was not already on the timers list, put it on. */
|
|
if (!(sock->sock_flags & SFL_TIMER)) {
|
|
SLIST_INSERT_HEAD(&socktimer, sock, sock_timer);
|
|
|
|
sock->sock_flags |= SFL_TIMER;
|
|
}
|
|
|
|
/*
|
|
* (Re)set the timer if either it was not running at all or this new
|
|
* timeout will occur sooner than the currently scheduled alarm. Note
|
|
* that setting a timer that was already set is allowed.
|
|
*/
|
|
now = getticks();
|
|
|
|
if (!tmr_is_set(&sockevent_timer) ||
|
|
tmr_is_first(now + ticks, tmr_exp_time(&sockevent_timer)))
|
|
set_timer(&sockevent_timer, ticks, socktimer_expire, 0);
|
|
|
|
/* Return the absolute timeout. */
|
|
return now + ticks;
|
|
}
|
|
|
|
/*
|
|
* Remove a socket object from the set of socket objects with timers. Since
|
|
* the timer list is maintained lazily, this needs to be done only right before
|
|
* the socket object is freed.
|
|
*/
|
|
static void
|
|
socktimer_del(struct sock * sock)
|
|
{
|
|
|
|
if (sock->sock_flags & SFL_TIMER) {
|
|
/* This macro is O(n). */
|
|
SLIST_REMOVE(&socktimer, sock, sock, sock_timer);
|
|
|
|
sock->sock_flags &= ~SFL_TIMER;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Bind a socket to a local address.
|
|
*/
|
|
static int
|
|
sockevent_bind(sockid_t id, const struct sockaddr * __restrict addr,
|
|
socklen_t addr_len, endpoint_t user_endpt,
|
|
const struct sockdriver_call * __restrict call)
|
|
{
|
|
struct sock *sock;
|
|
int r;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
if (sock->sock_ops->sop_bind == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
/* Binding a socket in listening mode is never supported. */
|
|
if (sock->sock_opt & SO_ACCEPTCONN)
|
|
return EINVAL;
|
|
|
|
r = sock->sock_ops->sop_bind(sock, addr, addr_len, user_endpt);
|
|
|
|
if (r == SUSPEND) {
|
|
if (call == NULL)
|
|
return EINPROGRESS;
|
|
|
|
sockevent_suspend(sock, SEV_BIND, call, user_endpt);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Connect a socket to a remote address.
|
|
*/
|
|
static int
|
|
sockevent_connect(sockid_t id, const struct sockaddr * __restrict addr,
|
|
socklen_t addr_len, endpoint_t user_endpt,
|
|
const struct sockdriver_call * call)
|
|
{
|
|
struct sockdriver_call fakecall;
|
|
struct sockevent_proc *spr;
|
|
struct sock *sock;
|
|
int r;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
if (sock->sock_ops->sop_connect == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
/* Connecting a socket in listening mode is never supported. */
|
|
if (sock->sock_opt & SO_ACCEPTCONN)
|
|
return EOPNOTSUPP;
|
|
|
|
/*
|
|
* The upcoming connect call may fire an accept event for which the
|
|
* handler may in turn fire a connect event on this socket. Since we
|
|
* delay event processing until after processing calls, this would
|
|
* create the problem that even if the connection is accepted right
|
|
* away, non-blocking connect requests would return EINPROGRESS. For
|
|
* UDS, this is undesirable behavior. To remedy this, we use a hack:
|
|
* we temporarily suspend the connect even if non-blocking, then
|
|
* process events, and then cancel the connect request again. If the
|
|
* connection was accepted immediately, the cancellation will have no
|
|
* effect, since the request has already been replied to. In order not
|
|
* to violate libsockdriver rules with this hack, we fabricate a fake
|
|
	 * 'call' object.
|
|
*/
|
|
r = sock->sock_ops->sop_connect(sock, addr, addr_len, user_endpt);
|
|
|
|
if (r == SUSPEND) {
|
|
if (call != NULL || sockevent_has_events()) {
|
|
if (call == NULL) {
|
|
fakecall.sc_endpt = NONE;
|
|
|
|
call = &fakecall;
|
|
}
|
|
|
|
assert(!sockevent_has_suspended(sock,
|
|
SEV_SEND | SEV_RECV));
|
|
|
|
sockevent_suspend(sock, SEV_CONNECT, call, user_endpt);
|
|
|
|
if (call == &fakecall) {
|
|
/* Process any pending events first now. */
|
|
sockevent_pump();
|
|
|
|
/*
|
|
* If the connect request has not been resumed
|
|
* yet now, we must remove it from the queue
|
|
* again, and return EINPROGRESS ourselves.
|
|
* Otherwise, return OK or a pending error.
|
|
*/
|
|
spr = sockevent_unsuspend(sock, call);
|
|
if (spr != NULL) {
|
|
sockevent_proc_free(spr);
|
|
|
|
r = EINPROGRESS;
|
|
} else if ((r = sock->sock_err) != OK)
|
|
sock->sock_err = OK;
|
|
}
|
|
} else
|
|
r = EINPROGRESS;
|
|
}
|
|
|
|
if (r == OK) {
|
|
/*
|
|
* A completed connection attempt also always implies that the
|
|
* socket becomes writable. For convenience we enforce this
|
|
* rule here, because it is easy to forget.
|
|
*/
|
|
sockevent_raise(sock, SEV_SEND);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Put a socket in listening mode.
|
|
*/
|
|
static int
|
|
sockevent_listen(sockid_t id, int backlog)
|
|
{
|
|
struct sock *sock;
|
|
int r;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
if (sock->sock_ops->sop_listen == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
/*
|
|
* Perform a general adjustment on the backlog value, applying the
|
|
* customary BSD "fudge factor" of 1.5x. Keep the value within bounds
|
|
* though. POSIX imposes that a negative backlog value is equal to a
|
|
* backlog value of zero. A backlog value of zero, in turn, may mean
|
|
* anything; we take it to be one. POSIX also imposes that all socket
|
|
* drivers accept up to at least SOMAXCONN connections on the queue.
|
|
*/
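	/*
	 * For example: a backlog of 0 (or any negative value) is adjusted to
	 * 1, a backlog of 5 becomes 5 + 1 + 2 = 8, and any adjusted value
	 * larger than SOMAXCONN is capped to SOMAXCONN.
	 */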
|
|
if (backlog < 0)
|
|
backlog = 0;
|
|
if (backlog < SOMAXCONN)
|
|
backlog += 1 + ((unsigned int)backlog >> 1);
|
|
if (backlog > SOMAXCONN)
|
|
backlog = SOMAXCONN;
|
|
|
|
r = sock->sock_ops->sop_listen(sock, backlog);
|
|
|
|
/*
|
|
* On success, the socket is now in listening mode. As part of that,
|
|
* a select(2) ready-to-read condition now indicates that a connection
|
|
* may be accepted on the socket, rather than that data may be read.
|
|
* Since libsockevent is responsible for this distinction, we keep
|
|
* track of the listening mode at this level. Conveniently, there is a
|
|
* socket option for this, which we support out of the box as a result.
|
|
*/
|
|
if (r == OK) {
|
|
sock->sock_opt |= SO_ACCEPTCONN;
|
|
|
|
/*
|
|
* For the extremely unlikely case that right after the socket
|
|
* is put into listening mode, it has a connection ready to
|
|
* accept, we retest blocked ready-to-read select queries now.
|
|
*/
|
|
sockevent_raise(sock, SEV_ACCEPT);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Accept a connection on a listening socket, creating a new socket.
|
|
*/
|
|
static sockid_t
|
|
sockevent_accept(sockid_t id, struct sockaddr * __restrict addr,
|
|
socklen_t * __restrict addr_len, endpoint_t user_endpt,
|
|
const struct sockdriver_call * __restrict call)
|
|
{
|
|
struct sock *sock, *newsock;
|
|
sockid_t r;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
if (sock->sock_ops->sop_accept == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
/*
|
|
* Attempt to accept a connection. The socket driver is responsible
|
|
* for allocating a sock object (and identifier) on success. It may
|
|
* already have done so before, in which case it should leave newsock
|
|
* filled with NULL; otherwise, the returned sock object is cloned from
|
|
* the listening socket. The socket driver is also responsible for
|
|
* failing the call if the socket is not in listening mode, because it
|
|
* must specify the error to return: EOPNOTSUPP or EINVAL.
|
|
*/
|
|
newsock = NULL;
|
|
|
|
if ((r = sock->sock_ops->sop_accept(sock, addr, addr_len, user_endpt,
|
|
&newsock)) == SUSPEND) {
|
|
assert(sock->sock_opt & SO_ACCEPTCONN);
|
|
|
|
if (call == NULL)
|
|
return EWOULDBLOCK;
|
|
|
|
sockevent_suspend(sock, SEV_ACCEPT, call, user_endpt);
|
|
|
|
return SUSPEND;
|
|
}
|
|
|
|
if (r >= 0)
|
|
sockevent_accepted(sock, newsock, r);
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Send regular and/or control data.
|
|
*/
|
|
static int
|
|
sockevent_send(sockid_t id, const struct sockdriver_data * __restrict data,
|
|
size_t len, const struct sockdriver_data * __restrict ctl_data,
|
|
socklen_t ctl_len, const struct sockaddr * __restrict addr,
|
|
socklen_t addr_len, endpoint_t user_endpt, int flags,
|
|
const struct sockdriver_call * __restrict call)
|
|
{
|
|
struct sock *sock;
|
|
clock_t time;
|
|
size_t min, off;
|
|
socklen_t ctl_off;
|
|
int r, timer;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
/*
|
|
* The order of the following checks is not necessarily fixed, and may
|
|
* be changed later. As far as applicable, they should match the order
|
|
* of the checks during call resumption, though.
|
|
*/
|
|
if ((r = sock->sock_err) != OK) {
|
|
sock->sock_err = OK;
|
|
|
|
return r;
|
|
}
|
|
|
|
if (sock->sock_flags & SFL_SHUT_WR) {
|
|
sockevent_sigpipe(sock, user_endpt, flags);
|
|
|
|
return EPIPE;
|
|
}
|
|
|
|
/*
|
|
* Translate the sticky SO_DONTROUTE option to a per-request
|
|
* MSG_DONTROUTE flag. This achieves two purposes: socket drivers have
|
|
* to check only one flag, and socket drivers that do not support the
|
|
* flag will fail send requests in a consistent way.
|
|
*/
|
|
if (sock->sock_opt & SO_DONTROUTE)
|
|
flags |= MSG_DONTROUTE;
|
|
|
|
/*
|
|
* Check if this is a valid send request as far as the socket driver is
|
|
* concerned. We do this separately from sop_send for the reason that
|
|
* this send request may immediately be queued behind other pending
|
|
* send requests (without a call to sop_send), which means even invalid
|
|
* requests would be queued and not return failure until much later.
|
|
*/
|
|
if (sock->sock_ops->sop_pre_send != NULL &&
|
|
(r = sock->sock_ops->sop_pre_send(sock, len, ctl_len, addr,
|
|
addr_len, user_endpt,
|
|
flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
|
|
return r;
|
|
|
|
if (sock->sock_ops->sop_send == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
off = 0;
|
|
ctl_off = 0;
|
|
|
|
/*
|
|
* Sending out-of-band data is treated differently from regular data:
|
|
*
|
|
* - sop_send is called immediately, even if a partial non-OOB send
|
|
* operation is currently suspended (TODO: it may have to be aborted
|
|
* in order to maintain atomicity guarantees - that should be easy);
|
|
* - sop_send must not return SUSPEND; instead, if it cannot process
|
|
* the OOB data immediately, it must return an appropriate error;
|
|
* - the send low watermark is ignored.
|
|
*
|
|
* Given that none of the current socket drivers support OOB data at
|
|
* all, more sophisticated approaches would have no added value now.
|
|
*/
|
|
if (flags & MSG_OOB) {
|
|
r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
|
|
ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, 0);
|
|
|
|
if (r == SUSPEND)
|
|
panic("libsockevent: MSG_OOB send calls may not be "
|
|
"suspended");
|
|
|
|
return (r == OK) ? (int)off : r;
|
|
}
|
|
|
|
/*
|
|
* Only call the actual sop_send function now if no other send calls
|
|
* are suspended already.
|
|
*
|
|
* Call sop_send with 'min' set to the minimum of the request size and
|
|
* the socket's send low water mark, but only if the call is non-
|
|
* blocking. For stream-oriented sockets, this should have the effect
|
|
* that non-blocking calls fail with EWOULDBLOCK if not at least that
|
|
* much can be sent immediately. For consistency, we choose to apply
|
|
* the same threshold to blocking calls. For datagram-oriented
|
|
* sockets, the minimum is not a factor to be considered.
|
|
*/
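	/*
	 * For example: with the default send low watermark of 1, 'min' is 1
	 * for any nonzero request length; if SO_SNDLOWAT has been raised to
	 * 1024, a 256-byte request gets a 'min' of 256 while a 4096-byte
	 * request gets a 'min' of 1024.
	 */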
|
|
if (!sockevent_has_suspended(sock, SEV_SEND)) {
|
|
min = sock->sock_slowat;
|
|
if (min > len)
|
|
min = len;
|
|
|
|
r = sock->sock_ops->sop_send(sock, data, len, &off, ctl_data,
|
|
ctl_len, &ctl_off, addr, addr_len, user_endpt, flags, min);
|
|
} else
|
|
r = SUSPEND;
|
|
|
|
if (r == SUSPEND) {
|
|
/*
|
|
* We do not store the target's address on suspension, because
|
|
* that would add significantly to the per-process suspension
|
|
* state. As a result, we disallow socket drivers from
|
|
* suspending send calls with addresses, because we would no
|
|
* longer have the address for proper call resumption.
|
|
* However, we do not know here whether the socket is in
|
|
* connection-oriented mode; if it is, the address is to be
|
|
* ignored altogether. Therefore, there is no test on 'addr'
|
|
* here. Resumed calls will get a NULL address pointer, and
|
|
* the socket driver is expected to do the right thing.
|
|
*/
|
|
|
|
/*
|
|
* For non-blocking socket calls, return an error only if we
|
|
* were not able to send anything at all. If only control data
|
|
* were sent, the return value is therefore zero.
|
|
*/
|
|
if (call != NULL) {
|
|
if (sock->sock_stimeo != 0) {
|
|
timer = TRUE;
|
|
time = socktimer_add(sock, sock->sock_stimeo);
|
|
} else {
|
|
timer = FALSE;
|
|
time = 0;
|
|
}
|
|
|
|
sockevent_suspend_data(sock, SEV_SEND, timer, call,
|
|
user_endpt, data, len, off, ctl_data, ctl_len,
|
|
ctl_off, flags, 0, time);
|
|
} else
|
|
r = (off > 0 || ctl_off > 0) ? OK : EWOULDBLOCK;
|
|
} else if (r == EPIPE)
|
|
sockevent_sigpipe(sock, user_endpt, flags);
|
|
|
|
return (r == OK) ? (int)off : r;
|
|
}
|
|
|
|
/*
|
|
* The inner part of the receive request handler. An error returned from here
|
|
* may be overridden by an error pending on the socket, although data returned
|
|
* from here trumps such pending errors.
|
|
*/
|
|
static int
|
|
sockevent_recv_inner(struct sock * sock,
|
|
const struct sockdriver_data * __restrict data,
|
|
size_t len, size_t * __restrict off,
|
|
const struct sockdriver_data * __restrict ctl_data,
|
|
socklen_t ctl_len, socklen_t * __restrict ctl_off,
|
|
struct sockaddr * __restrict addr,
|
|
socklen_t * __restrict addr_len, endpoint_t user_endpt,
|
|
int * __restrict flags, const struct sockdriver_call * __restrict call)
|
|
{
|
|
clock_t time;
|
|
size_t min;
|
|
int r, oob, inflags, timer;
|
|
|
|
/*
|
|
* Check if this is a valid receive request as far as the socket driver
|
|
* is concerned. We do this separately from sop_recv for the reason
|
|
* that this receive request may immediately be queued behind other
|
|
* pending receive requests (without a call to sop_recv), which means
|
|
* even invalid requests would be queued and not return failure until
|
|
* much later.
|
|
*/
|
|
inflags = *flags;
|
|
*flags = 0;
|
|
|
|
if (sock->sock_ops->sop_pre_recv != NULL &&
|
|
(r = sock->sock_ops->sop_pre_recv(sock, user_endpt,
|
|
inflags & ~(MSG_DONTWAIT | MSG_NOSIGNAL))) != OK)
|
|
return r;
|
|
|
|
/*
|
|
* The order of the following checks is not necessarily fixed, and may
|
|
* be changed later. As far as applicable, they should match the order
|
|
* of the checks during call resumption, though.
|
|
*/
|
|
if (sock->sock_flags & SFL_SHUT_RD)
|
|
return SOCKEVENT_EOF;
|
|
|
|
if (sock->sock_ops->sop_recv == NULL)
|
|
return EOPNOTSUPP;
|
|
|
|
/*
|
|
* Receiving out-of-band data is treated differently from regular data:
|
|
*
|
|
* - sop_recv is called immediately, even if a partial non-OOB receive
|
|
* operation is currently suspended (TODO: it may have to be aborted
|
|
* in order to maintain atomicity guarantees - that should be easy);
|
|
* - sop_recv must not return SUSPEND; instead, if it cannot return any
|
|
	 *   of the OOB data immediately, it must return an appropriate error;
|
|
* - the receive low watermark is ignored.
|
|
*
|
|
* Given that none of the current socket drivers support OOB data at
|
|
* all, more sophisticated approaches would have no added value now.
|
|
*/
|
|
oob = (inflags & MSG_OOB);
|
|
|
|
if (oob && (sock->sock_opt & SO_OOBINLINE))
|
|
return EINVAL;
|
|
|
|
/*
|
|
* Only call the actual sop_recv function now if no other receive
|
|
* calls are suspended already.
|
|
*
|
|
* Call sop_recv with 'min' set to the minimum of the request size and
|
|
	 * the socket's receive low water mark, unless there is a pending
|
|
* error. As a result, blocking calls will block, and non-blocking
|
|
* calls will yield EWOULDBLOCK, if at least that much can be received,
|
|
* unless another condition (EOF or that pending error) prevents more
|
|
* from being received anyway. For datagram-oriented sockets, the
|
|
* minimum is not a factor to be considered.
|
|
*/
|
|
if (oob || !sockevent_has_suspended(sock, SEV_RECV)) {
|
|
if (!oob && sock->sock_err == OK) {
|
|
min = sock->sock_rlowat;
|
|
if (min > len)
|
|
min = len;
|
|
} else
|
|
min = 0; /* receive even no-data segments */
|
|
|
|
r = sock->sock_ops->sop_recv(sock, data, len, off, ctl_data,
|
|
ctl_len, ctl_off, addr, addr_len, user_endpt, inflags, min,
|
|
flags);
|
|
} else
|
|
r = SUSPEND;
|
|
|
|
assert(r <= 0 || r == SOCKEVENT_EOF);
|
|
|
|
if (r == SUSPEND) {
|
|
if (oob)
|
|
panic("libsockevent: MSG_OOB receive calls may not be "
|
|
"suspended");
|
|
|
|
/*
|
|
* For non-blocking socket calls, return EWOULDBLOCK only if we
|
|
* did not receive anything at all. If only control data were
|
|
* received, the return value is therefore zero. Suspension
|
|
* implies that there is nothing to read. For the purpose of
|
|
* the calling wrapper function, never suspend a call when
|
|
* there is a pending error.
|
|
*/
|
|
if (call != NULL && sock->sock_err == OK) {
|
|
if (sock->sock_rtimeo != 0) {
|
|
timer = TRUE;
|
|
time = socktimer_add(sock, sock->sock_rtimeo);
|
|
} else {
|
|
timer = FALSE;
|
|
time = 0;
|
|
}
|
|
|
|
sockevent_suspend_data(sock, SEV_RECV, timer, call,
|
|
user_endpt, data, len, *off, ctl_data,
|
|
ctl_len, *ctl_off, inflags, *flags, time);
|
|
} else
|
|
r = EWOULDBLOCK;
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Receive regular and/or control data.
|
|
*/
|
|
static int
|
|
sockevent_recv(sockid_t id, const struct sockdriver_data * __restrict data,
|
|
size_t len, const struct sockdriver_data * __restrict ctl_data,
|
|
socklen_t * __restrict ctl_len, struct sockaddr * __restrict addr,
|
|
socklen_t * __restrict addr_len, endpoint_t user_endpt,
|
|
int * __restrict flags, const struct sockdriver_call * __restrict call)
|
|
{
|
|
struct sock *sock;
|
|
size_t off;
|
|
socklen_t ctl_inlen;
|
|
int r;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
/*
|
|
* This function is a wrapper around the actual receive functionality.
|
|
* The reason for this is that receiving data should take precedence
|
|
* over a pending socket error, while a pending socket error should
|
|
* take precedence over both regular errors as well as EOF. In other
|
|
* words: if there is a pending error, we must try to receive anything
|
|
* at all; if receiving does not work, we must fail the call with the
|
|
* pending error. However, until we call the receive callback, we have
|
|
* no way of telling whether any data can be received. So we must try
|
|
* that before we can decide whether to return a pending error.
|
|
*/
|
|
off = 0;
|
|
ctl_inlen = *ctl_len;
|
|
*ctl_len = 0;
|
|
|
|
/*
|
|
* Attempt to perform the actual receive call.
|
|
*/
|
|
r = sockevent_recv_inner(sock, data, len, &off, ctl_data, ctl_inlen,
|
|
ctl_len, addr, addr_len, user_endpt, flags, call);
|
|
|
|
/*
|
|
* If the receive request succeeded, or it failed but yielded a partial
|
|
	 * result, then return the (partial) result. Otherwise, if an error is
|
|
* pending, return that error. Otherwise, return either a regular
|
|
* error or 0 for EOF.
|
|
*/
|
|
if (r == OK || (r != SUSPEND && (off > 0 || *ctl_len > 0)))
|
|
r = (int)off;
|
|
else if (sock->sock_err != OK) {
|
|
assert(r != SUSPEND);
|
|
|
|
r = sock->sock_err;
|
|
|
|
sock->sock_err = OK;
|
|
} else if (r == SOCKEVENT_EOF)
|
|
r = 0;
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Process an I/O control call.
|
|
*/
|
|
static int
|
|
sockevent_ioctl(sockid_t id, unsigned long request,
|
|
const struct sockdriver_data * __restrict data, endpoint_t user_endpt,
|
|
const struct sockdriver_call * __restrict call __unused)
|
|
{
|
|
struct sock *sock;
|
|
size_t size;
|
|
int r, val;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
/* We handle a very small subset of generic IOCTLs here. */
|
|
switch (request) {
|
|
case FIONREAD:
|
|
size = 0;
|
|
if (!(sock->sock_flags & SFL_SHUT_RD) &&
|
|
sock->sock_ops->sop_test_recv != NULL)
|
|
(void)sock->sock_ops->sop_test_recv(sock, 0, &size);
|
|
|
|
val = (int)size;
|
|
|
|
return sockdriver_copyout(data, 0, &val, sizeof(val));
|
|
}
|
|
|
|
if (sock->sock_ops->sop_ioctl == NULL)
|
|
return ENOTTY;
|
|
|
|
r = sock->sock_ops->sop_ioctl(sock, request, data, user_endpt);
|
|
|
|
/*
|
|
* Suspending IOCTL requests is not currently supported by this
|
|
* library, even though the VFS protocol and libsockdriver do support
|
|
	 * it. The reason is that IOCTLs do not match our process suspension
|
|
* model: they could be neither queued nor repeated. For now, it seems
|
|
* that this feature is not needed by the socket drivers either. Thus,
|
|
* even though there are possible solutions, we defer implementing them
|
|
* until we know what exactly is needed.
|
|
*/
|
|
if (r == SUSPEND)
|
|
panic("libsockevent: socket driver suspended IOCTL 0x%lx",
|
|
request);
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Set socket options.
|
|
*/
|
|
static int
|
|
sockevent_setsockopt(sockid_t id, int level, int name,
|
|
const struct sockdriver_data * data, socklen_t len)
|
|
{
|
|
struct sock *sock;
|
|
struct linger linger;
|
|
struct timeval tv;
|
|
clock_t secs, ticks;
|
|
int r, val;
|
|
|
|
if ((sock = sockhash_get(id)) == NULL)
|
|
return EINVAL;
|
|
|
|
if (level == SOL_SOCKET) {
|
|
/*
|
|
* Handle a subset of the socket-level options here. For most
|
|
* of them, this means that the socket driver itself need not
|
|
* handle changing or returning the options, but still needs to
|
|
* implement the correct behavior based on them where needed.
|
|
* A few of them are handled exclusively in this library:
|
|
* SO_ACCEPTCONN, SO_NOSIGPIPE, SO_ERROR, SO_TYPE, SO_LINGER,
|
|
* SO_SNDLOWAT, SO_RCVLOWAT, SO_SNDTIMEO, and SO_RCVTIMEO.
|
|
* The SO_USELOOPBACK option is explicitly absent, as it is
|
|
* valid for routing sockets only and is set by default there.
|
|
*/
|
|
switch (name) {
|
|
case SO_DEBUG:
|
|
case SO_REUSEADDR:
|
|
case SO_KEEPALIVE:
|
|
case SO_DONTROUTE:
|
|
case SO_BROADCAST:
|
|
case SO_OOBINLINE:
|
|
case SO_REUSEPORT:
|
|
case SO_NOSIGPIPE:
|
|
case SO_TIMESTAMP:
|
|
/*
|
|
* Simple on-off options. Changing them does not
|
|
* involve the socket driver.
|
|
*/
|
|
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
|
|
len)) != OK)
|
|
return r;
|
|
|
|
if (val)
|
|
sock->sock_opt |= (unsigned int)name;
|
|
else
|
|
sock->sock_opt &= ~(unsigned int)name;
|
|
|
|
/*
|
|
			 * In principle these on-off options are maintained in
|
|
* this library, but some socket drivers may need to
|
|
* apply the options elsewhere, so we notify them that
|
|
* something has changed. Using the sop_setsockopt
|
|
* callback would be inconvenient for this for two
|
|
* reasons: multiple value copy-ins and default errors.
|
|
*/
|
|
if (sock->sock_ops->sop_setsockmask != NULL)
|
|
sock->sock_ops->sop_setsockmask(sock,
|
|
sock->sock_opt);
|
|
|
|
/*
|
|
* The inlining of OOB data may make new data available
|
|
* through regular receive calls. Thus, see if we can
|
|
* wake up any suspended receive calls now.
|
|
*/
|
|
if (name == SO_OOBINLINE && val)
|
|
sockevent_raise(sock, SEV_RECV);
|
|
|
|
return OK;
|
|
|
|
case SO_LINGER:
|
|
/* The only on-off option with an associated value. */
|
|
if ((r = sockdriver_copyin_opt(data, &linger,
|
|
sizeof(linger), len)) != OK)
|
|
return r;
|
|
|
|
if (linger.l_onoff) {
|
|
if (linger.l_linger < 0)
|
|
return EINVAL;
|
|
/* EDOM is the closest applicable error.. */
|
|
secs = (clock_t)linger.l_linger;
|
|
if (secs >= TMRDIFF_MAX / sys_hz())
|
|
return EDOM;
|
|
|
|
sock->sock_opt |= SO_LINGER;
|
|
sock->sock_linger = secs * sys_hz();
|
|
} else {
|
|
sock->sock_opt &= ~SO_LINGER;
|
|
sock->sock_linger = 0;
|
|
}
|
|
|
|
return OK;
|
|
|
|
case SO_SNDLOWAT:
|
|
case SO_RCVLOWAT:
|
|
if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
|
|
len)) != OK)
|
|
return r;
|
|
|
|
if (val <= 0)
|
|
return EINVAL;
|
|
|
|
/*
|
|
* Setting these values may allow suspended operations
|
|
* (send, recv, select) to be resumed, so recheck.
|
|
*/
|
|
if (name == SO_SNDLOWAT) {
|
|
sock->sock_slowat = (size_t)val;
|
|
|
|
sockevent_raise(sock, SEV_SEND);
|
|
} else {
|
|
sock->sock_rlowat = (size_t)val;
|
|
|
|
sockevent_raise(sock, SEV_RECV);
|
|
}
|
|
|
|
return OK;
|
|
|
|
case SO_SNDTIMEO:
|
|
case SO_RCVTIMEO:
|
|
if ((r = sockdriver_copyin_opt(data, &tv, sizeof(tv),
|
|
len)) != OK)
|
|
return r;
|
|
|
|
if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
|
|
(unsigned long)tv.tv_usec >= US)
|
|
return EINVAL;
|
|
if (tv.tv_sec >= TMRDIFF_MAX / sys_hz())
|
|
return EDOM;
|
|
|
|
ticks = tv.tv_sec * sys_hz() +
|
|
(tv.tv_usec * sys_hz() + US - 1) / US;
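
			/*
			 * Worked example (illustration only): with
			 * sys_hz() == 100 and tv = { 2, 500000 }, the
			 * conversion above yields
			 *
			 *	ticks = 2 * 100 +
			 *	    (500000 * 100 + 1000000 - 1) / 1000000
			 *	      = 200 + 50 = 250
			 *
			 * that is, a 2.5-second timeout is rounded up to a
			 * whole number of clock ticks.
			 */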

			if (name == SO_SNDTIMEO)
				sock->sock_stimeo = ticks;
			else
				sock->sock_rtimeo = ticks;

			/*
			 * The timeouts for any calls already in progress for
			 * this socket are left as is.
			 */
			return OK;

		case SO_ACCEPTCONN:
		case SO_ERROR:
		case SO_TYPE:
			/* These options may be retrieved but not set. */
			return ENOPROTOOPT;

		default:
			/*
			 * The remaining options either cannot be handled in a
			 * generic way, or are not recognized altogether. Pass
			 * them to the socket driver, which should handle what
			 * it knows and reject the rest.
			 */
			break;
		}
	}

	if (sock->sock_ops->sop_setsockopt == NULL)
		return ENOPROTOOPT;

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not recognize.
	 */
	return sock->sock_ops->sop_setsockopt(sock, level, name, data, len);
}

/*
 * Retrieve socket options.
 */
static int
sockevent_getsockopt(sockid_t id, int level, int name,
	const struct sockdriver_data * __restrict data,
	socklen_t * __restrict len)
{
	struct sock *sock;
	struct linger linger;
	struct timeval tv;
	clock_t ticks;
	int val;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (level == SOL_SOCKET) {
		/*
		 * As with setting, handle a subset of the socket-level options
		 * here. The rest is to be taken care of by the socket driver.
		 */
		switch (name) {
		case SO_DEBUG:
		case SO_ACCEPTCONN:
		case SO_REUSEADDR:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_REUSEPORT:
		case SO_NOSIGPIPE:
		case SO_TIMESTAMP:
			val = !!(sock->sock_opt & (unsigned int)name);

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_LINGER:
			linger.l_onoff = !!(sock->sock_opt & SO_LINGER);
			linger.l_linger = sock->sock_linger / sys_hz();

			return sockdriver_copyout_opt(data, &linger,
			    sizeof(linger), len);

		case SO_ERROR:
			if ((val = -sock->sock_err) != OK)
				sock->sock_err = OK;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_TYPE:
			val = sock->sock_type;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_SNDLOWAT:
			val = (int)sock->sock_slowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_RCVLOWAT:
			val = (int)sock->sock_rlowat;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			if (name == SO_SNDTIMEO)
				ticks = sock->sock_stimeo;
			else
				ticks = sock->sock_rtimeo;

			tv.tv_sec = ticks / sys_hz();
			tv.tv_usec = (ticks % sys_hz()) * US / sys_hz();

			return sockdriver_copyout_opt(data, &tv, sizeof(tv),
			    len);

		default:
			break;
		}
	}

	if (sock->sock_ops->sop_getsockopt == NULL)
		return ENOPROTOOPT;

	/*
	 * The socket driver must return ENOPROTOOPT for all options it does
	 * not recognize.
	 */
	return sock->sock_ops->sop_getsockopt(sock, level, name, data, len);
}
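
/*
 * For illustration (standard userland usage, not part of this library): the
 * SO_ERROR case above serves the common pattern for retrieving the pending
 * error on a socket, e.g. after a non-blocking connect(2) has completed:
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err != 0)
 *		errno = err;	// the stored error, which is now cleared
 */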

/*
 * Retrieve a socket's local address.
 */
static int
sockevent_getsockname(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)
{
	struct sock *sock;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	if (sock->sock_ops->sop_getsockname == NULL)
		return EOPNOTSUPP;

	return sock->sock_ops->sop_getsockname(sock, addr, addr_len);
}

/*
 * Retrieve a socket's remote address.
 */
static int
sockevent_getpeername(sockid_t id, struct sockaddr * __restrict addr,
	socklen_t * __restrict addr_len)
{
	struct sock *sock;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/* Listening-mode sockets cannot possibly have a peer address. */
	if (sock->sock_opt & SO_ACCEPTCONN)
		return ENOTCONN;

	if (sock->sock_ops->sop_getpeername == NULL)
		return EOPNOTSUPP;

	return sock->sock_ops->sop_getpeername(sock, addr, addr_len);
}

/*
 * Mark the socket object as shut down for sending and/or receiving. The flags
 * parameter may be a bitwise-OR'ed combination of SFL_SHUT_RD and SFL_SHUT_WR.
 * This function will wake up any suspended requests affected by this change,
 * but it will not invoke the sop_shutdown() callback function on the socket.
 * The function may in fact be called from sop_shutdown() before completion to
 * mark the socket as shut down as reflected by sockevent_is_shutdown().
 */
void
sockevent_set_shutdown(struct sock * sock, unsigned int flags)
{
	unsigned int mask;

	assert(sock->sock_ops != NULL);
	assert(!(flags & ~(SFL_SHUT_RD | SFL_SHUT_WR)));

	/* Look at the newly set flags only. */
	flags &= ~(unsigned int)sock->sock_flags;

	if (flags != 0) {
		sock->sock_flags |= flags;

		/*
		 * Wake up any blocked calls that are affected by the shutdown.
		 * Shutting down listening sockets causes ongoing accept calls
		 * to be rechecked.
		 */
		mask = 0;
		if (flags & SFL_SHUT_RD)
			mask |= SEV_RECV;
		if (flags & SFL_SHUT_WR)
			mask |= SEV_SEND;
		if (sock->sock_opt & SO_ACCEPTCONN)
			mask |= SEV_ACCEPT;

		assert(mask != 0);
		sockevent_raise(sock, mask);
	}
}

/*
 * Shut down socket send and receive operations.
 */
static int
sockevent_shutdown(sockid_t id, int how)
{
	struct sock *sock;
	unsigned int flags;
	int r;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	/* Convert the request to a set of flags. */
	flags = 0;
	if (how == SHUT_RD || how == SHUT_RDWR)
		flags |= SFL_SHUT_RD;
	if (how == SHUT_WR || how == SHUT_RDWR)
		flags |= SFL_SHUT_WR;

	if (sock->sock_ops->sop_shutdown != NULL)
		r = sock->sock_ops->sop_shutdown(sock, flags);
	else
		r = OK;

	/* On success, update our internal state as well. */
	if (r == OK)
		sockevent_set_shutdown(sock, flags);

	return r;
}

/*
 * Close a socket.
 */
static int
sockevent_close(sockid_t id, const struct sockdriver_call * call)
{
	struct sock *sock;
	int r, force;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	assert(sock->sock_proc == NULL);
	sock->sock_select.ss_endpt = NONE;

	/*
	 * There are several scenarios when it comes to closing sockets. First
	 * of all, we never actually force the socket driver to close a socket.
	 * The driver may always suspend the close call and take as long as it
	 * wants. After a suspension, it signals its completion of the close
	 * through the SEV_CLOSE socket event.
	 *
	 * With that said, we offer two levels of urgency regarding the close
	 * request: regular and forced. The former allows for a graceful
	 * close; the latter urges the socket driver to close the socket as
	 * soon as possible. A socket that has been requested to be closed
	 * gracefully can, as long as it is still open (i.e., no SEV_CLOSE was
	 * fired yet), later be requested to be closed forcefully. This is how
	 * SO_LINGER with a nonzero timeout is implemented. If SO_LINGER is
	 * set with a zero timeout, the socket is force-closed immediately.
	 * Finally, if SO_LINGER is not set, the socket will be closed normally
	 * and never be forced--akin to SO_LINGER with an infinite timeout.
	 *
	 * The return value of the caller's close(2) may only ever be either
	 * OK or EINPROGRESS, to ensure that the caller knows that the file
	 * descriptor is freed up, as per Austin Group Defect #529. In fact,
	 * EINPROGRESS is to be returned only on signal interruption (i.e.,
	 * cancel). For that reason, this function only ever returns OK.
	 */
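	/*
	 * For illustration (standard userland usage, not part of this
	 * library): the three SO_LINGER configurations described above are
	 * selected as follows.
	 *
	 *	struct linger l;
	 *
	 *	l.l_onoff = 1; l.l_linger = 5;	// graceful, force after 5s
	 *	l.l_onoff = 1; l.l_linger = 0;	// force-close immediately
	 *	l.l_onoff = 0;			// never forced (the default)
	 *
	 *	setsockopt(fd, SOL_SOCKET, SO_LINGER, &l, sizeof(l));
	 */
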
	force = ((sock->sock_opt & SO_LINGER) && sock->sock_linger == 0);

	if (sock->sock_ops->sop_close != NULL)
		r = sock->sock_ops->sop_close(sock, force);
	else
		r = OK;

	assert(r == OK || r == SUSPEND);

	if (r == SUSPEND) {
		sock->sock_flags |= SFL_CLOSING;

		/*
		 * If we were requested to force-close the socket immediately,
		 * but the socket driver needs more time anyway, then tell the
		 * caller that the socket was closed right away.
		 */
		if (force)
			return OK;

		/*
		 * If we are to force-close the socket only after a specific
		 * linger timeout, set the timer for that now, even if the call
		 * is non-blocking. This also means that we cannot associate
		 * the linger timeout with the close call. Instead, we convert
		 * the sock_linger value from a (relative) duration to an
		 * (absolute) timeout time, and use the SFL_CLOSING flag (along
		 * with SFL_TIMER) to tell the difference. Since the socket is
		 * otherwise unreachable from userland at this point, the
		 * conversion is never visible in any way.
		 *
		 * The socket may already be in the timers list, so we must
		 * always check the SO_LINGER flag before checking sock_linger.
		 *
		 * If SO_LINGER is not set, we must never suspend the call.
		 */
		if (sock->sock_opt & SO_LINGER) {
			sock->sock_linger =
			    socktimer_add(sock, sock->sock_linger);
		} else
			call = NULL;

		/*
		 * A non-blocking close is completed asynchronously. The
		 * caller is not told about this with EWOULDBLOCK as usual, for
		 * the reasons mentioned above.
		 */
		if (call != NULL)
			sockevent_suspend(sock, SEV_CLOSE, call, NONE);
		else
			r = OK;
	} else if (r == OK)
		sockevent_free(sock);

	return r;
}

/*
 * Cancel a suspended send request.
 */
static void
sockevent_cancel_send(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were sent, return the number of data
	 * bytes sent--possibly zero. Otherwise return the given error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	sockdriver_reply_generic(&spr->spr_call, r);

	/*
	 * In extremely rare circumstances, one send may be queued behind
	 * another send even though the former can actually be sent on the
	 * socket right away. For this reason, we retry sending when canceling
	 * a send. We need to do this only when the first send in the queue
	 * was canceled, but multiple blocked sends on a single socket should
	 * be rare anyway.
	 */
	sockevent_raise(sock, SEV_SEND);
}

/*
 * Cancel a suspended receive request.
 */
static void
sockevent_cancel_recv(struct sock * sock, struct sockevent_proc * spr, int err)
{
	int r;

	/*
	 * If any regular or control data were received, return the number of
	 * data bytes received--possibly zero. Otherwise return the given
	 * error code.
	 */
	if (spr->spr_dataoff > 0 || spr->spr_ctloff > 0)
		r = (int)spr->spr_dataoff;
	else
		r = err;

	/*
	 * Also return any flags set for the data received so far, e.g.
	 * MSG_CTRUNC. Do not return an address: receive calls on unconnected
	 * sockets must never block after receiving some data--instead, they
	 * are supposed to return MSG_TRUNC if not all data were copied out.
	 */
	sockdriver_reply_recv(&spr->spr_call, r, spr->spr_ctloff, NULL, 0,
	    spr->spr_rflags);

	/*
	 * The same story as for sends (see above) applies to receives,
	 * although this case should be even more rare in practice.
	 */
	sockevent_raise(sock, SEV_RECV);
}

/*
 * Cancel a previous request that may currently be suspended. The cancel
 * operation itself does not have a reply. Instead, if the given request was
 * found to be suspended, that request must be aborted and an appropriate reply
 * must be sent for the request. If no matching request was found, no reply
 * must be sent at all.
 */
static void
sockevent_cancel(sockid_t id, const struct sockdriver_call * call)
{
	struct sockevent_proc *spr;
	struct sock *sock;

	/*
	 * Due to asynchronous close(2) operations, it may happen that not
	 * even the sock object can be found. In this (entirely legitimate)
	 * case, do not send any reply.
	 */
	if ((sock = sockhash_get(id)) == NULL)
		return;

	/*
	 * The request may already have completed by the time we receive the
	 * cancel request, in which case we can not find it. In this (entirely
	 * legitimate) case, do not send any reply.
	 */
	if ((spr = sockevent_unsuspend(sock, call)) == NULL)
		return;

	/*
	 * We found the operation. Cancel it according to its call type.
	 * Then, once fully done with it, free the suspension data structure.
	 *
	 * Note that we have to use the call structure from the suspension data
	 * structure rather than the given 'call' pointer: only the former
	 * includes all the information necessary to resume the request!
	 */
	switch (spr->spr_event) {
	case SEV_BIND:
	case SEV_CONNECT:
		assert(spr->spr_call.sc_endpt != NONE);

		sockdriver_reply_generic(&spr->spr_call, EINTR);

		break;

	case SEV_ACCEPT:
		sockdriver_reply_accept(&spr->spr_call, EINTR, NULL, 0);

		break;

	case SEV_SEND:
		sockevent_cancel_send(sock, spr, EINTR);

		break;

	case SEV_RECV:
		sockevent_cancel_recv(sock, spr, EINTR);

		break;

	case SEV_CLOSE:
		/*
		 * Return EINPROGRESS rather than EINTR, so that the user
		 * process can tell from the close(2) result that the file
		 * descriptor has in fact been closed.
		 */
		sockdriver_reply_generic(&spr->spr_call, EINPROGRESS);

		/*
		 * Do not free the sock object here: the socket driver will
		 * complete the close in the background, and fire SEV_CLOSE
		 * once it is done. Only then is the sock object freed.
		 */
		break;

	default:
		panic("libsockevent: process suspended on unknown event 0x%x",
		    spr->spr_event);
	}

	sockevent_proc_free(spr);
}

/*
 * Process a select request.
 */
static int
sockevent_select(sockid_t id, unsigned int ops,
	const struct sockdriver_select * sel)
{
	struct sock *sock;
	unsigned int r, notify;

	if ((sock = sockhash_get(id)) == NULL)
		return EINVAL;

	notify = (ops & SDEV_NOTIFY);
	ops &= (SDEV_OP_RD | SDEV_OP_WR | SDEV_OP_ERR);

	/*
	 * See if any of the requested select operations can be satisfied
	 * immediately.
	 */
	r = sockevent_test_select(sock, ops);

	/*
	 * If select operations were pending, the new results must not indicate
	 * that any of those were satisfied, as that would indicate an internal
	 * logic error: the socket driver is supposed to update its state
	 * proactively, and thus, discovering that things have changed here is
	 * not something that should ever happen.
	 */
	assert(!(sock->sock_selops & r));

	/*
	 * If any select operations are not satisfied immediately, and we are
	 * asked to notify the caller when they are satisfied later, save them
	 * for later retesting.
	 */
	ops &= ~r;

	if (notify && ops != 0) {
		/*
		 * For now, we support only one caller when it comes to select
		 * queries: VFS. If we want to support a networked file system
		 * (or so) directly calling select as well, this library will
		 * have to be extended accordingly (should not be too hard).
		 */
		if (sock->sock_select.ss_endpt != NONE) {
			if (sock->sock_select.ss_endpt != sel->ss_endpt) {
				printf("libsockevent: no support for multiple "
				    "select callers yet\n");

				return EIO;
			}

			/*
			 * If a select query was already pending for this
			 * caller, we must simply merge in the new operations.
			 */
			sock->sock_selops |= ops;
		} else {
			assert(sel->ss_endpt != NONE);

			sock->sock_select = *sel;
			sock->sock_selops = ops;
		}
	}

	return r;
}

/*
 * An alarm has triggered. Expire any timers. Socket drivers that do not pass
 * clock notification messages to libsockevent must call expire_timers(3)
 * themselves instead.
 */
static void
sockevent_alarm(clock_t now)
{

	expire_timers(now);
}

static const struct sockdriver sockevent_tab = {
	.sdr_socket = sockevent_socket,
	.sdr_socketpair = sockevent_socketpair,
	.sdr_bind = sockevent_bind,
	.sdr_connect = sockevent_connect,
	.sdr_listen = sockevent_listen,
	.sdr_accept = sockevent_accept,
	.sdr_send = sockevent_send,
	.sdr_recv = sockevent_recv,
	.sdr_ioctl = sockevent_ioctl,
	.sdr_setsockopt = sockevent_setsockopt,
	.sdr_getsockopt = sockevent_getsockopt,
	.sdr_getsockname = sockevent_getsockname,
	.sdr_getpeername = sockevent_getpeername,
	.sdr_shutdown = sockevent_shutdown,
	.sdr_close = sockevent_close,
	.sdr_cancel = sockevent_cancel,
	.sdr_select = sockevent_select,
	.sdr_alarm = sockevent_alarm
};

/*
 * Initialize the socket event library.
 */
void
sockevent_init(sockevent_socket_cb_t socket_cb)
{

	sockhash_init();

	socktimer_init();

	sockevent_proc_init();

	SIMPLEQ_INIT(&sockevent_pending);

	assert(socket_cb != NULL);
	sockevent_socket_cb = socket_cb;

	/* Announce we are up. */
	sockdriver_announce();

	sockevent_working = FALSE;
}

/*
 * Process a socket driver request message.
 */
void
sockevent_process(const message * m_ptr, int ipc_status)
{

	/* Block events until after we have processed the request. */
	assert(!sockevent_working);
	sockevent_working = TRUE;

	/* Actually process the request. */
	sockdriver_process(&sockevent_tab, m_ptr, ipc_status);

	/*
	 * If any events were fired while processing the request, they will
	 * have been queued for later. Go through them now.
	 */
	if (sockevent_has_events())
		sockevent_pump();

	sockevent_working = FALSE;
}
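
/*
 * Usage sketch (illustration only, not part of this library): a socket driver
 * built on libsockevent registers its socket creation callback through
 * sockevent_init() and then feeds every incoming request message to
 * sockevent_process(). Assuming the usual MINIX SEF driver boilerplate, with
 * a driver-defined sef_local_startup() that ends up calling
 * sockevent_init(my_socket_cb), a minimal main loop could look like this;
 * my_socket_cb is a hypothetical callback that allocates a sock object for
 * each newly created socket:
 *
 *	int
 *	main(int argc, char ** argv)
 *	{
 *		message m;
 *		int r, ipc_status;
 *
 *		env_setargs(argc, argv);
 *		sef_local_startup();
 *
 *		for (;;) {
 *			if ((r = sef_receive_status(ANY, &m,
 *			    &ipc_status)) != OK)
 *				panic("driver: sef_receive_status: %d", r);
 *
 *			sockevent_process(&m, ipc_status);
 *		}
 *	}
 */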