
This new implementation of the UDS service is built on top of the libsockevent library. It thereby inherits all the advantages that libsockevent brings. However, the fundamental restructuring required for that change also paved the way for resolution of a number of other important open issues with the old UDS code. Most importantly, the rewrite brings the behavior of the service much closer to POSIX compliance and NetBSD compatibility. These are the most important changes:

- due to the use of libsockevent, UDS now supports multiple suspending calls per socket and a large number of standard socket flags and options;
- socket address matching is now based on <device,inode> lookups instead of canonicalized path names, and socket addresses are no longer altered either due to canonicalization or at connect time;
- the socket state machine is now well defined, most importantly resolving the erroneous reset-on-EOF semantics of the old UDS, but also allowing socket reuse;
- sockets are now connected before being accepted instead of being held in connecting state, unless the LOCAL_CONNWAIT option is set on either the connecting or the listening socket;
- connect(2) on datagram sockets is now supported (needed by syslog), and proper datagram socket disconnect notification is provided;
- the receive queue now supports segmentation, associating ancillary data (in-flight file descriptors and credentials) with each segment instead of keeping it fully separate; this is a POSIX requirement (and needed by tmux);
- as part of the segmentation support, the receive queue can now hold as many packets as can fit, instead of one;
- in addition to the flags supported by libsockevent, the MSG_PEEK, MSG_WAITALL, MSG_CMSG_CLOEXEC, MSG_TRUNC, and MSG_CTRUNC send and receive flags are now supported;
- the SO_PASSCRED and SO_PEERCRED socket options are replaced by LOCAL_CREDS and LOCAL_PEEREID respectively, now following NetBSD semantics and allowing use of NetBSD libc's getpeereid(3);
- memory usage is reduced by about 250 KB due to centralized in-flight file descriptor tracking, with a limit of OPEN_MAX total rather than of OPEN_MAX per socket;
- memory usage is reduced by another ~50 KB due to removal of state redundancy, despite the fact that socket path names may now be up to 253 bytes rather than the previous 104 bytes;
- compared to the old UDS, there is now very little direct indexing on the static array of sockets, thus allowing dynamic allocation of sockets more easily in the future;
- the UDS service now has RMIB support for the net.local sysctl tree, implementing preliminary support for NetBSD netstat(1).

Change-Id: I4a9b6fe4aaeef0edf2547eee894e6c14403fcb32
297 lines
5.6 KiB
C
297 lines
5.6 KiB
C
#include <sys/cdefs.h>
|
|
#include "namespace.h"
|
|
#include <lib.h>
|
|
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <stdio.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/types.h>
|
|
#include <netinet/tcp.h>
|
|
|
|
#include <net/gen/in.h>
|
|
#include <net/gen/tcp.h>
|
|
#include <net/gen/tcp_io.h>
|
|
#include <net/gen/udp.h>
|
|
#include <net/gen/udp_io.h>
|
|
|
|
#define DEBUG 0
|
|
|
|
/*
 * Per-protocol fallback handlers.  They are defined later in this file and
 * are invoked by setsockopt() once an ioctl probe has identified the kind
 * of descriptor.  All three share the setsockopt() argument list.
 */
static int _tcp_setsockopt(int sock, int level, int option_name,
	const void *option_value, socklen_t option_len);
static int _udp_setsockopt(int sock, int level, int option_name,
	const void *option_value, socklen_t option_len);
static int _uds_setsockopt(int sock, int level, int option_name,
	const void *option_value, socklen_t option_len);
|
|
|
|
/*
|
|
* Set socket options.
|
|
*/
|
|
static int
|
|
__setsockopt(int fd, int level, int option_name, const void * option_value,
|
|
socklen_t option_len)
|
|
{
|
|
message m;
|
|
|
|
memset(&m, 0, sizeof(m));
|
|
m.m_lc_vfs_sockopt.fd = fd;
|
|
m.m_lc_vfs_sockopt.level = level;
|
|
m.m_lc_vfs_sockopt.name = option_name;
|
|
m.m_lc_vfs_sockopt.buf = (vir_bytes)option_value;
|
|
m.m_lc_vfs_sockopt.len = option_len;
|
|
|
|
return _syscall(VFS_PROC_NR, VFS_SETSOCKOPT, &m);
|
|
}
|
|
|
|
int setsockopt(int sock, int level, int option_name,
|
|
const void *option_value, socklen_t option_len)
|
|
{
|
|
int r;
|
|
nwio_tcpopt_t tcpopt;
|
|
nwio_udpopt_t udpopt;
|
|
struct sockaddr_un uds_addr;
|
|
|
|
r = __setsockopt(sock, level, option_name, option_value, option_len);
|
|
if (r != -1 || (errno != ENOTSOCK && errno != ENOSYS))
|
|
return r;
|
|
|
|
r= ioctl(sock, NWIOGTCPOPT, &tcpopt);
|
|
if (r != -1 || errno != ENOTTY)
|
|
{
|
|
if (r == -1)
|
|
{
|
|
/* Bad file descriptor */
|
|
return -1;
|
|
}
|
|
return _tcp_setsockopt(sock, level, option_name,
|
|
option_value, option_len);
|
|
}
|
|
|
|
r= ioctl(sock, NWIOGUDPOPT, &udpopt);
|
|
if (r != -1 || errno != ENOTTY)
|
|
{
|
|
if (r == -1)
|
|
{
|
|
/* Bad file descriptor */
|
|
return -1;
|
|
}
|
|
return _udp_setsockopt(sock, level, option_name,
|
|
option_value, option_len);
|
|
}
|
|
|
|
r= ioctl(sock, NWIOGUDSADDR, &uds_addr);
|
|
if (r != -1 || errno != ENOTTY)
|
|
{
|
|
if (r == -1)
|
|
{
|
|
/* Bad file descriptor */
|
|
return -1;
|
|
}
|
|
return _uds_setsockopt(sock, level, option_name,
|
|
option_value, option_len);
|
|
}
|
|
|
|
errno = ENOTSOCK;
|
|
return -1;
|
|
}
|
|
|
|
/*
 * Copy an int-sized option value out of the caller's buffer.
 *
 * Fails with EINVAL if option_len is not sizeof(int) and with EFAULT if the
 * buffer pointer is NULL.  The value is copied with memcpy() so that the
 * caller's buffer does not have to be aligned for an int.
 *
 * Returns 0 and stores the value in *value on success, -1 with errno set
 * otherwise.
 */
static int _tcp_get_int_opt(const void *option_value, socklen_t option_len,
	int *value)
{
	if (option_len != sizeof(*value)) {
		errno = EINVAL;
		return -1;
	}
	if (option_value == NULL) {
		errno = EFAULT;
		return -1;
	}
	memcpy(value, option_value, sizeof(*value));
	return 0;
}

/*
 * Set a socket option on a TCP descriptor.
 *
 * Nothing is actually changed; the call only reports whether the requested
 * setting matches what is in effect:
 *   SOL_SOCKET/SO_REUSEADDR, SO_KEEPALIVE: nonzero succeeds, zero fails
 *     with ENOSYS (they cannot be turned off).
 *   SOL_SOCKET/SO_RCVBUF, SO_SNDBUF: values up to 32K succeed, larger values
 *     fail with ENOSYS.
 *   IPPROTO_TCP/TCP_NODELAY: zero succeeds, nonzero fails with ENOSYS (it
 *     cannot be turned on).
 * For these options the value must be exactly an int (EINVAL otherwise) and
 * the buffer must not be NULL (EFAULT).  Any other level/option combination
 * fails with ENOSYS.  The sock argument is not used.
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
static int _tcp_setsockopt(int sock, int level, int option_name,
	const void *option_value, socklen_t option_len)
{
	/* Both the receive and the send buffer are limited to 32K. */
	const int buf_limit = 32 * 1024;
	int i;

	(void)sock;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_REUSEADDR:
		case SO_KEEPALIVE:
			if (_tcp_get_int_opt(option_value, option_len, &i) != 0)
				return -1;
			if (!i) {
				/* At the moment there is no way to turn off
				 * reusing addresses or keepalives.
				 */
				errno = ENOSYS;
				return -1;
			}
			return 0;

		case SO_RCVBUF:
		case SO_SNDBUF:
			if (_tcp_get_int_opt(option_value, option_len, &i) != 0)
				return -1;
			if (i > buf_limit) {
				errno = ENOSYS;
				return -1;
			}
			/* There is no way to reduce the buffer, do we have
			 * to let this call fail for smaller buffers?
			 */
			return 0;

		default:
			break;
		}
	}

	if (level == IPPROTO_TCP && option_name == TCP_NODELAY) {
		if (_tcp_get_int_opt(option_value, option_len, &i) != 0)
			return -1;
		if (i) {
			/* At the moment there is no way to turn on
			 * nodelay.
			 */
			errno = ENOSYS;
			return -1;
		}
		return 0;
	}

#if DEBUG
	fprintf(stderr, "_tcp_setsockopt: level %d, name %d\n",
		level, option_name);
#endif

	errno = ENOSYS;
	return -1;
}
|
|
|
|
/*
 * Set a socket option on a UDP descriptor.
 *
 * No options are handled here: every call fails with ENOSYS.  When DEBUG is
 * enabled, the requested level and option name are logged to stderr first.
 *
 * Always returns -1 with errno set to ENOSYS.
 */
static int _udp_setsockopt(int sock, int level, int option_name,
	const void *option_value, socklen_t option_len)
{
	/* Only level and option_name are looked at, and only for the trace. */
	(void)sock;
	(void)option_value;
	(void)option_len;

#if DEBUG
	fprintf(stderr, "_udp_setsockopt: level %d, name %d\n",
		level, option_name);
#else
	(void)level;
	(void)option_name;
#endif

	errno = ENOSYS;
	return -1;
}
|
|
|
|
|
|
static int _uds_setsockopt(int sock, int level, int option_name,
|
|
const void *option_value, socklen_t option_len)
|
|
{
|
|
int i;
|
|
size_t size;
|
|
|
|
if (level == SOL_SOCKET && option_name == SO_RCVBUF)
|
|
{
|
|
if (option_len != sizeof(size))
|
|
{
|
|
errno= EINVAL;
|
|
return -1;
|
|
}
|
|
size= *(const size_t *)option_value;
|
|
return ioctl(sock, NWIOSUDSRCVBUF, &size);
|
|
}
|
|
|
|
if (level == SOL_SOCKET && option_name == SO_SNDBUF)
|
|
{
|
|
if (option_len != sizeof(size))
|
|
{
|
|
errno= EINVAL;
|
|
return -1;
|
|
}
|
|
size= *(const size_t *)option_value;
|
|
return ioctl(sock, NWIOSUDSSNDBUF, &size);
|
|
}
|
|
|
|
if (level == SOL_SOCKET && option_name == SO_REUSEADDR)
|
|
{
|
|
if (option_len != sizeof(i))
|
|
{
|
|
errno= EINVAL;
|
|
return -1;
|
|
}
|
|
i= *(const int *)option_value;
|
|
if (!i)
|
|
{
|
|
/* At the moment there is no way to turn off
|
|
* reusing addresses.
|
|
*/
|
|
errno= ENOSYS;
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef SO_PASSCRED
|
|
if (level == SOL_SOCKET && option_name == SO_PASSCRED)
|
|
{
|
|
if (option_len != sizeof(i))
|
|
{
|
|
errno= EINVAL;
|
|
return -1;
|
|
}
|
|
i= *(const int *)option_value;
|
|
if (!i)
|
|
{
|
|
/* credentials can always be received. */
|
|
errno= ENOSYS;
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#if DEBUG
|
|
fprintf(stderr, "_uds_setsocketopt: level %d, name %d\n",
|
|
level, option_name);
|
|
#endif
|
|
|
|
errno= ENOSYS;
|
|
return -1;
|
|
}
|