phunix/minix/net/lwip/rtsock.c
David van Moolenbroek ef8d499e2d Add lwip: a new lwIP-based TCP/IP service
This commit adds a new TCP/IP service to MINIX 3.  As its core, the
service uses the lwIP TCP/IP stack for maintenance reasons.  The
service aims to be compatible with NetBSD userland, including its
low-level network management utilities.  It also aims to support
modern features such as IPv6.  In summary, the new LWIP service has
support for the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover
many aspects of what is going on.  The service is primarily a socket
driver built on top of the libsockdriver library, but for BPF devices
it is at the same time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
2017-04-30 13:16:03 +00:00

1913 lines
50 KiB
C

/* LWIP service - rtsock.c - routing sockets and route sysctl support */
/*
* In a nutshell, the intended abstraction is that only this module deals with
* route messages, message headers, and RTA arrays, whereas other modules
* (ifaddr, route) are responsible for parsing and providing sockaddr_* type
* addresses, with the exception of compression and expansion which is
* particular to routing sockets. Concretely, there should be no reference to
* (e.g.) rt_msghdr outside this module, and no mention of ip_addr_t inside it.
*/
#include "lwip.h"
#include "ifaddr.h"
#include "rtsock.h"
#include "route.h"
#include "lldata.h"
/* The number of routing sockets; this is the size of the static rt_array. */
#define NR_RTSOCK 8
/*
 * The send buffer maximum determines the maximum size of requests. The
 * maximum possible request size is the size of the routing message header plus
 * RTAX_MAX times the maximum socket address size, including alignment. That
 * currently works out to a number in the low 400s, so 512 should be fine for
 * now. At this time we do not support changing the send buffer size, because
 * there really is no point in doing so. Hence also no RT_SNDBUF_{MIN,DEF}.
 *
 * The receive buffer limits bound the value accepted for SO_RCVBUF, and the
 * default is the value assigned to each newly created routing socket.
 */
#define RT_SNDBUF_MAX 512 /* maximum RT send buffer size */
#define RT_RCVBUF_MIN 0 /* minimum RT receive buffer size */
#define RT_RCVBUF_DEF 16384 /* default RT receive buffer size */
#define RT_RCVBUF_MAX 65536 /* maximum RT receive buffer size */
/*
 * Address length of routing socket address structures; two bytes only. This
 * covers just the fields that precede sa_data, and is the length reported for
 * the dummy AF_ROUTE addresses this module hands out.
 */
#define RTSOCK_ADDR_LEN offsetof(struct sockaddr, sa_data)
/*
 * A routing addresses (RTA) map: one optional address per RTAX_ slot. A slot
 * with a NULL pointer is treated as absent when a message is generated.
 */
struct rtsock_rta {
const void *rta_ptr[RTAX_MAX]; /* address data per RTAX_ slot, or NULL */
socklen_t rta_len[RTAX_MAX]; /* length of the address data in bytes */
};
/*
 * Zero-filled source buffer for the padding bytes that follow each address in
 * a generated message. Its size is RT_ROUNDUP(0), which is not zero.
 */
static const char rtsock_padbuf[RT_ROUNDUP(0)];
/*
 * Per-socket state for routing sockets. All sockets live in the static
 * rt_array; each one is on exactly one of the two lists declared below. The
 * index of a socket within rt_array is used as part of its socket identifier.
 */
static struct rtsock {
struct sock rt_sock; /* socket object, MUST be first */
int rt_family; /* address family filter if not zero */
unsigned int rt_flags; /* routing socket flags (RTF_) */
struct pbuf *rt_rcvhead; /* receive buffer, first packet */
struct pbuf **rt_rcvtailp; /* receive buffer, last ptr-ptr */
size_t rt_rcvlen; /* receive buffer, length in bytes */
size_t rt_rcvbuf; /* receive buffer, maximum size */
TAILQ_ENTRY(rtsock) rt_next; /* next in active or free list */
} rt_array[NR_RTSOCK];
/* Socket flag kept in rt_flags; it is controlled through SO_USELOOPBACK. */
#define RTF_NOLOOPBACK 0x1 /* suppress reply messages */
static TAILQ_HEAD(, rtsock) rt_freelist; /* free routing sockets */
static TAILQ_HEAD(, rtsock) rt_activelist; /* active routing sockets */
/*
 * Information about a routing request, filled in by this module when a request
 * is processed and handed to the route module so that a reply can be generated
 * for it later.
 */
struct rtsock_request {
struct rtsock *rtr_src; /* source socket of the request */
pid_t rtr_pid; /* process ID of requesting process */
int rtr_seq; /* sequence number from the request */
int rtr_getif; /* RTM_GET only: get interface info */
};
/* Forward declarations; both are used before they are defined in this file. */
static const struct sockevent_ops rtsock_ops;
static ssize_t rtsock_info(struct rmib_call *, struct rmib_node *,
struct rmib_oldp *, struct rmib_newp *);
/* The CTL_NET PF_ROUTE subtree: one read-only node served by rtsock_info. */
static struct rmib_node net_route_table[] = {
[0] = RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, rtsock_info,
"rtable", "Routing table information"),
};
/* The CTL_NET PF_ROUTE node, registered with the MIB service at init time. */
static struct rmib_node net_route_node =
RMIB_NODE(RMIB_RO, net_route_table, "route", "PF_ROUTE information");
/*
* Initialize the routing sockets module.
*/
void
rtsock_init(void)
{
const int mib[] = { CTL_NET, PF_ROUTE };
unsigned int slot;
int r;
/* Initialize the list of free routing sockets. */
TAILQ_INIT(&rt_freelist);
for (slot = 0; slot < __arraycount(rt_array); slot++)
TAILQ_INSERT_TAIL(&rt_freelist, &rt_array[slot], rt_next);
/* Initialize the list of acive routing sockets. */
TAILQ_INIT(&rt_activelist);
/* Register the "net.route" subtree with the MIB service. */
if ((r = rmib_register(mib, __arraycount(mib), &net_route_node)) != OK)
panic("unable to register net.route RMIB tree: %d", r);
}
/*
 * Allocate a pbuf that can hold a routing message of 'size' bytes.  Return the
 * new pbuf, or NULL if the message does not fit in a single pool buffer or if
 * memory allocation failed.
 */
static struct pbuf *
rtsock_alloc(size_t size)
{
	struct pbuf *pb;

	/*
	 * Routing messages are expected to fit in one pool buffer, and the
	 * rest of this module is not prepared to deal with buffer chains.
	 * Should a larger message ever be requested, warn and fail cleanly.
	 */
	if (size <= MEMPOOL_BUFSIZE) {
		pb = pbuf_alloc(PBUF_RAW, size, PBUF_RAM);

		/* If we got a buffer at all, it must not be a chain. */
		assert(pb == NULL || pb->tot_len == pb->len);

		return pb;
	}

	printf("LWIP: routing socket packet too large (%zu)\n", size);

	return NULL;
}
/*
* Initialize a routing addresses map.
*/
static void
rtsock_rta_init(struct rtsock_rta * rta)
{
memset(rta, 0, sizeof(*rta));
}
/*
 * Store the address 'ptr' of 'len' bytes in slot 'rtax' of a routing addresses
 * map, replacing whatever the slot held before.  The slot number must be below
 * RTAX_MAX.
 */
static void
rtsock_rta_set(struct rtsock_rta * rta, unsigned int rtax, const void * ptr,
	socklen_t len)
{

	assert(rtax < RTAX_MAX);

	rta->rta_len[rtax] = len;
	rta->rta_ptr[rtax] = ptr;
}
/*
 * Produce a message that consists of the header 'hdr' of 'hdrlen' bytes,
 * followed by all addresses present in the routing addresses map 'rta', each
 * padded to its RT_ROUNDUP size.  Map entries with a NULL pointer are skipped.
 *
 * The destination is selected by 'pbuf' and 'oldp', at most one of which may
 * be set:
 * - if both are NULL, only the total message size is computed and returned;
 *   'hdr', 'msglen' and 'addrs' are not dereferenced in this case;
 * - if 'pbuf' is set, a new pbuf is allocated, stored in '*pbuf', and filled
 *   with the message; 'off' must be zero;
 * - if 'oldp' is set, the message is copied out to the RMIB (sysctl) caller at
 *   offset 'off'.
 *
 * In the last two cases, '*msglen' is set to the total message length and
 * '*addrs' to the bit mask of address slots present, before the message is
 * copied.  Return the result of the copy routine on success, or ENOMEM if no
 * pbuf could be allocated.
 */
static ssize_t
rtsock_rta_finalize(void * hdr, size_t hdrlen, u_short * msglen, int * addrs,
	const struct rtsock_rta * rta, struct pbuf ** pbuf,
	struct rmib_oldp * oldp, ssize_t off)
{
	iovec_t iov[1 + RTAX_MAX * 2];
	size_t datalen, padlen, total;
	unsigned int rtax, niov;
	int present;

	assert(pbuf == NULL || oldp == NULL);
	assert(pbuf == NULL || off == 0);
	assert(RT_ROUNDUP(hdrlen) == hdrlen);

	/* The header always makes up the first I/O vector element. */
	niov = 0;
	iov[niov].iov_addr = (vir_bytes)hdr;
	iov[niov].iov_size = hdrlen;
	niov++;

	total = hdrlen;
	present = 0;

	/*
	 * Add the addresses that are present, in slot order.  Each address
	 * takes up to two elements: one for its data, one for its padding.
	 */
	for (rtax = 0; rtax < RTAX_MAX; rtax++) {
		if (rta->rta_ptr[rtax] == NULL)
			continue;

		datalen = rta->rta_len[rtax];

		/* Even an empty address is padded: RT_ROUNDUP(0) is not 0. */
		padlen = RT_ROUNDUP(datalen) - datalen;

		if (datalen > 0) {
			assert(niov < __arraycount(iov));

			iov[niov].iov_addr = (vir_bytes)rta->rta_ptr[rtax];
			iov[niov].iov_size = datalen;
			niov++;
		}

		if (padlen > 0) {
			assert(niov < __arraycount(iov));

			iov[niov].iov_addr = (vir_bytes)rtsock_padbuf;
			iov[niov].iov_size = padlen;
			niov++;
		}

		total += datalen + padlen;
		present |= (1 << rtax);	/* convert RTAX_ to RTA_ */
	}

	/* Without a destination, the caller wants the size only. */
	if (pbuf == NULL && oldp == NULL)
		return total;

	/*
	 * Complete the header through the given field pointers, as the
	 * location of these fields differs between header types.
	 */
	*msglen = total;
	*addrs = present;

	if (pbuf == NULL)
		return rmib_vcopyout(oldp, off, iov, niov);

	if ((*pbuf = rtsock_alloc(total)) == NULL)
		return ENOMEM;

	return util_coalesce((char *)(*pbuf)->payload, total, iov, niov);
}
/*
 * Reduce the size of a network mask to the bytes actually used, by lowering
 * the sa_len field of 'sa' in place.  It is highly doubtful that this extra
 * complexity pays off in any form, but it is what the BSDs historically do.
 * Compression is implemented for IPv4 only; a socket address of any other
 * family is left untouched.
 *
 * The resulting length is 8, 7, 6, or 5 depending on the last nonzero byte of
 * the mask, or 0 if the mask is all zeroes.  NOTE(review): these constants
 * presume that sin_addr starts at byte offset 4 of struct sockaddr_in; confirm
 * against the platform headers.
 */
static void
rtsock_compress_netmask(struct sockaddr * sa)
{
	struct sockaddr_in sin;
	uint32_t addr;

	if (sa->sa_family != AF_INET)
		return;	/* nothing to do */

	memcpy(&sin, sa, sizeof(sin));	/* no type punning.. (sigh) */

	/*
	 * The address is stored in network byte order.  Convert it to host
	 * byte order, so that the least significant byte of 'addr' is the
	 * last byte of the mask.
	 */
	addr = ntohl(sin.sin_addr.s_addr);

	if (addr & 0x000000ff)
		sa->sa_len = 8;
	else if (addr & 0x0000ffff)
		sa->sa_len = 7;
	else if (addr & 0x00ffffff)
		sa->sa_len = 6;
	else if (addr != 0)
		sa->sa_len = 5;
	else
		sa->sa_len = 0;
}
/*
 * Expand the possibly compressed IPv4 or IPv6 network mask 'sa' into the
 * full-size, zero-padded mask 'mask'.  On success, return TRUE, with the
 * sa_len and sa_family fields of the result set to the values for a complete
 * socket address of its family.  Return FALSE if the given mask is too large,
 * in which case the caller should fail the request.
 */
static int
rtsock_expand_netmask(union sockaddr_any * mask, const struct sockaddr * sa)
{
	size_t fulllen;
	int family;

	if (sa->sa_len > sizeof(*mask))
		return FALSE;

	/* Start from all zeroes, then overlay the bytes that were given. */
	memset(mask, 0, sizeof(*mask));
	memcpy(mask, sa, sa->sa_len);

	/*
	 * The compressed form may be so short that it lacks even the address
	 * family.  In that case, as well as for any family other than IPv6,
	 * the mask is taken to be an IPv4 mask.
	 */
	if (sa->sa_len >= offsetof(struct sockaddr, sa_data) &&
	    sa->sa_family == AF_INET6) {
		family = AF_INET6;
		fulllen = sizeof(struct sockaddr_in6);
	} else {
		family = AF_INET;
		fulllen = sizeof(struct sockaddr_in);
	}

	/* The given mask may not exceed the full size for its family. */
	if (sa->sa_len > fulllen)
		return FALSE;

	mask->sa.sa_len = fulllen;
	mask->sa.sa_family = family;

	return TRUE;
}
/*
 * Create a routing socket.  Only SOCK_RAW sockets are supported.  The given
 * protocol doubles as the socket's address family filter.  On success, store
 * the socket object in 'sockp' and the routing socket operations table in
 * 'ops', and return the new socket identifier.  Otherwise, return an error
 * code.
 */
sockid_t
rtsock_socket(int type, int protocol, struct sock ** sockp,
	const struct sockevent_ops ** ops)
{
	struct rtsock *rt;

	/*
	 * No superuser check is performed at this point: regular users may
	 * create routing sockets, but may issue RTM_GET requests only.
	 */
	if (type != SOCK_RAW)
		return EPROTOTYPE;

	/* Any value in the address family range is accepted as protocol. */
	if (!(protocol >= 0 && protocol < AF_MAX))
		return EPROTONOSUPPORT;

	/* Take the first free socket, if there is one. */
	if ((rt = TAILQ_FIRST(&rt_freelist)) == NULL)
		return ENOBUFS;

	TAILQ_REMOVE(&rt_freelist, rt, rt_next);

	/* Start with default settings and an empty receive queue. */
	rt->rt_family = protocol;
	rt->rt_flags = 0;
	rt->rt_rcvbuf = RT_RCVBUF_DEF;
	rt->rt_rcvlen = 0;
	rt->rt_rcvhead = NULL;
	rt->rt_rcvtailp = &rt->rt_rcvhead;

	TAILQ_INSERT_HEAD(&rt_activelist, rt, rt_next);

	*ops = &rtsock_ops;
	*sockp = &rt->rt_sock;

	/* The socket's slot number makes up part of its identifier. */
	return SOCKID_RT | (sockid_t)(rt - rt_array);
}
/*
 * Append the message in 'pbuf' to the receive queue of a routing socket, and
 * raise a receive event on the socket.  No receive buffer size check is
 * performed here; that is up to the caller.
 */
static void
rtsock_enqueue(struct rtsock * rt, struct pbuf * pbuf)
{
	struct pbuf **tailp;

	/* Link the message in at the current tail of the queue. */
	tailp = rt->rt_rcvtailp;
	*tailp = pbuf;

	/* The end of the new message is the new tail of the queue. */
	rt->rt_rcvtailp = pchain_end(pbuf);
	rt->rt_rcvlen += pchain_size(pbuf);

	sockevent_raise(&rt->rt_sock, SEV_RECV);
}
/*
 * Decide whether a routing message for address family 'family' may be
 * delivered to routing socket 'rt'.  If 'rtsrc' is not NULL, it is the routing
 * socket that the message originated from.  Return TRUE if the message should
 * be delivered to 'rt', or FALSE if it should not.
 */
static int
rtsock_can_send(struct rtsock *rt, struct rtsock *rtsrc, int family)
{
	int is_source;

	is_source = (rt == rtsrc);

	/* Sockets shut down for reading receive nothing at all. */
	if (sockevent_is_shutdown(&rt->rt_sock, SFL_SHUT_RD))
		return FALSE;

	/*
	 * The source of a request does not get the reply if it has indicated
	 * that it does not want replies to its own requests.
	 */
	if (is_source && (rt->rt_flags & RTF_NOLOOPBACK) != 0)
		return FALSE;

	/*
	 * A socket with an address family filter receives family-specific
	 * messages for that family only.  The source of a request is exempt,
	 * as user processes are currently not stopped from issuing commands
	 * for the "wrong" family.
	 */
	if (!is_source && family != AF_UNSPEC && rt->rt_family != AF_UNSPEC &&
	    rt->rt_family != family)
		return FALSE;

	/*
	 * Finally, the receive queue must not be full already.  The size of
	 * the message itself is deliberately not taken into account, so as
	 * not to drop larger messages and then enqueue smaller ones.
	 */
	return (rt->rt_rcvlen < rt->rt_rcvbuf) ? TRUE : FALSE;
}
/*
 * Deliver the routing message in 'pbuf' to routing socket 'rt', which is
 * taken to be the source of the request that generated the message.  If 'pbuf'
 * is NULL, only test whether such a message could be delivered.  If 'family'
 * is not AF_UNSPEC, it is the address family of the message.  Ownership of a
 * given 'pbuf' passes to this function: it is either enqueued or freed.
 * Return TRUE if the socket accepts (or would accept) the message, or FALSE
 * otherwise.
 */
static int
rtsock_msg_one(struct rtsock * rt, int family, struct pbuf * pbuf)
{
	int accepted;

	accepted = rtsock_can_send(rt, rt, family);

	if (pbuf != NULL) {
		if (accepted)
			rtsock_enqueue(rt, pbuf);
		else
			pbuf_free(pbuf);
	}

	if (accepted)
		return TRUE;

	return FALSE;
}
/*
 * Deliver the routing message in 'pbuf' to all active routing sockets that
 * should receive it.  If 'pbuf' is NULL, only test whether any such socket
 * exists, returning TRUE if so and FALSE if not.  If 'rtsrc' is not NULL, it
 * is the routing socket that is the source of the message.  If 'family' is not
 * AF_UNSPEC, it is the address family of the message.  Ownership of a given
 * 'pbuf' passes to this function in all cases.
 */
static int
rtsock_msg_match(struct rtsock * rtsrc, int family, struct pbuf * pbuf)
{
	struct rtsock *cur, *pending;
	struct pbuf *dup;

	/*
	 * Delivery runs one socket behind the search: 'pending' is the most
	 * recently found matching socket that has not been given its message
	 * yet.  This way, the last matching socket can get the original.
	 */
	pending = NULL;

	TAILQ_FOREACH(cur, &rt_activelist, rt_next) {
		if (!rtsock_can_send(cur, rtsrc, family))
			continue;

		/* For a test only, finding one willing socket is enough. */
		if (pbuf == NULL)
			return TRUE;

		if (pending != NULL) {
			/*
			 * There is a later match, so the pending socket gets
			 * a copy.  If memory has run out, give up and drop
			 * the original too: there are more important things
			 * to spend memory on than routing sockets.
			 */
			dup = rtsock_alloc(pbuf->tot_len);

			if (dup == NULL) {
				pbuf_free(pbuf);

				return TRUE;
			}

			if (pbuf_copy(dup, pbuf) != ERR_OK)
				panic("unexpected pbuf copy failure");

			rtsock_enqueue(pending, dup);
		}

		pending = cur;
	}

	/* Hand the original to the last match, or discard it if unwanted. */
	if (pending == NULL) {
		if (pbuf != NULL)
			pbuf_free(pbuf);
	} else
		rtsock_enqueue(pending, pbuf);

	return (pending != NULL);
}
/*
 * Remove the first message from the receive queue of a routing socket, and
 * free it.  The receive queue must not be empty.
 */
static void
rtsock_dequeue(struct rtsock * rt)
{
	struct pbuf *head, *next, **endp;
	size_t size;

	head = rt->rt_rcvhead;
	assert(head != NULL);

	endp = pchain_end(head);
	size = pchain_size(head);

	/* Detach the first message from the rest of the queue. */
	next = *endp;
	*endp = NULL;

	/* If the queue is now empty, the tail is the head pointer again. */
	rt->rt_rcvhead = next;
	if (next == NULL)
		rt->rt_rcvtailp = &rt->rt_rcvhead;

	assert(rt->rt_rcvlen >= size);
	rt->rt_rcvlen -= size;

	pbuf_free(head);
}
/*
* Process a routing message sent on a socket. Return OK on success, in which
* case the caller assumes that the processing routine has sent a reply to the
* user and possibly other routing sockets. Return a negative error code on
* failure, in which case the caller will send the reply to the user instead.
*/
static int
rtsock_process(struct rtsock *rt, struct rt_msghdr * rtm, char * buf,
size_t len, int is_root)
{
struct rtsock_request rtr;
struct rtsock_rta rta;
const struct sockaddr *netmask;
struct sockaddr sa;
union sockaddr_any mask;
size_t off;
int i;
if (rtm->rtm_msglen != len)
return EINVAL;
if (rtm->rtm_version != RTM_VERSION) {
printf("LWIP: PID %d uses routing sockets version %u\n",
rtm->rtm_pid, rtm->rtm_version);
return EPROTONOSUPPORT;
}
/*
* Make sure that we won't misinterpret the rest of the message. While
* looking at the message type, also make sure non-root users can only
* ever issue RTM_GET requests.
*/
switch (rtm->rtm_type) {
case RTM_ADD:
case RTM_DELETE:
case RTM_CHANGE:
case RTM_LOCK:
if (!is_root)
return EPERM;
/* FALLTHROUGH */
case RTM_GET:
break;
default:
return EOPNOTSUPP;
}
/*
* Extract all given addresses. We do not actually support all types
* of entries, but we cannot skip the ones we do not need either.
*/
rtsock_rta_init(&rta);
off = sizeof(*rtm);
assert(off == RT_ROUNDUP(off));
for (i = 0; i < RTAX_MAX; i++) {
if (!(rtm->rtm_addrs & (1 << i)))
continue;
if (off + offsetof(struct sockaddr, sa_data) > len)
return EINVAL;
/*
* It is safe to access sa_len and even sa_family in all cases,
* in particular even when the structure is of size zero.
*/
assert(offsetof(struct sockaddr, sa_data) <= RT_ROUNDUP(0));
memcpy(&sa, &buf[off], offsetof(struct sockaddr, sa_data));
if (off + sa.sa_len > len)
return EINVAL;
rtsock_rta_set(&rta, i, &buf[off], sa.sa_len);
off += RT_ROUNDUP((size_t)sa.sa_len);
}
/*
* Expand the given netmask if it is in compressed IPv4 form. We do
* this here because it is particular to routing sockets; we also do
* the compression in this module. Note how the compression may even
* strip off the address family; really, who came up with this ****?
*/
netmask = (const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK];
if (netmask != NULL) {
if (!rtsock_expand_netmask(&mask, netmask))
return EINVAL;
rtsock_rta_set(&rta, RTAX_NETMASK, &mask, mask.sa.sa_len);
}
/*
* Actually process the command. Pass on enough information so that a
* reply can be generated on success. The abstraction as sketched at
* the top of the file imposes that we pass quite a few parameters.
*/
rtr.rtr_src = rt;
rtr.rtr_pid = rtm->rtm_pid;
rtr.rtr_seq = rtm->rtm_seq;
rtr.rtr_getif = (rtm->rtm_type == RTM_GET &&
(rta.rta_ptr[RTAX_IFP] != NULL || rta.rta_ptr[RTAX_IFA] != NULL));
return route_process(rtm->rtm_type,
(const struct sockaddr *)rta.rta_ptr[RTAX_DST],
(const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK],
(const struct sockaddr *)rta.rta_ptr[RTAX_GATEWAY],
(const struct sockaddr *)rta.rta_ptr[RTAX_IFP],
(const struct sockaddr *)rta.rta_ptr[RTAX_IFA],
rtm->rtm_flags, rtm->rtm_inits, &rtm->rtm_rmx, &rtr);
}
/*
 * Perform preliminary checks on a send request.  No send flags and no
 * destination address are accepted, and the request must be at least the size
 * of a routing message header and at most RT_SNDBUF_MAX bytes.  Return OK if
 * the request may proceed, or an error code otherwise.
 */
static int
rtsock_pre_send(struct sock * sock __unused, size_t len,
	socklen_t ctl_len __unused, const struct sockaddr * addr,
	socklen_t addr_len __unused, endpoint_t user_endpt __unused, int flags)
{
	int r;

	/*
	 * The size checks cover the most basic failures, where the request
	 * cannot even be received.  No reply message is generated for those.
	 */
	if (flags != 0)
		r = EOPNOTSUPP;
	else if (addr != NULL)
		r = EISCONN;
	else if (len < sizeof(struct rt_msghdr))
		r = ENOBUFS;
	else if (len > RT_SNDBUF_MAX)
		r = EMSGSIZE;
	else
		r = OK;

	return r;
}
/*
 * Send data on a routing socket: copy in the routing request of 'len' bytes,
 * and process it.  On success, store the request length in 'offp' and return
 * OK.  On failure, return an error code; in addition, if the request made it
 * to processing and the socket is able to receive it, a copy of the request
 * with its error field set is placed on the socket's own receive queue.
 */
static int
rtsock_send(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * offp, const struct sockdriver_data * ctl __unused,
	socklen_t ctl_len __unused, socklen_t * ctl_off __unused,
	const struct sockaddr * addr __unused, socklen_t addr_len __unused,
	endpoint_t user_endpt, int flags __unused, size_t min __unused)
{
	struct rtsock *rt = (struct rtsock *)sock;
	char buf[RT_SNDBUF_MAX] __aligned(4);
	struct rt_msghdr rtm;
	struct pbuf *reply;
	uint8_t *dst;
	uid_t euid;
	int r;

	/* The size of the request has been checked before we get here. */
	assert(len >= sizeof(rtm));
	assert(len <= sizeof(buf));

	r = sockdriver_copyin(data, 0, buf, len);
	if (r != OK)
		return r;

	/* Work on a copy of the header, with a few fields set by us. */
	memcpy(&rtm, buf, sizeof(rtm));
	rtm.rtm_errno = 0;
	rtm.rtm_flags &= ~RTF_DONE;
	rtm.rtm_pid = getepinfo(user_endpt, &euid, NULL /*gid*/);

	r = rtsock_process(rt, &rtm, buf, len, (euid == ROOT_EUID));

	/*
	 * On success, a reply has been sent already, possibly to other
	 * routing sockets as well, so there is nothing more to do.
	 */
	if (r == OK) {
		*offp = len;

		return OK;
	}

	/*
	 * The request has failed.  Send a failure reply, but to the requestor
	 * only.  Traditional BSD behavior is to send failure replies to other
	 * sockets too.  However, other parties have no use for failures,
	 * while an unprivileged user process could easily abuse such replies
	 * to fake other types of routing messages (e.g., RTM_IFINFO) to those
	 * parties.  By replying to the requestor only, there is no need for
	 * security-sensitive request validation.
	 */
	if (!rtsock_can_send(rt, rt, AF_UNSPEC))
		return r;

	rtm.rtm_errno = -r;

	if ((reply = rtsock_alloc(len)) == NULL)
		return ENOMEM;

	/* The reply is the request as is, save for the updated header. */
	dst = (uint8_t *)reply->payload;

	memcpy(dst, &rtm, sizeof(rtm));
	if (len > sizeof(rtm))
		memcpy(dst + sizeof(rtm), buf + sizeof(rtm),
		    len - sizeof(rtm));

	rtsock_enqueue(rt, reply);

	return r;
}
/*
 * Perform preliminary checks on a receive request.  Return OK if the request
 * may proceed, or EOPNOTSUPP if it uses any unsupported receive flags.
 */
static int
rtsock_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused,
	int flags)
{
	/*
	 * The same set of flags is accepted across all socket types in LWIP.
	 * The ones that have no meaning for routing sockets are ignored.
	 */
	const int accepted = MSG_PEEK | MSG_WAITALL;

	return ((flags & ~accepted) != 0) ? EOPNOTSUPP : OK;
}
/*
 * Receive data on a routing socket: copy the first queued message, or as much
 * of it as fits in 'len' bytes, to the caller.  Return SUSPEND if no message
 * is queued.  On success, store the number of copied bytes in 'off' and a
 * dummy source address in 'addr' and 'addr_len', and return OK.  If the
 * message did not fit, MSG_TRUNC is set in 'rflags'.  The message is removed
 * from the queue unless MSG_PEEK is given.
 */
static int
rtsock_recv(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl __unused,
	socklen_t ctl_len __unused, socklen_t * ctl_off __unused,
	struct sockaddr * addr, socklen_t * addr_len,
	endpoint_t user_endpt __unused, int flags, size_t min __unused,
	int * rflags)
{
	struct rtsock *rt = (struct rtsock *)sock;
	struct pbuf *head;
	int r;

	head = rt->rt_rcvhead;
	if (head == NULL)
		return SUSPEND;

	/* Copy no more than one message; flag the message as cut if so. */
	if (len < head->tot_len)
		*rflags |= MSG_TRUNC;
	else
		len = head->tot_len;

	r = util_copy_data(data, len, 0, head, 0, FALSE /*copy_in*/);
	if (r != OK)
		return r;

	/* Routing messages have no real source; make one up. */
	*addr_len = RTSOCK_ADDR_LEN;
	addr->sa_family = AF_ROUTE;
	addr->sa_len = RTSOCK_ADDR_LEN;

	/* When just peeking, the message stays on the queue. */
	if ((flags & MSG_PEEK) == 0)
		rtsock_dequeue(rt);

	/* Report how much of the message was actually received. */
	*off = len;

	return OK;
}
/*
 * Test whether data can be received on a routing socket.  Return SUSPEND if
 * the receive queue is empty.  Otherwise return OK, after storing the length
 * of the first queued message in 'size' if 'size' is not NULL.
 */
static int
rtsock_test_recv(struct sock * sock, size_t min __unused, size_t * size)
{
	const struct pbuf *head;

	head = ((struct rtsock *)sock)->rt_rcvhead;

	if (head == NULL)
		return SUSPEND;

	if (size != NULL)
		*size = head->tot_len;

	return OK;
}
/*
 * Set socket options on a routing socket.  The supported options are the
 * SOL_SOCKET level options SO_USELOOPBACK and SO_RCVBUF, both of which take an
 * integer value.  Return OK on success, ENOPROTOOPT for any other option, or
 * another error code on failure.
 */
static int
rtsock_setsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t len)
{
	struct rtsock *rt = (struct rtsock *)sock;
	int r, val;

	if (level != SOL_SOCKET)
		return ENOPROTOOPT;

	if (name != SO_USELOOPBACK && name != SO_RCVBUF)
		return ENOPROTOOPT;

	/* Both supported options take a single integer. */
	r = sockdriver_copyin_opt(data, &val, sizeof(val), len);
	if (r != OK)
		return r;

	if (name == SO_USELOOPBACK) {
		/* The socket flag is the inverse of the option value. */
		if (val != 0)
			rt->rt_flags &= ~RTF_NOLOOPBACK;
		else
			rt->rt_flags |= RTF_NOLOOPBACK;

		return OK;
	}

	/* What remains is SO_RCVBUF. */
	if (val < RT_RCVBUF_MIN || val > RT_RCVBUF_MAX)
		return EINVAL;

	rt->rt_rcvbuf = (size_t)val;

	return OK;
}
/*
 * Retrieve socket options on a routing socket.  The supported options are the
 * SOL_SOCKET level options SO_USELOOPBACK and SO_RCVBUF, both of which yield
 * an integer value.  Return the result of copying out the value, or
 * ENOPROTOOPT for any other option.
 */
static int
rtsock_getsockopt(struct sock * sock, int level, int name,
	const struct sockdriver_data * data, socklen_t * len)
{
	struct rtsock *rt = (struct rtsock *)sock;
	int val;

	if (level != SOL_SOCKET)
		return ENOPROTOOPT;

	switch (name) {
	case SO_USELOOPBACK:
		/* The option value is the inverse of the socket flag. */
		val = ((rt->rt_flags & RTF_NOLOOPBACK) == 0);
		break;
	case SO_RCVBUF:
		val = rt->rt_rcvbuf;
		break;
	default:
		return ENOPROTOOPT;
	}

	return sockdriver_copyout_opt(data, &val, sizeof(val), len);
}
/*
 * Retrieve the local or remote socket address of a routing socket.  The result
 * is the same for both, and for all routing sockets: an AF_ROUTE address that
 * consists of just the length and family fields.
 */
static int
rtsock_getname(struct sock * sock __unused, struct sockaddr * addr,
	socklen_t * addr_len)
{

	/* Not useful in any way, but apparently common between OSes. */
	*addr_len = RTSOCK_ADDR_LEN;

	addr->sa_family = AF_ROUTE;
	addr->sa_len = RTSOCK_ADDR_LEN;

	return OK;
}
/*
 * Discard all messages on the receive queue of a routing socket.
 */
static void
rtsock_drain(struct rtsock * rt)
{

	/* Keep removing the first message until none are left. */
	for (;;) {
		if (rt->rt_rcvhead == NULL)
			break;

		rtsock_dequeue(rt);
	}
}
/*
 * Shut down a routing socket for reading and/or writing.  Shutting down for
 * reading discards all messages on the receive queue.  Always return OK.
 */
static int
rtsock_shutdown(struct sock * sock, unsigned int mask)
{

	if ((mask & SFL_SHUT_RD) != 0)
		rtsock_drain((struct rtsock *)sock);

	return OK;
}
/*
 * Close a routing socket, discarding all messages still on its receive queue.
 * Always return OK.
 */
static int
rtsock_close(struct sock * sock, int force __unused)
{

	rtsock_drain((struct rtsock *)sock);

	return OK;
}
/*
 * Free up a closed routing socket, by moving it from the list of active
 * routing sockets to the head of the list of free routing sockets.
 */
static void
rtsock_free(struct sock * sock)
{
	struct rtsock *closed;

	closed = (struct rtsock *)sock;

	TAILQ_REMOVE(&rt_activelist, closed, rt_next);

	TAILQ_INSERT_HEAD(&rt_freelist, closed, rt_next);
}
/*
 * The socket operations table for routing sockets, handed out on socket
 * creation. Routing sockets have no distinct local and remote addresses, so
 * the same routine serves both getsockname and getpeername requests.
 */
static const struct sockevent_ops rtsock_ops = {
.sop_pre_send = rtsock_pre_send,
.sop_send = rtsock_send,
.sop_pre_recv = rtsock_pre_recv,
.sop_recv = rtsock_recv,
.sop_test_recv = rtsock_test_recv,
.sop_setsockopt = rtsock_setsockopt,
.sop_getsockopt = rtsock_getsockopt,
.sop_getsockname = rtsock_getname,
.sop_getpeername = rtsock_getname,
.sop_shutdown = rtsock_shutdown,
.sop_close = rtsock_close,
.sop_free = rtsock_free
};
/*
* Send an interface announcement message about the given interface. If
* 'arrival' is set, the interface has just been created; otherwise, the
* interface is about to be destroyed.
*/
void
rtsock_msg_ifannounce(struct ifdev * ifdev, int arrival)
{
struct if_announcemsghdr ifan;
struct pbuf *pbuf;
if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/))
return;
memset(&ifan, 0, sizeof(ifan));
ifan.ifan_msglen = sizeof(ifan);
ifan.ifan_version = RTM_VERSION;
ifan.ifan_type = RTM_IFANNOUNCE;
ifan.ifan_index = ifdev_get_index(ifdev);
strlcpy(ifan.ifan_name, ifdev_get_name(ifdev), sizeof(ifan.ifan_name));
ifan.ifan_what = (arrival) ? IFAN_ARRIVAL : IFAN_DEPARTURE;
if ((pbuf = rtsock_alloc(sizeof(ifan))) == NULL)
return;
memcpy(pbuf->payload, &ifan, sizeof(ifan));
rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf);
}
/*
* Send an interface information routing message.
*/
void
rtsock_msg_ifinfo(struct ifdev * ifdev)
{
struct if_msghdr ifm;
struct pbuf *pbuf;
if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/))
return;
memset(&ifm, 0, sizeof(ifm));
ifm.ifm_msglen = sizeof(ifm);
ifm.ifm_version = RTM_VERSION;
ifm.ifm_type = RTM_IFINFO;
ifm.ifm_addrs = 0;
ifm.ifm_flags = ifdev_get_ifflags(ifdev);
ifm.ifm_index = ifdev_get_index(ifdev);
memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev), sizeof(ifm.ifm_data));
if ((pbuf = rtsock_alloc(sizeof(ifm))) == NULL)
return;
memcpy(pbuf->payload, &ifm, sizeof(ifm));
rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf);
}
/*
 * Prepare the interface address message header 'ifam' and the RTA map 'rta'
 * for a RTM_xxxADDR routing message of the given type about interface 'ifdev'.
 * The map starts out with one entry: the first link-layer address of the
 * interface, which is stored in 'sdlx', as RTAX_IFP.
 */
static void
rtsock_rta_init_ifam(struct rtsock_rta * rta, struct ifa_msghdr * ifam,
	struct ifdev * ifdev, unsigned int type, struct sockaddr_dlx * sdlx)
{

	/* All fields not set here, including ifam_flags, remain zero. */
	memset(ifam, 0, sizeof(*ifam));
	ifam->ifam_type = type;
	ifam->ifam_version = RTM_VERSION;
	ifam->ifam_index = ifdev_get_index(ifdev);
	ifam->ifam_metric = ifdev_get_metric(ifdev);

	rtsock_rta_init(rta);

	/* Link-layer address number zero identifies the interface. */
	ifaddr_dl_get(ifdev, (ifaddr_dl_num_t)0, sdlx);

	rtsock_rta_set(rta, RTAX_IFP, sdlx, sdlx->sdlx_len);
}
/*
 * Obtain link-layer address number 'num' of the given interface into 'sdlx',
 * and add it to the given RTA map as RTAX_IFA.
 */
static void
rtsock_rta_add_dl(struct rtsock_rta * rta, struct ifdev * ifdev,
	ifaddr_dl_num_t num, struct sockaddr_dlx * sdlx)
{

	ifaddr_dl_get(ifdev, num, sdlx);

	/*
	 * Only the address itself is added.  NetBSD adds a RTAX_NETMASK entry
	 * as well, but it is unclear why, and generating one is a pain, so
	 * for now no such entry is added here.
	 */
	rtsock_rta_set(rta, RTAX_IFA, sdlx, sdlx->sdlx_len);
}
/*
 * Send a routing message of the given type about link-layer address number
 * 'num' of the given interface, which is a new, changed, or deleted address,
 * to all interested routing sockets.
 */
void
rtsock_msg_addr_dl(struct ifdev * ifdev, unsigned int type,
	ifaddr_dl_num_t num)
{
	struct rtsock_rta rta;
	struct ifa_msghdr ifam;
	struct sockaddr_dlx name, addr;
	struct pbuf *pbuf;
	ssize_t r;

	/* Do not bother generating a message that nobody would receive. */
	if (!rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, NULL /*pbuf*/))
		return;

	rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);

	rtsock_rta_add_dl(&rta, ifdev, num, &addr);

	r = rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
	    &ifam.ifam_addrs, &rta, &pbuf, NULL, 0);

	/* Deliver the message only if it was generated successfully. */
	if (r <= 0)
		return;

	rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, pbuf);
}
/*
 * Obtain IPv4 address number 'num' of the given interface into the four
 * elements of 'sin', and add the results to the given RTA map: the address as
 * RTAX_IFA, the compressed netmask as RTAX_NETMASK, and then the broadcast
 * address as RTAX_BRD if one is set, or else the destination address as
 * RTAX_DST if one is set.
 */
static void
rtsock_rta_add_v4(struct rtsock_rta * rta, struct ifdev * ifdev,
	ifaddr_v4_num_t num, struct sockaddr_in sin[4])
{
	struct sockaddr_in *addr = &sin[0];
	struct sockaddr_in *mask = &sin[1];
	struct sockaddr_in *bcast = &sin[2];
	struct sockaddr_in *dest = &sin[3];

	(void)ifaddr_v4_get(ifdev, num, addr, mask, bcast, dest);

	rtsock_rta_set(rta, RTAX_IFA, addr, addr->sin_len);

	/* The netmask is compressed first, which changes its length. */
	rtsock_compress_netmask((struct sockaddr *)mask);

	rtsock_rta_set(rta, RTAX_NETMASK, mask, mask->sin_len);

	/* A length of zero means that the address is not set. */
	if (bcast->sin_len != 0)
		rtsock_rta_set(rta, RTAX_BRD, bcast, bcast->sin_len);
	else if (dest->sin_len != 0)
		rtsock_rta_set(rta, RTAX_DST, dest, dest->sin_len);
}
/*
 * Send a routing message of the given type about IPv4 address number 'num' of
 * the given interface, which is a new or deleted address, to all interested
 * routing sockets.
 */
void
rtsock_msg_addr_v4(struct ifdev * ifdev, unsigned int type,
	ifaddr_v4_num_t num)
{
	struct rtsock_rta rta;
	struct ifa_msghdr ifam;
	struct sockaddr_dlx name;
	struct sockaddr_in sin[4];
	struct pbuf *pbuf;
	ssize_t r;

	/* Do not bother generating a message that nobody would receive. */
	if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET, NULL /*pbuf*/))
		return;

	rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);

	rtsock_rta_add_v4(&rta, ifdev, num, sin);

	r = rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
	    &ifam.ifam_addrs, &rta, &pbuf, NULL, 0);

	/* Deliver the message only if it was generated successfully. */
	if (r <= 0)
		return;

	rtsock_msg_match(NULL /*rtsrc*/, AF_INET, pbuf);
}
/*
 * Obtain IPv6 address number 'num' of the given interface into the three
 * elements of 'sin6', and add the results to the given RTA map: the address as
 * RTAX_IFA, the netmask as RTAX_NETMASK, and the destination address as
 * RTAX_DST if one is set.
 */
static void
rtsock_rta_add_v6(struct rtsock_rta * rta, struct ifdev * ifdev,
	ifaddr_v6_num_t num, struct sockaddr_in6 sin6[3])
{
	struct sockaddr_in6 *addr = &sin6[0];
	struct sockaddr_in6 *mask = &sin6[1];
	struct sockaddr_in6 *dest = &sin6[2];

	ifaddr_v6_get(ifdev, num, addr, mask, dest);

	rtsock_rta_set(rta, RTAX_IFA, addr, addr->sin6_len);

	/* Compression is currently a no-op for IPv6 netmasks. */
	rtsock_compress_netmask((struct sockaddr *)mask);

	rtsock_rta_set(rta, RTAX_NETMASK, mask, mask->sin6_len);

	/* A length of zero means that the address is not set. */
	if (dest->sin6_len != 0)
		rtsock_rta_set(rta, RTAX_DST, dest, dest->sin6_len);
}
/*
 * Send a routing message of the given type about IPv6 address number 'num' of
 * the given interface, which is a new or deleted address, to all interested
 * routing sockets.
 */
void
rtsock_msg_addr_v6(struct ifdev * ifdev, unsigned int type,
	ifaddr_v6_num_t num)
{
	struct rtsock_rta rta;
	struct ifa_msghdr ifam;
	struct sockaddr_dlx name;
	struct sockaddr_in6 sin6[3];
	struct pbuf *pbuf;
	ssize_t r;

	/* Do not bother generating a message that nobody would receive. */
	if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, NULL /*pbuf*/))
		return;

	rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);

	rtsock_rta_add_v6(&rta, ifdev, num, sin6);

	r = rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
	    &ifam.ifam_addrs, &rta, &pbuf, NULL, 0);

	/* Deliver the message only if it was generated successfully. */
	if (r <= 0)
		return;

	rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, pbuf);
}
/*
* Send an RTM_MISS routing message about an address for which no route was
* found. The caller must provide the address in the appropriate form and
* perform any per-address rate limiting.
*/
void
rtsock_msg_miss(const struct sockaddr * addr)
{
struct rt_msghdr rtm;
struct rtsock_rta rta;
struct pbuf *pbuf;
/*
* Unfortunately the destination address has already been generated (as
* 'addr'), which is a big part of the work. Still, skip the rest if
* there is no routing socket to deliver the message to.
*/
if (!rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, NULL /*pbuf*/))
return;
memset(&rtm, 0, sizeof(rtm));
rtm.rtm_version = RTM_VERSION;
rtm.rtm_type = RTM_MISS;
rtsock_rta_init(&rta);
rtsock_rta_set(&rta, RTAX_DST, addr, addr->sa_len);
if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
&rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, pbuf);
}
/*
 * Produce the routing message header and address vector for routing table
 * entry 'route', to be used for either a routing socket broadcast or a
 * sysctl(7) reply.  The message type (RTM_) is given as 'type'.  The header is
 * written to 'rtm' and the address vector to 'rta'.  The caller supplies
 * 'addr', 'mask', and 'gateway' as storage for the generated addresses, and
 * may supply 'ifp' and 'ifa' (each may be NULL) to have those included as
 * well; the address vector may point into this storage.  Turning the results
 * into an actual routing message is left to the caller.
 */
static void
rtsock_get_route(struct rt_msghdr * rtm, struct rtsock_rta * rta,
	union sockaddr_any * addr, union sockaddr_any * mask,
	union sockaddr_any * gateway, union sockaddr_any * ifp,
	union sockaddr_any * ifa, const struct route_entry * route,
	unsigned int type)
{
	unsigned int rtflags, usecount;
	struct ifdev *dev;

	/* Retrieve everything there is to know about the route. */
	route_get(route, addr, mask, gateway, ifp, ifa, &dev, &rtflags,
	    &usecount);

	/* Fill in the header; all fields not set here remain zero. */
	memset(rtm, 0, sizeof(*rtm));
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	rtm->rtm_index = ifdev_get_index(dev);
	rtm->rtm_flags = rtflags;
	rtm->rtm_use = usecount;

	/* Build the address vector, starting with the destination. */
	rtsock_rta_init(rta);

	rtsock_rta_set(rta, RTAX_DST, addr, addr->sa.sa_len);

	/* A netmask is included for non-host routes only. */
	if ((rtflags & RTF_HOST) == 0) {
		rtsock_compress_netmask(&mask->sa);

		rtsock_rta_set(rta, RTAX_NETMASK, mask, mask->sa.sa_len);
	}

	rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sa.sa_len);

	/* The interface name and address are added on request only. */
	if (ifp != NULL)
		rtsock_rta_set(rta, RTAX_IFP, ifp, ifp->sa.sa_len);

	if (ifa != NULL)
		rtsock_rta_set(rta, RTAX_IFA, ifa, ifa->sa.sa_len);
}
/*
* Send a routing message about a route, with the given type which may be one
* of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The routing
* socket request information 'rtr', if not NULL, provides additional
* information about the routing socket that was the source of the request (if
* any), various fields that should be echoed, and (for RTM_GET) whether to
* add interface information to the output.
*/
void
rtsock_msg_route(const struct route_entry * route, unsigned int type,
const struct rtsock_request * rtr)
{
union sockaddr_any addr, mask, gateway, ifp, ifa;
struct rt_msghdr rtm;
struct rtsock_rta rta;
struct rtsock *rtsrc;
struct pbuf *pbuf;
int family, getif;
rtsrc = (rtr != NULL) ? rtr->rtr_src : NULL;
family = (route_is_ipv6(route)) ? AF_INET6 : AF_INET;
if (!rtsock_msg_match(rtsrc, family, NULL /*pbuf*/))
return;
getif = (rtr != NULL && rtr->rtr_getif);
rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway,
(getif) ? &ifp : NULL, (getif) ? &ifa : NULL, route, type);
if (rtr != NULL) {
rtm.rtm_flags |= RTF_DONE;
rtm.rtm_pid = rtr->rtr_pid;
rtm.rtm_seq = rtr->rtr_seq;
}
if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
&rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
rtsock_msg_match(rtsrc, family, pbuf);
}
/*
 * Generate sysctl(7) output or length for the given routing table entry
 * 'route', provided that the route passes the flags filter 'filter'.  A filter
 * of zero lets every route pass; a nonzero filter lets a route pass if the
 * route has at least one of the filter's flags set.  The address length
 * 'addr_len' is used to compute a cheap length estimate, which is what is
 * returned if 'oldp' is NULL.  Otherwise, the output is generated at offset
 * 'off' into 'oldp'.  On success, return the byte size of the output.  If the
 * route was not a match for the filter, return zero.  On failure, return a
 * negative error code.
 */
static ssize_t
rtsock_info_rtable_entry(const struct route_entry * route, unsigned int filter,
	socklen_t addr_len, struct rmib_oldp * oldp, size_t off)
{
	union sockaddr_any addr, mask, gateway;
	struct rt_msghdr rtm;
	struct rtsock_rta rta;
	unsigned int flags;
	ssize_t len;

	flags = route_get_flags(route);

	/*
	 * Apparently, matching any of the flags (if given) is sufficient.
	 * Thus, the route is skipped only if a filter was given and the route
	 * has none of the filter's flags set.
	 */
	if (filter != 0 && (filter & flags) == 0)
		return 0;

	/* Size (over)estimation shortcut. */
	if (oldp == NULL) {
		/* Header, destination, and a worst-case size gateway. */
		len = sizeof(rtm) + RT_ROUNDUP(addr_len) +
		    RT_ROUNDUP(sizeof(gateway));

		/* Non-host routes have a netmask as well. */
		if (!(flags & RTF_HOST))
			len += RT_ROUNDUP(addr_len);

		return len;
	}

	/* No interface information is included in routing table dumps. */
	rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway, NULL /*ifp*/,
	    NULL /*ifa*/, route, RTM_GET);

	return rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
	    &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp, off);
}
/*
 * Obtain routing table entries for sysctl(7).  The IPv4 routes are listed if
 * 'family' is AF_UNSPEC or AF_INET, and the IPv6 routes are listed if 'family'
 * is AF_UNSPEC or AF_INET6.  The 'filter' value is passed on as flags filter
 * for each of the entries.  Return the total size of all entries, or the
 * first negative result returned for an entry.
 */
static ssize_t
rtsock_info_rtable(struct rmib_oldp * oldp, int family, int filter)
{
	struct route_entry *entry;
	unsigned int flt;
	ssize_t r, total;

	flt = (unsigned int)filter;
	total = 0;

	if (family == AF_UNSPEC || family == AF_INET) {
		entry = NULL;

		while ((entry = route_enum_v4(entry)) != NULL) {
			r = rtsock_info_rtable_entry(entry, flt,
			    sizeof(struct sockaddr_in), oldp, total);

			if (r < 0)
				return r;

			total += r;
		}
	}

	if (family == AF_UNSPEC || family == AF_INET6) {
		entry = NULL;

		while ((entry = route_enum_v6(entry)) != NULL) {
			r = rtsock_info_rtable_entry(entry, flt,
			    sizeof(struct sockaddr_in6), oldp, total);

			if (r < 0)
				return r;

			total += r;
		}
	}

	/* TODO: should we add slack here? */
	return total;
}
/*
 * Produce the routing message header and address vector for ARP table entry
 * number 'num', to be used for either a routing socket broadcast or a
 * sysctl(7) reply.  The message type (RTM_) is given as 'type'.  The header is
 * written to 'rtm' and the address vector to 'rta'.  The caller supplies
 * 'addr' and 'gateway' as storage for the generated addresses; the address
 * vector may point into this storage.  Turning the results into an actual
 * routing message is left to the caller.
 */
static void
rtsock_get_arp(struct rt_msghdr * rtm, struct rtsock_rta * rta,
	struct sockaddr_in * addr, struct sockaddr_dlx * gateway,
	lldata_arp_num_t num, unsigned int type)
{
	unsigned int arpflags;
	struct ifdev *dev;

	lldata_arp_get(num, addr, gateway, &dev, &arpflags);

	/* Fill in the header; all fields not set here remain zero. */
	memset(rtm, 0, sizeof(*rtm));
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	rtm->rtm_index = ifdev_get_index(dev);
	rtm->rtm_flags = arpflags;

	/* TODO: obtaining and reporting the proper expiry time, if any. */
	if ((arpflags & RTF_STATIC) == 0)
		rtm->rtm_rmx.rmx_expire = (time_t)-1;

	/* The entry consists of a protocol and a link-layer address. */
	rtsock_rta_init(rta);

	rtsock_rta_set(rta, RTAX_DST, addr, addr->sin_len);

	rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len);
}
/*
 * Send a routing message about an ARP table entry, with the given type which
 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET.  The
 * routing socket request information 'rtr' must not be NULL.  It identifies
 * the routing socket that was the source of the request, which is the only
 * socket that the message is sent to, and it provides the fields (process ID
 * and sequence number) that are echoed in the message.
 */
void
rtsock_msg_arp(lldata_arp_num_t num, unsigned int type,
	const struct rtsock_request * rtr)
{
	struct sockaddr_in addr;
	struct sockaddr_dlx gateway;
	struct rt_msghdr rtm;
	struct rtsock_rta rta;
	struct pbuf *pbuf;

	assert(rtr != NULL);

	/*
	 * We do not maintain the link-local tables ourselves, and thus, we do
	 * not have a complete view of modifications to them.  In order not to
	 * confuse userland with inconsistent updates (e.g., deletion of
	 * previously unreported entries), send these routing messages to the
	 * source of the routing request only.
	 */
	if (!rtsock_msg_one(rtr->rtr_src, AF_INET, NULL /*pbuf*/))
		return;

	rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, type);

	/*
	 * There is always a request here, so the message is always marked as
	 * done, and always echoes the fields from the request.
	 */
	rtm.rtm_flags |= RTF_DONE;
	rtm.rtm_pid = rtr->rtr_pid;
	rtm.rtm_seq = rtr->rtr_seq;

	if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
	    &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
		rtsock_msg_one(rtr->rtr_src, AF_INET, pbuf);
}
/*
* Obtain ARP table entries.
*/
static ssize_t
rtsock_info_lltable_arp(struct rmib_oldp * oldp)
{
struct sockaddr_in addr;
struct sockaddr_dlx gateway;
struct rt_msghdr rtm;
struct rtsock_rta rta;
lldata_arp_num_t num;
ssize_t r, off;
off = 0;
for (num = 0; lldata_arp_enum(&num); num++) {
/* Size (over)estimation shortcut. */
if (oldp == NULL) {
off += sizeof(struct rt_msghdr) +
RT_ROUNDUP(sizeof(addr)) +
RT_ROUNDUP(sizeof(gateway));
continue;
}
rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, RTM_GET);
if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm),
&rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp,
off)) < 0)
return r;
off += r;
}
/* TODO: should we add slack here? */
return off;
}
/*
 * Produce the routing message header and address vector for NDP table entry
 * number 'num', to be used for either a routing socket broadcast or a
 * sysctl(7) reply.  The message type (RTM_) is given as 'type'.  The header is
 * written to 'rtm' and the address vector to 'rta'.  The caller supplies
 * 'addr' and 'gateway' as storage for the generated addresses; the address
 * vector may point into this storage.  Turning the results into an actual
 * routing message is left to the caller.
 */
static void
rtsock_get_ndp(struct rt_msghdr * rtm, struct rtsock_rta * rta,
	struct sockaddr_in6 * addr, struct sockaddr_dlx * gateway,
	lldata_ndp_num_t num, unsigned int type)
{
	unsigned int ndpflags;
	struct ifdev *dev;

	lldata_ndp_get(num, addr, gateway, &dev, &ndpflags);

	/* Fill in the header; all fields not set here remain zero. */
	memset(rtm, 0, sizeof(*rtm));
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = type;
	rtm->rtm_index = ifdev_get_index(dev);
	rtm->rtm_flags = ndpflags;

	/* The entry consists of a protocol and a link-layer address. */
	rtsock_rta_init(rta);

	rtsock_rta_set(rta, RTAX_DST, addr, addr->sin6_len);

	rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len);
}
/*
 * Send a routing message about an NDP table entry, with the given type which
 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET.  The
 * routing socket request information 'rtr' must not be NULL.  It identifies
 * the routing socket that was the source of the request, which is the only
 * socket that the message is sent to, and it provides the fields (process ID
 * and sequence number) that are echoed in the message.
 */
void
rtsock_msg_ndp(lldata_ndp_num_t num, unsigned int type,
	const struct rtsock_request * rtr)
{
	struct sockaddr_in6 addr;
	struct sockaddr_dlx gateway;
	struct rt_msghdr rtm;
	struct rtsock_rta rta;
	struct pbuf *pbuf;

	assert(rtr != NULL);

	/*
	 * We do not maintain the link-local tables ourselves, and thus, we do
	 * not have a complete view of modifications to them.  In order not to
	 * confuse userland with inconsistent updates (e.g., deletion of
	 * previously unreported entries), send these routing messages to the
	 * source of the routing request only.
	 */
	if (!rtsock_msg_one(rtr->rtr_src, AF_INET6, NULL /*pbuf*/))
		return;

	rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, type);

	/*
	 * There is always a request here, so the message is always marked as
	 * done, and always echoes the fields from the request.
	 */
	rtm.rtm_flags |= RTF_DONE;
	rtm.rtm_pid = rtr->rtr_pid;
	rtm.rtm_seq = rtr->rtr_seq;

	if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
	    &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
		rtsock_msg_one(rtr->rtr_src, AF_INET6, pbuf);
}
/*
* Obtain NDP table entries.
*/
static ssize_t
rtsock_info_lltable_ndp(struct rmib_oldp * oldp)
{
struct rt_msghdr rtm;
struct rtsock_rta rta;
struct sockaddr_in6 addr;
struct sockaddr_dlx gateway;
lldata_ndp_num_t num;
ssize_t r, off;
off = 0;
for (num = 0; lldata_ndp_enum(&num); num++) {
/* Size (over)estimation shortcut. */
if (oldp == NULL) {
off += sizeof(struct rt_msghdr) +
RT_ROUNDUP(sizeof(addr)) +
RT_ROUNDUP(sizeof(gateway));
continue;
}
rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, RTM_GET);
if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm),
&rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp,
off)) < 0)
return r;
off += r;
}
/* TODO: should we add slack here? */
return off;
}
/*
 * Obtain link-layer table entries for sysctl(7): the ARP table for AF_INET,
 * and the NDP table for AF_INET6.  For any other family, there is nothing to
 * list, and zero is returned.
 */
static ssize_t
rtsock_info_lltable(struct rmib_oldp * oldp, int family)
{

	if (family == AF_INET)
		return rtsock_info_lltable_arp(oldp);

	if (family == AF_INET6)
		return rtsock_info_lltable_ndp(oldp);

	return 0;
}
/*
* Obtain link-layer address information for one specific interface.
*/
static ssize_t
rtsock_info_if_dl(struct ifdev * ifdev, struct ifa_msghdr * ifam,
struct rmib_oldp * oldp, ssize_t off)
{
struct rtsock_rta rta;
struct sockaddr_dlx sdlx;
ifaddr_dl_num_t num;
ssize_t r, len;
len = 0;
for (num = 0; ifaddr_dl_enum(ifdev, &num); num++) {
if (oldp == NULL) {
len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sdlx));
continue;
}
rtsock_rta_init(&rta);
rtsock_rta_add_dl(&rta, ifdev, num, &sdlx);
if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
&ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
oldp, off + len)) < 0)
return r;
len += r;
}
return len;
}
/*
* Obtain IPv4 address information for one specific interface.
*/
static ssize_t
rtsock_info_if_v4(struct ifdev * ifdev, struct ifa_msghdr * ifam,
struct rmib_oldp * oldp, ssize_t off)
{
struct sockaddr_in sin[4];
struct rtsock_rta rta;
ifaddr_v4_num_t num;
ssize_t r, len;
len = 0;
/*
* Mostly for future compatibility, we support multiple IPv4 interface
* addresses here. Every interface has an interface address and a
* netmask. In addition, an interface may have either a broadcast or a
* destination address.
*/
for (num = 0; ifaddr_v4_enum(ifdev, &num); num++) {
/* Size (over)estimation shortcut. */
if (oldp == NULL) {
len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin[0])) * 3;
continue;
}
rtsock_rta_init(&rta);
rtsock_rta_add_v4(&rta, ifdev, num, sin);
if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
&ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
oldp, off + len)) < 0)
return r;
len += r;
}
return len;
}
/*
* Obtain IPv6 address information for one specific interface.
*/
static ssize_t
rtsock_info_if_v6(struct ifdev * ifdev, struct ifa_msghdr * ifam,
struct rmib_oldp * oldp, ssize_t off)
{
struct sockaddr_in6 sin6[3];
struct rtsock_rta rta;
ifaddr_v6_num_t num;
ssize_t r, len;
len = 0;
/* As with IPv4, except that IPv6 has no broadcast addresses. */
for (num = 0; ifaddr_v6_enum(ifdev, &num); num++) {
/* Size (over)estimation shortcut. */
if (oldp == NULL) {
len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin6[0])) * 3;
continue;
}
rtsock_rta_init(&rta);
rtsock_rta_add_v6(&rta, ifdev, num, sin6);
if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
&ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
oldp, off + len)) < 0)
return r;
len += r;
}
return len;
}
/*
* Obtain information for one specific interface.
*/
static ssize_t
rtsock_info_if(struct ifdev * ifdev, struct rmib_oldp * oldp, ssize_t off,
int family)
{
struct rtsock_rta rta;
struct sockaddr_dlx sdlx;
struct if_msghdr ifm;
struct ifa_msghdr ifam;
unsigned int ifflags;
ssize_t r, len, sdlxsize;
len = 0;
ifflags = ifdev_get_ifflags(ifdev);
/* Create an interface information entry. */
rtsock_rta_init(&rta);
if (oldp != NULL) {
memset(&ifm, 0, sizeof(ifm));
ifm.ifm_version = RTM_VERSION;
ifm.ifm_type = RTM_IFINFO;
ifm.ifm_flags = ifflags;
ifm.ifm_index = ifdev_get_index(ifdev);
memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev),
sizeof(ifm.ifm_data));
}
/*
* Generate a datalink socket address structure. TODO: see if it is
* worth obtaining just the length for the (oldp == NULL) case here.
*/
memset(&sdlx, 0, sizeof(sdlx));
ifaddr_dl_get(ifdev, 0, &sdlx);
sdlxsize = RT_ROUNDUP(sdlx.sdlx_len);
rtsock_rta_set(&rta, RTAX_IFP, &sdlx, sdlxsize);
if ((r = rtsock_rta_finalize(&ifm, sizeof(ifm), &ifm.ifm_msglen,
&ifm.ifm_addrs, &rta, NULL /*pbuf*/, oldp, off + len)) < 0)
return r;
len += r;
/* Generate a header for all addresses once. */
if (oldp != NULL) {
memset(&ifam, 0, sizeof(ifam));
ifam.ifam_version = RTM_VERSION;
ifam.ifam_type = RTM_NEWADDR;
ifam.ifam_flags = 0;
ifam.ifam_index = ifdev_get_index(ifdev);
ifam.ifam_metric = ifdev_get_metric(ifdev);
}
/* If requested and applicable, add any datalink addresses. */
if (family == AF_UNSPEC || family == AF_LINK) {
if ((r = rtsock_info_if_dl(ifdev, &ifam, oldp, off + len)) < 0)
return r;
len += r;
}
/* If requested and applicable, add any IPv4 addresses. */
if (family == AF_UNSPEC || family == AF_INET) {
if ((r = rtsock_info_if_v4(ifdev, &ifam, oldp, off + len)) < 0)
return r;
len += r;
}
/* If requested and applicable, add any IPv6 addresses. */
if (family == AF_UNSPEC || family == AF_INET6) {
if ((r = rtsock_info_if_v6(ifdev, &ifam, oldp, off + len)) < 0)
return r;
len += r;
}
return len;
}
/*
 * Obtain interface information for sysctl(7).  If 'ifindex' is nonzero, the
 * result is limited to the interface with that index, and is empty (zero) if
 * there is no such interface.  Otherwise, all interfaces are listed.  The
 * 'family' value selects which addresses are included for each interface.
 */
static ssize_t
rtsock_info_iflist(struct rmib_oldp * oldp, int family, uint32_t ifindex)
{
	struct ifdev *dev;
	ssize_t r, total;

	/* A specific interface index: report on just that interface. */
	if (ifindex != 0) {
		dev = ifdev_get_by_index(ifindex);

		if (dev == NULL)
			return 0;

		return rtsock_info_if(dev, oldp, 0, family);
	}

	/* Otherwise, iterate through the list of all interfaces. */
	total = 0;

	dev = ifdev_enum(NULL);

	while (dev != NULL) {
		/*
		 * Avoid generating results that are never copied out: once
		 * the offset is out of range, continue in estimation mode.
		 */
		if (oldp != NULL && !rmib_inrange(oldp, total))
			oldp = NULL;

		r = rtsock_info_if(dev, oldp, total, family);

		if (r < 0)
			return r;

		total += r;

		dev = ifdev_enum(dev);
	}

	/* TODO: should we add slack here? */
	return total;
}
/*
 * Handle a sysctl(7) request for routing table, ARP/NDP cache, or interface
 * information.  The remaining name must consist of exactly three components:
 * an address family, a subcall (NET_RT_), and a subcall-specific argument.
 * The argument is a flags filter for NET_RT_FLAGS and NET_RT_DUMP, and an
 * interface index for NET_RT_IFLIST.  Return the (produced, or if oldp is
 * NULL, estimated) byte size of the output on success, or a negative error
 * code on failure.
 */
static ssize_t
rtsock_info(struct rmib_call * call, struct rmib_node * node __unused,
	struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
{
	int family, arg;

	if (call->call_namelen != 3)
		return EINVAL;

	family = call->call_name[0];
	arg = call->call_name[2];

	switch (call->call_name[1]) {
	case NET_RT_FLAGS:
		/*
		 * Preliminary support for changes as of NetBSD 8, where by
		 * default, the use of this subcall implies an ARP/NDP-only
		 * request.
		 */
		if (arg == 0)
			arg = RTF_LLDATA;

		/*
		 * Split off ARP/NDP handling from the normal routing table
		 * listing, as done since NetBSD 8.  We generate the ARP/NDP
		 * listing from here, and keep those entries out of the
		 * routing table dump below.  Since the filter is of a
		 * match-any type, and we have just matched a flag, no further
		 * filtering is needed here.
		 */
		if ((arg & RTF_LLDATA) != 0) {
			if (family == AF_UNSPEC)
				return EINVAL;

			return rtsock_info_lltable(oldp, family);
		}

		/* FALLTHROUGH */
	case NET_RT_DUMP:
		return rtsock_info_rtable(oldp, family, arg);

	case NET_RT_IFLIST:
		return rtsock_info_iflist(oldp, family, arg);

	default:
		return EINVAL;
	}
}