phunix/minix/net/lwip/loopif.c
David van Moolenbroek ef8d499e2d Add lwip: a new lwIP-based TCP/IP service
This commit adds a new TCP/IP service to MINIX 3.  As its core, the
service uses the lwIP TCP/IP stack for maintenance reasons.  The
service aims to be compatible with NetBSD userland, including its
low-level network management utilities.  It also aims to support
modern features such as IPv6.  In summary, the new LWIP service has
support for the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover
many aspects of what is going on.  The service is primarily a socket
driver built on top of the libsockdriver library, but for BPF devices
it is at the same time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
2017-04-30 13:16:03 +00:00

421 lines
12 KiB
C

/* LWIP service - loopif.c - loopback interfaces */
/*
* There is always at least one loopback device. This device is used also to
* loop back packets sent on other interfaces to the local interface address.
* Therefore, not all packets on the loopback device have a source or
* destination address corresponding to the loopback device.
*/
#include "lwip.h"
/*
 * As a safety measure, if lwIP somehow gets stuck in a loop replying to its
 * own packets on a loopback interface, stop immediately feeding packets back
 * into lwIP after this many packets within a single poll call.  The remaining
 * packets stay queued and will still be delivered, but not before the main
 * message loop has had a chance to run.
 */
#define LOOPIF_LIMIT 65536
/*
 * The MTU is restricted to 65531 bytes (UINT16_MAX minus four), because we
 * need space for a 4-byte header to identify the original interface of the
 * packet.  Note that, because of the sizeof operand, this expression has type
 * size_t rather than int.
 */
#define LOOPIF_MAX_MTU (UINT16_MAX - sizeof(uint32_t)) /* maximum MTU */
/* NOTE(review): LOOPIF_DEF_MTU is not referenced in the visible code. */
#define LOOPIF_DEF_MTU LOOPIF_MAX_MTU /* default MTU */
/* Size of the static loopif_array table, and thus the device limit. */
#define NR_LOOPIF 2 /* number of loopback devices */
/*
 * Per-device state of one loopback interface.  Callbacks receive a pointer to
 * the embedded ifdev structure and cast it back to the containing loopif
 * structure, which is why the ifdev member has to stay at offset zero.
 */
struct loopif {
	/* Generic interface device object; MUST remain the first member. */
	struct ifdev loopif_ifdev;
	/* First packet waiting to be fed back as input, or NULL if none. */
	struct pbuf *loopif_head;
	/* Location where the next packet to be queued must be linked in. */
	struct pbuf **loopif_tailp;
	/* Linkage for whichever list (free or active) holds this slot. */
	TAILQ_ENTRY(loopif) loopif_next;
};

/* Statically allocated storage for all loopback interface slots. */
struct loopif loopif_array[NR_LOOPIF];
/* Slots of loopif_array that are currently not in use by any interface. */
static TAILQ_HEAD(, loopif) loopif_freelist; /* free loop interfaces list */
/* Slots of loopif_array that back an existing loopback interface. */
static TAILQ_HEAD(, loopif) loopif_activelist; /* active loop interfaces */
/* Obtain the netif object that belongs to the given loopif structure. */
#define loopif_get_netif(loopif) (ifdev_get_netif(&(loopif)->loopif_ifdev))
/*
 * Checksum control flags shared by all loopback interfaces.  The value is
 * computed in loopif_init() and may be changed through loopif_cksum().
 */
static unsigned int loopif_cksum_flags;
/* Forward declarations: both are referenced before their definitions. */
static int loopif_create(const char *name);
static const struct ifdev_ops loopif_ops;
/*
 * Set up the loopback interface module.  All statically allocated loopback
 * slots are placed on the free list, the default checksumming flags are
 * computed, and the "lo" interface name prefix is registered with the ifdev
 * module so that loopback devices can be created later on.
 */
void
loopif_init(void)
{
	struct loopif *lo;
	unsigned int i, managed;

	/* Start out with no active interfaces and all slots being free. */
	TAILQ_INIT(&loopif_activelist);
	TAILQ_INIT(&loopif_freelist);

	for (i = 0; i < __arraycount(loopif_array); i++) {
		lo = &loopif_array[i];

		TAILQ_INSERT_TAIL(&loopif_freelist, lo, loopif_next);
	}

	/*
	 * By default, no checksumming is performed on loopback interfaces.
	 * ICMP messages are the exception, because skipping those checksums
	 * would require additional changes in the code receiving them.  For
	 * future compatibility, only the flags that this module manages
	 * itself are turned off; all other flags are left enabled.
	 */
	managed = NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP;
	managed |= NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP;
	managed |= NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP;

	loopif_cksum_flags = NETIF_CHECKSUM_ENABLE_ALL & ~managed;

	/* Let the ifdev module know how to create additional "lo" devices. */
	ifdev_register("lo", loopif_create);
}
/*
 * Polling function, invoked after each message loop iteration.  Any packets
 * that were queued on the output side of this loopback device are now handed
 * to the input side of the device, one at a time, in queue order.
 */
static void
loopif_poll(struct ifdev * ifdev)
{
	static int warned = FALSE;
	struct loopif *lo = (struct loopif *)ifdev;
	struct pbuf *pkt, **endp;
	struct ifdev *orig_ifdev;
	struct netif *orig_netif;
	uint32_t orig_index;
	unsigned int handled;

	for (handled = 0; (pkt = lo->loopif_head) != NULL; handled++) {
		/*
		 * Guard against endless loops.  Processing a packet here may
		 * cause new packets to be appended to the very same queue, so
		 * the queue length alone says nothing; we count the packets
		 * processed in this call instead.  The limit can, and must,
		 * be fairly high.
		 *
		 * If the warning below is ever printed, a bug in lwIP has
		 * basically been triggered, and it should be fixed in lwIP.
		 * The warning is printed only once.
		 */
		if (handled == LOOPIF_LIMIT) {
			if (!warned) {
				printf("LWIP: excess loopback traffic, "
				    "throttling output\n");
				warned = TRUE;
			}

			break;
		}

		/* Unlink the first packet (a pbuf chain) from the queue. */
		endp = pchain_end(pkt);

		lo->loopif_head = *endp;
		if (lo->loopif_head == NULL)
			lo->loopif_tailp = &lo->loopif_head;
		*endp = NULL;

		/*
		 * Extract the index of the original interface, which was
		 * stored in front of the packet data, and then strip it.
		 */
		memcpy(&orig_index, pkt->payload, sizeof(orig_index));

		util_pbuf_header(pkt, -(int)sizeof(orig_index));

		/*
		 * A non-zero index means the packet must be passed back to
		 * that interface.  The interface should still exist in all
		 * cases, but if it does not, fall back to no interface.
		 */
		orig_netif = NULL;

		if (orig_index != 0) {
			orig_ifdev = ifdev_get_by_index(orig_index);

			if (orig_ifdev != NULL)
				orig_netif = ifdev_get_netif(orig_ifdev);
		}

		/*
		 * Packets on loopback devices are given to BPF on output
		 * only, as doing so on input too would result in duplicates
		 * of all captured packets.
		 */
		ifdev_input(ifdev, pkt, orig_netif, FALSE /*to_bpf*/);
	}
}
/*
 * Handle a packet that is sent out on a loopback interface.  The packet can
 * not be passed back into lwIP right away, and the original packet can not be
 * passed back into lwIP at all.  A copy of the packet is therefore appended to
 * the interface's queue, from where it is passed back to lwIP at the end of
 * the current message loop iteration.  Return ERR_OK if the copy was queued,
 * or ERR_MEM if the packet is too large or no memory is available.
 */
static err_t
loopif_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif)
{
	struct loopif *lo = (struct loopif *)ifdev;
	struct pbuf *dup;
	uint32_t orig_index;

	/* Oversized packets should never get here; refuse them if they do. */
	if (pbuf->tot_len > UINT16_MAX - sizeof(orig_index)) {
		printf("LWIP: attempt to send oversized loopback packet\n");

		return ERR_MEM;
	}

	/*
	 * Allocate room for the packet plus the interface index in front.
	 * When the service runs low on memory, the allocation is likely to
	 * fail here.  For that reason nothing is printed in that case; the
	 * drop is merely reported to the ifdev module.
	 */
	dup = pchain_alloc(PBUF_RAW, sizeof(orig_index) + pbuf->tot_len);

	if (dup == NULL) {
		ifdev_output_drop(ifdev);

		return ERR_MEM;
	}

	/*
	 * A non-NULL netif means that the packet was purposely diverted from
	 * a non-loopback interface to this one.  The original interface must
	 * then be remembered, so that the packet can be passed back to that
	 * interface later.  Otherwise, packets to link-local addresses that
	 * are assigned to non-loopback interfaces are not processed properly.
	 * An index of zero stands for "no original interface".
	 */
	orig_index = 0;

	if (netif != NULL)
		orig_index = ifdev_get_index(netif_get_ifdev(netif));

	/* Store the index at the very start of the newly allocated chain. */
	assert(dup->len >= sizeof(orig_index));

	memcpy(dup->payload, &orig_index, sizeof(orig_index));

	/* Temporarily skip the index so the copy lands right behind it. */
	util_pbuf_header(dup, -(int)sizeof(orig_index));

	if (pbuf_copy(dup, pbuf) != ERR_OK)
		panic("unexpected pbuf copy failure");

	/* Carry over the link-layer multicast and broadcast markers. */
	dup->flags |= pbuf->flags & (PBUF_FLAG_LLMCAST | PBUF_FLAG_LLBCAST);

	/* Expose the index again, as loopif_poll expects to find it. */
	util_pbuf_header(dup, sizeof(orig_index));

	/* Append the copy to the tail of the pending packets queue. */
	*lo->loopif_tailp = dup;
	lo->loopif_tailp = pchain_end(dup);

	return ERR_OK;
}
/*
 * Netif initialization callback for loopback-type interfaces, called from
 * lwIP when the interface is created.  Always returns ERR_OK.
 */
static err_t
loopif_init_netif(struct ifdev * ifdev, struct netif * netif)
{

	/* Apply the checksumming settings shared by all loopback devices. */
	NETIF_SET_CHECKSUM_CTRL(netif, loopif_cksum_flags);

	/*
	 * FIXME: lwIP offers no way to enable multicast on an interface
	 * without enabling multicast management traffic (IGMP and MLD) along
	 * with it.  As a result, joining multicast groups and assigning local
	 * IPv6 addresses currently generates such traffic on loopback
	 * interfaces as well.  For now, that is considered better than having
	 * no multicast support on loopback interfaces at all.
	 */
	netif->flags |= NETIF_FLAG_IGMP | NETIF_FLAG_MLD6;

	/* The two-character lwIP interface name prefix is "lo". */
	netif->name[1] = 'o';
	netif->name[0] = 'l';

	return ERR_OK;
}
/*
 * Create a new loopback device with the given interface name.  Return OK on
 * success, or ENOBUFS if all loopback interface slots are already in use.
 */
static int
loopif_create(const char * name)
{
	struct loopif *lo;

	/* Take the first slot off the free list, if there is one at all. */
	lo = TAILQ_FIRST(&loopif_freelist);

	if (lo == NULL)
		return ENOBUFS;

	TAILQ_REMOVE(&loopif_freelist, lo, loopif_next);

	/* The new interface starts out with an empty packet queue. */
	lo->loopif_head = NULL;
	lo->loopif_tailp = &lo->loopif_head;

	TAILQ_INSERT_HEAD(&loopif_activelist, lo, loopif_next);

	/*
	 * For simplicity and efficiency, packets given to BPF are not
	 * prefixed with their address family (IPv4/IPv6).  Our loopback
	 * devices are therefore of type DLT_RAW, as opposed to the DLT_NULL
	 * type that NetBSD uses.
	 */
	ifdev_add(&lo->loopif_ifdev, name, IFF_LOOPBACK | IFF_MULTICAST,
	    IFT_LOOP, 0 /*hdrlen*/, 0 /*addrlen*/, DLT_RAW, LOOPIF_MAX_MTU,
	    0 /*nd6flags*/, &loopif_ops);

	/* Report the link of the new interface as being up right away. */
	ifdev_update_link(&lo->loopif_ifdev, LINK_STATE_UP);

	return OK;
}
/*
 * Destroy an existing loopback device.  Return OK on success, or the error
 * returned by the ifdev module if it refuses to remove the interface.
 */
static int
loopif_destroy(struct ifdev * ifdev)
{
	struct loopif *lo = (struct loopif *)ifdev;
	struct pbuf *pkt, **endp;
	int r;

	/*
	 * Removal may be refused by the ifdev module, namely if this is the
	 * loopback interface that loops back packets for other interfaces.
	 * In that case, leave everything as is.
	 */
	r = ifdev_remove(&lo->loopif_ifdev);

	if (r != OK)
		return r;

	/*
	 * Free any packets still on the queue.  There can be such packets
	 * only if we have been throttling because of a feedback loop.
	 */
	for (;;) {
		pkt = lo->loopif_head;

		if (pkt == NULL)
			break;

		/* Detach this packet's pbuf chain from the queue first. */
		endp = pchain_end(pkt);

		lo->loopif_head = *endp;
		if (lo->loopif_head == NULL)
			lo->loopif_tailp = &lo->loopif_head;
		*endp = NULL;

		pbuf_free(pkt);
	}

	/* Make the slot available for reuse. */
	TAILQ_REMOVE(&loopif_activelist, lo, loopif_next);
	TAILQ_INSERT_HEAD(&loopif_freelist, lo, loopif_next);

	return OK;
}
/*
 * Set NetBSD-style interface flags (IFF_) for a loopback interface.  Return
 * OK on success, or EINVAL if any flag other than IFF_UP is given.
 */
static int
loopif_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	struct loopif *lo = (struct loopif *)ifdev;
	unsigned int newflags;

	/* IFF_UP is the only flag that may be set and cleared here. */
	if ((ifflags & ~IFF_UP) != 0)
		return EINVAL;

	/*
	 * The IFF_RUNNING flag immediately follows the requested IFF_UP
	 * state.  Updating it from here is a bit dangerous, but the caller
	 * takes this possibility into account.
	 */
	newflags = ifdev_get_ifflags(&lo->loopif_ifdev);

	if (ifflags & IFF_UP)
		newflags |= IFF_RUNNING;
	else
		newflags &= ~IFF_RUNNING;

	ifdev_update_ifflags(&lo->loopif_ifdev, newflags);

	return OK;
}
/*
 * Check a new Maximum Transmission Unit value for this interface.  Return a
 * nonzero (true) value if the value is acceptable, in which case the caller
 * takes care of the rest.  Return zero (false) if the value exceeds the
 * maximum MTU of loopback interfaces.
 */
static int
loopif_set_mtu(struct ifdev * ifdev __unused, unsigned int mtu)
{

	if (mtu > LOOPIF_MAX_MTU)
		return 0;

	return 1;
}
/*
 * Table of callback functions for loopback interfaces, as passed to the ifdev
 * module upon interface creation.  Input is handed directly to ip_input.
 */
static const struct ifdev_ops loopif_ops = {
	/* Interface life cycle. */
	.iop_init		= loopif_init_netif,
	.iop_destroy		= loopif_destroy,
	/* Packet flow. */
	.iop_output		= loopif_output,
	.iop_poll		= loopif_poll,
	.iop_input		= ip_input,
	/* Configuration. */
	.iop_set_ifflags	= loopif_set_ifflags,
	.iop_set_mtu		= loopif_set_mtu,
};
/*
 * Retrieve and/or change a per-protocol loopback checksumming option through
 * sysctl(7).  The option value is an integer: nonzero enables both generating
 * and checking checksums for the protocol, zero disables both.  Return the
 * size of the value on success, EINVAL for an unsupported protocol, or the
 * error result of a failed copy.
 */
ssize_t
loopif_cksum(struct rmib_call * call, struct rmib_node * node __unused,
	struct rmib_oldp * oldp, struct rmib_newp * newp)
{
	struct loopif *lo;
	unsigned int mask;
	int r, val, proto;

	/*
	 * The protocol is the third name field.  The domain, which is the
	 * second field, is ignored, so that PF_INET and PF_INET6 share their
	 * settings.  This is necessary because lwIP does not support TCP/UDP
	 * checksumming flags on a per-domain basis.
	 */
	proto = call->call_oname[2];

	if (proto == IPPROTO_IP)
		mask = NETIF_CHECKSUM_GEN_IP | NETIF_CHECKSUM_CHECK_IP;
	else if (proto == IPPROTO_UDP)
		mask = NETIF_CHECKSUM_GEN_UDP | NETIF_CHECKSUM_CHECK_UDP;
	else if (proto == IPPROTO_TCP)
		mask = NETIF_CHECKSUM_GEN_TCP | NETIF_CHECKSUM_CHECK_TCP;
	else
		return EINVAL;

	/* If requested, copy out the current setting as either 0 or 1. */
	if (oldp != NULL) {
		val = ((loopif_cksum_flags & mask) != 0);

		r = rmib_copyout(oldp, 0, &val, sizeof(val));

		if (r < 0)
			return r;
	}

	/* Without a new value, all that is left is to return the length. */
	if (newp == NULL)
		return sizeof(val);

	r = rmib_copyin(newp, &val, sizeof(val));

	if (r != OK)
		return r;

	loopif_cksum_flags = (val) ? (loopif_cksum_flags | mask) :
	    (loopif_cksum_flags & ~mask);

	/*
	 * Push the new flags to all existing loopback interfaces.  Strictly
	 * speaking, enabling checksumming on a throttled loopif may cause
	 * packets to be dropped, but that case is so rare and unimportant
	 * that it is ignored.
	 */
	TAILQ_FOREACH(lo, &loopif_activelist, loopif_next) {
		NETIF_SET_CHECKSUM_CTRL(loopif_get_netif(lo),
		    loopif_cksum_flags);
	}

	/* Return the length of the node. */
	return sizeof(val);
}