David van Moolenbroek ef8d499e2d Add lwip: a new lwIP-based TCP/IP service
This commit adds a new TCP/IP service to MINIX 3.  As its core, the
service uses the lwIP TCP/IP stack for maintenance reasons.  The
service aims to be compatible with NetBSD userland, including its
low-level network management utilities.  It also aims to support
modern features such as IPv6.  In summary, the new LWIP service has
support for the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover
many aspects of what is going on.  The service is primarily a socket
driver built on top of the libsockdriver library, but for BPF devices
it is at the same time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
2017-04-30 13:16:03 +00:00

1065 lines
31 KiB
C

/* LWIP service - ifdev.c - network interface devices */
#include "lwip.h"
#include "mcast.h"
#include "ifaddr.h"
#include "rtsock.h"
#include "route.h"
#include "bpfdev.h"
#include <net/if_media.h>
/*
 * The highest possible interface index number, plus one.  We currently let
 * lwIP choose the interface index.  lwIP will generate a number between 1 and
 * 255 inclusive.  For efficiency, we use an array to look up an interface
 * device object by its index.  Thus, this array must be large enough to be
 * indexed by the largest possible index number generated by lwIP.  lwIP uses
 * an unsigned 8-bit field to store the index number.
 */
#define MAX_IFDEV (UINT8_MAX + 1)
/*
 * The table is indexed directly by the interface index (see ifdev_add() and
 * ifdev_get_by_index()).  Index zero is never assigned to an interface, so
 * slot zero always remains NULL.
 */
static struct ifdev *ifdev_table[MAX_IFDEV]; /* index-based lookup table */
static TAILQ_HEAD(, ifdev) ifdev_list; /* list of active interfaces */
static struct ifdev *ifdev_loopback; /* first registered loopback interface */
/*
 * The maximum number of virtual interface types--that is, interface types for
 * which interfaces may be created and destroyed dynamically.  The BSDs call
 * these "clones".  There should be enough slots for all types, which are
 * registered by their respective modules through ifdev_register().  Increase
 * as necessary; ifdev_register() panics when the table is full.
 */
#define MAX_VTYPE 4
static struct {
const char *ifvt_name; /* interface name without digits (e.g. "lo") */
size_t ifvt_namelen; /* length of the name, excluding null term. */
int (*ifvt_create)(const char *); /* called with the full interface name */
} ifdev_vtype[MAX_VTYPE];
static unsigned int ifdev_vtypes; /* number of in-use vtype slots */
/*
 * Lower bound enforced on interface MTUs by ifdev_add() and ifdev_set_mtu().
 */
#define IFDEV_MIN_MTU 1280 /* minimum interface MTU, required by IPv6 */
/*
 * Set up the global state of the network interface devices module: the
 * registry of virtual interface types, the list of active interfaces, and the
 * index-based lookup table.  Since the virtual types registry is wiped here,
 * this function must be called before any virtual interface type is
 * registered or any virtual interface is created.
 */
void
ifdev_init(void)
{

	/* No virtual interface types are registered yet. */
	ifdev_vtypes = 0;
	memset(ifdev_vtype, 0, sizeof(ifdev_vtype));

	/* No interfaces exist yet either. */
	TAILQ_INIT(&ifdev_list);
	memset(ifdev_table, 0, sizeof(ifdev_table));
}
/*
 * Give each active interface the opportunity to perform pending tasks.  This
 * function is called as part of each message loop iteration.
 */
void
ifdev_poll(void)
{
	struct ifdev *cur;

	/*
	 * Only interfaces that supply a polling hook are called.  Note that
	 * a hook must not remove its own interface: the list is being
	 * traversed while the hook runs.
	 */
	TAILQ_FOREACH(cur, &ifdev_list, ifdev_next) {
		if (cur->ifdev_ops->iop_poll == NULL)
			continue;

		cur->ifdev_ops->iop_poll(cur);
	}
}
/*
 * Process a packet that arrived on an interface.  Ownership of the packet
 * buffers passes to this function; the caller must not touch them afterward.
 * There are two calling conventions:
 *
 * - for a packet looped back on behalf of a non-loopback interface, 'ifdev'
 *   is the loopback interface and 'netif' is the netif of the original
 *   (non-loopback) interface;
 * - for any other packet, 'ifdev' is the receiving interface and 'netif' is
 *   NULL.
 *
 * The packet is shown to the BPF devices attached to 'ifdev' only if 'to_bpf'
 * is set.
 */
void
ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
	int to_bpf)
{
	struct bpfdev_link *link;
	err_t res;

	/*
	 * Both capturing and accounting are done on 'ifdev', which means that
	 * looped-back traffic shows up on the loopback device rather than on
	 * the original interface.  This is a policy decision (inspired by
	 * NetBSD's behavior) and may be changed later.
	 */
	if (to_bpf) {
		TAILQ_FOREACH(link, &ifdev->ifdev_bpf, bpfl_next)
			bpfdev_input(link, pbuf);
	}

	ifdev->ifdev_data.ifi_ibytes += pbuf->tot_len;
	ifdev->ifdev_data.ifi_ipackets++;

	if ((pbuf->flags & PBUF_FLAG_LLMCAST) != 0)
		ifdev->ifdev_data.ifi_imcasts++;

	/*
	 * Regular packets go through the netif's own input function, which
	 * performs link-layer handling.  Looped-back packets have no link
	 * layer to process, so those are handed straight to lwIP's IP-layer
	 * input function, on behalf of the original netif.
	 */
	if (netif == NULL)
		res = ifdev->ifdev_netif.input(pbuf, &ifdev->ifdev_netif);
	else
		res = ip_input(pbuf, netif);

	/* If the packet was not accepted, it is still ours to free. */
	if (res != ERR_OK)
		pbuf_free(pbuf);
}
/*
 * Send a packet out on an interface.  The result is ERR_OK if the packet was
 * handed off successfully, or another lwIP ERR_ code on failure; ERR_IF is
 * returned if the interface or its link is down.  In all cases, freeing the
 * packet buffers remains the caller's responsibility.
 *
 * If the packet is being looped back on behalf of a non-loopback interface
 * (because its destination is a local address), 'ifdev' is the loopback
 * interface and 'netif' is the original interface's netif.  Otherwise,
 * 'ifdev' is the packet's source interface and 'netif' is NULL.
 *
 * The packet is shown to attached BPF devices only if 'to_bpf' is set.  If
 * 'hdrcmplt' is clear, the source address of the data link header has not
 * been filled in yet, and the interface module is asked to do so, if
 * applicable.
 */
err_t
ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
	int to_bpf, int hdrcmplt)
{
	struct bpfdev_link *link;

	/*
	 * Packets for an interface that is down, or that has its link down,
	 * are dropped silently: neither BPF nor the interface module gets to
	 * see them.  The error should translate to ENETDOWN.
	 */
	if (!(ifdev_is_up(ifdev) && ifdev_is_link_up(ifdev)))
		return ERR_IF;

	/*
	 * An incomplete link-layer header occurs for BPF-generated packets
	 * only.  The header is completed before the BPF pass below, so that
	 * BPF listeners see the packet as it will be sent.
	 */
	if (!hdrcmplt && ifdev->ifdev_ops->iop_hdrcmplt != NULL)
		ifdev->ifdev_ops->iop_hdrcmplt(ifdev, pbuf);

	/*
	 * Just like in ifdev_input(), capturing and statistics are tied to
	 * 'ifdev', so looped-back packets are accounted to the loopback
	 * interface even if they originate from another interface.
	 */
	if (to_bpf) {
		TAILQ_FOREACH(link, &ifdev->ifdev_bpf, bpfl_next)
			bpfdev_output(link, pbuf);
	}

	ifdev->ifdev_data.ifi_obytes += pbuf->tot_len;
	ifdev->ifdev_data.ifi_opackets++;

	/*
	 * TODO: the multicast counter is imprecise, as it relies on the pbuf
	 * flag that we set explicitly ourselves.  That happens only for
	 * UDP/RAW packets, and not for (e.g.) ND6 multicast traffic.  We have
	 * reasons to set the flag ourselves anyway, namely to support
	 * MSG_MCAST and MSG_BCAST on loopback interfaces, but it should be
	 * complemented by additional checks here on, say, the destination
	 * ethernet address.
	 */
	if ((pbuf->flags & PBUF_FLAG_LLMCAST) != 0)
		ifdev->ifdev_data.ifi_omcasts++;

	return ifdev->ifdev_ops->iop_output(ifdev, pbuf, netif);
}
/*
 * lwIP output hook for IPv4 packets.  Normally, the packet is passed on to
 * the interface's link processor (e.g., etharp).  The exceptions are:
 *
 * - the route information says the packet must be rejected or blackholed, in
 *   which case the error code chosen by the route module is returned;
 * - the destination is the interface's own IPv4 address, in which case the
 *   packet is sent over the loopback interface instead;
 * - the packet is a multicast packet flagged for looping, in which case a
 *   copy is additionally sent over the loopback interface.
 *
 * Loopback interfaces themselves are exempt from the last two special cases.
 */
static err_t
ifdev_output_v4(struct netif * netif, struct pbuf * pbuf,
	const ip4_addr_t * ipaddr)
{
	struct ifdev *target;
	struct netif *orig_netif;
	err_t err;

	target = netif_get_ifdev(netif);

	assert(ifdev_loopback != NULL);

	/* Reject and blackhole routes end processing right here. */
	if (!route_output_v4(target, ipaddr, &err))
		return err;

	/*
	 * Loop back a copy of multicast packets that ask for it.  The result
	 * of this extra transmission is deliberately ignored, and the copy is
	 * not reported to BPF.
	 */
	if (!ifdev_is_loopback(target) &&
	    (pbuf->flags & PBUF_FLAG_MCASTLOOP) != 0)
		(void)ifdev_output(ifdev_loopback, pbuf, netif,
		    FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);

	/*
	 * A packet for the interface's own address is diverted to the
	 * loopback interface, which then needs the original netif.  In all
	 * other cases, no original netif is passed along.
	 */
	orig_netif = NULL;
	if (!ifdev_is_loopback(target) && target->ifdev_v4set &&
	    ip4_addr_cmp(netif_ip4_addr(&target->ifdev_netif), ipaddr)) {
		orig_netif = netif;
		target = ifdev_loopback;
	}

	/* Without an IPv4 link processor, send the packet out directly. */
	if (target->ifdev_ops->iop_output_v4 == NULL)
		return ifdev_output(target, pbuf, orig_netif,
		    TRUE /*to_bpf*/, TRUE /*hdrcmplt*/);

	return target->ifdev_ops->iop_output_v4(ifdev_get_netif(target), pbuf,
	    ipaddr);
}
/*
 * lwIP output hook for IPv6 packets.  This is the IPv6 counterpart of
 * ifdev_output_v4(): reject/blackhole routes are honored, multicast packets
 * flagged for looping are copied to the loopback interface, and packets
 * destined to one of the interface's own addresses, or to an interface-local
 * multicast address, are diverted to the loopback interface.  Loopback
 * interfaces themselves are exempt from the looping and diverting.
 */
static err_t
ifdev_output_v6(struct netif * netif, struct pbuf * pbuf,
	const ip6_addr_t * ipaddr)
{
	struct ifdev *target;
	struct netif *orig_netif;
	err_t err;

	target = netif_get_ifdev(netif);

	assert(ifdev_loopback != NULL);

	/* Reject and blackhole routes end processing right here. */
	if (!route_output_v6(target, ipaddr, &err))
		return err;

	/*
	 * Loop back a copy of multicast packets that ask for it.  The result
	 * of this extra transmission is deliberately ignored, and the copy is
	 * not reported to BPF.
	 */
	if (!ifdev_is_loopback(target) &&
	    (pbuf->flags & PBUF_FLAG_MCASTLOOP) != 0)
		(void)ifdev_output(ifdev_loopback, pbuf, netif,
		    FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);

	/*
	 * Divert locally destined packets to the loopback interface, which
	 * then needs the original netif.  In all other cases, no original
	 * netif is passed along.
	 */
	orig_netif = NULL;
	if (!ifdev_is_loopback(target) &&
	    (netif_get_ip6_addr_match(&target->ifdev_netif, ipaddr) != -1 ||
	    ip6_addr_ismulticast_iflocal(ipaddr))) {
		orig_netif = netif;
		target = ifdev_loopback;
	}

	/* Without an IPv6 link processor, send the packet out directly. */
	if (target->ifdev_ops->iop_output_v6 == NULL)
		return ifdev_output(target, pbuf, orig_netif,
		    TRUE /*to_bpf*/, TRUE /*hdrcmplt*/);

	return target->ifdev_ops->iop_output_v6(ifdev_get_netif(target), pbuf,
	    ipaddr);
}
/*
 * Netif status callback, invoked by lwIP whenever certain status changes are
 * made on the netif, whether initiated by lwIP itself or by us.  The callback
 * serves to detect lwIP-initiated state changes on local IPv6 addresses;
 * shadow state is used to filter out the changes that we made ourselves.
 *
 * One day we might switch to the extended netif callback mechanism offered by
 * lwIP.  Currently, netif state changes are rare and it takes us little
 * effort to find out whether anything changed, so there is no immediate need.
 */
static void
ifdev_status_callback(struct netif * netif)
{

	ifaddr_v6_check(netif_get_ifdev(netif));
}
/*
 * Netif initialization callback, passed to netif_add() by ifdev_add().  Fill
 * in the parts of the netif structure that are the same for all interface
 * types, and leave the rest to the interface module's own initialization
 * function, the result of which is returned.
 */
static err_t
ifdev_init_netif(struct netif * netif)
{
	struct ifdev *owner;

	owner = netif_get_ifdev(netif);
	assert(owner != NULL);

	/* Copy over the link properties that ifdev_add() stored earlier. */
	netif->mtu = owner->ifdev_data.ifi_mtu;
	netif->hwaddr_len = owner->ifdev_data.ifi_addrlen;

	/* All outgoing IP traffic is routed through this module first. */
	netif->output_ip6 = ifdev_output_v6;
	netif->output = ifdev_output_v4;

	netif_set_status_callback(netif, ifdev_status_callback);

	return owner->ifdev_ops->iop_init(owner, netif);
}
/*
 * Look up an interface device by its interface index.  Return the interface
 * device object registered under that index, or NULL if there is none or the
 * index is out of range.  Index zero is never assigned to an interface, so
 * the result for index zero is always NULL.
 */
struct ifdev *
ifdev_get_by_index(uint32_t ifindex)
{

	return (ifindex < __arraycount(ifdev_table)) ?
	    ifdev_table[ifindex] : NULL;
}
/*
 * Look up an active interface device by its exact name.  Return the interface
 * device object if there is a match, or NULL otherwise.
 */
struct ifdev *
ifdev_find_by_name(const char * name)
{
	struct ifdev *cur;

	TAILQ_FOREACH(cur, &ifdev_list, ifdev_next) {
		if (strcmp(cur->ifdev_name, name) == 0)
			break;
	}

	/* A traversal that ran to completion leaves the iterator NULL. */
	return cur;
}
/*
 * Iterate over the active interfaces in list order.  If 'last' is NULL,
 * return the first interface device object; otherwise, 'last' must be a
 * previously returned object, and the one following it is returned.  The
 * result is NULL once there are no more interfaces.
 */
struct ifdev *
ifdev_enum(struct ifdev * last)
{

	return (last != NULL) ? TAILQ_NEXT(last, ifdev_next) :
	    TAILQ_FIRST(&ifdev_list);
}
/*
 * Attach a BPF device as listener to this interface.  The given link object
 * is appended to the interface's list of BPF listeners; from then on,
 * ifdev_input() and ifdev_output() pass packets to it when called with
 * 'to_bpf' set.
 */
void
ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
{
TAILQ_INSERT_TAIL(&ifdev->ifdev_bpf, bpfl, bpfl_next);
}
/*
 * Detach a previously attached BPF device from this interface, by removing
 * its link object from the interface's list of BPF listeners.  The link must
 * currently be on that list, i.e., it must have been attached to this same
 * interface with ifdev_attach_bpf().
 */
void
ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
{
TAILQ_REMOVE(&ifdev->ifdev_bpf, bpfl, bpfl_next);
}
/*
 * Register the caller as a party interested in having the interface in
 * promiscuous mode.  Any number of parties may do so; each may call this
 * function once, and must call ifdev_clear_promisc() later if the call
 * succeeded.  The interface is put in promiscuous mode, if possible, as soon
 * as there is at least one interested party.  Return TRUE on success, or
 * FALSE on failure.
 */
int
ifdev_set_promisc(struct ifdev * ifdev)
{
	int is_first;

	/*
	 * Failing on counter overflow is a bit silly, but we want to retain
	 * the ability to fail this call for other reasons in the future,
	 * with BPF handling that case properly.
	 */
	if (ifdev->ifdev_promisc == UINT_MAX)
		return FALSE;

	is_first = (ifdev->ifdev_promisc == 0);
	ifdev->ifdev_promisc++;

	/* Only the first interested party triggers the mode switch. */
	if (is_first) {
		ifdev_update_ifflags(ifdev,
		    ifdev->ifdev_ifflags | IFF_PROMISC);

		if (ifdev->ifdev_ops->iop_set_promisc != NULL)
			ifdev->ifdev_ops->iop_set_promisc(ifdev, TRUE);
	}

	return TRUE;
}
/*
 * Deregister a party that previously registered its interest in promiscuous
 * mode through ifdev_set_promisc().  When the last such party deregisters,
 * the interface is taken out of promiscuous mode again.
 */
void
ifdev_clear_promisc(struct ifdev * ifdev)
{

	assert(ifdev->ifdev_promisc > 0);

	ifdev->ifdev_promisc--;

	/* As long as other parties remain, nothing changes. */
	if (ifdev->ifdev_promisc != 0)
		return;

	if (ifdev->ifdev_ops->iop_set_promisc != NULL)
		ifdev->ifdev_ops->iop_set_promisc(ifdev, FALSE);

	ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_PROMISC);
}
/*
 * Change the NetBSD-style interface flags (IFF_) of an interface, on request
 * of a user.  Flags that may not be changed (IFF_CANTCHANGE, IFF_LOOPBACK)
 * are ignored in the given set and retain their current values.  Return OK
 * on success, or the error code returned by the interface module if it
 * refuses the change.
 */
int
ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	const unsigned int fixed = IFF_CANTCHANGE | IFF_LOOPBACK;
	unsigned int requested;
	int r;

	/* Check and update only the subset of flags that may be changed. */
	requested = ifflags & ~fixed;

	/*
	 * Important: the interface module's callback may itself call
	 * ifdev_update_ifflags() right away, in order to update read-only
	 * flags such as IFF_RUNNING based on read-write flags such as IFF_UP.
	 * For that to work:
	 *
	 * 1) this function MUST succeed if the callback succeeds;
	 * 2) this function MUST NOT rely on any value of the ifdev_ifflags
	 *    field obtained before the callback invocation.
	 *
	 * Conversely, the callback should be aware that the flags field will
	 * still be updated with the requested flags afterward.  In this
	 * model, the callback cannot silently change any of the requested
	 * flags.  If that is ever necessary, API changes are needed.
	 */
	r = ifdev->ifdev_ops->iop_set_ifflags(ifdev, requested);
	if (r != OK)
		return r;

	/*
	 * On success, combine the requested subset with the unchangeable
	 * subset, which is read only now, after the callback.
	 */
	ifdev_update_ifflags(ifdev,
	    requested | (ifdev->ifdev_ifflags & fixed));

	return OK;
}
/*
 * Replace the NetBSD-style interface flags (IFF_) of an interface with
 * exactly the given set, without any input checks, and carry out whatever
 * follows from certain flags changing.  Currently, only a change of IFF_UP
 * has such consequences: the lwIP netif is brought up or down to match, and
 * the change is reported on routing sockets.
 */
void
ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	struct netif *netif;
	int want_up, was_up;

	/*
	 * Store the new flags first: the new value should be visible in the
	 * routing messages generated below, for example.
	 */
	ifdev->ifdev_ifflags = ifflags;

	netif = ifdev_get_netif(ifdev);

	want_up = ((ifflags & IFF_UP) != 0);
	was_up = (netif_is_up(netif) != 0);

	/* If the netif state already matches IFF_UP, we are done. */
	if (want_up == was_up)
		return;

	if (!want_up) {
		netif_set_down(netif);

		rtsock_msg_ifinfo(ifdev);

		return;
	}

	netif_set_up(netif);

	rtsock_msg_ifinfo(ifdev);

	/*
	 * Now that the interface is up, all conditions for link-local IPv6
	 * address assignment may have been met.
	 */
	ifaddr_v6_set_linklocal(ifdev);

	/* With the link up as well, address states should be reset now. */
	if (netif_is_link_up(netif))
		ifaddr_v6_set_up(ifdev);
}
/*
 * Obtain the NetBSD-style interface capabilities (IFCAP_) of an interface.
 * The supported capabilities are stored in 'ifcap' and the enabled ones in
 * 'ifena'.  Both are zero for interfaces that do not implement capabilities.
 */
void
ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena)
{

	/* Start from "nothing supported, nothing enabled". */
	*ifena = 0;
	*ifcap = 0;

	if (ifdev->ifdev_ops->iop_get_ifcap == NULL)
		return;

	ifdev->ifdev_ops->iop_get_ifcap(ifdev, ifcap, ifena);
}
/*
 * Change the set of enabled NetBSD-style interface capabilities (IFCAP_) of
 * an interface.  Return the result of the interface module's handler, or
 * EINVAL if the interface does not implement capabilities at all.
 */
int
ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena)
{

	if (ifdev->ifdev_ops->iop_set_ifcap == NULL)
		return EINVAL;

	return ifdev->ifdev_ops->iop_set_ifcap(ifdev, ifena);
}
/*
 * Obtain the NetBSD-style media type (IFM_) of an interface.  On success,
 * return OK, with the current media type selection stored in 'ifcurrent' and
 * the driver-reported active media type stored in 'ifactive', both as filled
 * in by the interface module.  Return ENOTTY if the interface does not
 * support media types.
 */
int
ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive)
{

	if (ifdev->ifdev_ops->iop_get_ifmedia != NULL) {
		ifdev->ifdev_ops->iop_get_ifmedia(ifdev, ifcurrent, ifactive);

		return OK;
	}

	return ENOTTY;
}
/*
 * Select a NetBSD-style media type (IFM_) for an interface.  Return ENOTTY
 * if the interface does not support media type selection, EINVAL if the
 * given media type is negative, and the result of the interface module's
 * handler otherwise.
 */
int
ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia)
{

	/* The support check takes precedence over the value check. */
	if (ifdev->ifdev_ops->iop_set_ifmedia == NULL)
		return ENOTTY;

	if (ifmedia >= 0)
		return ifdev->ifdev_ops->iop_set_ifmedia(ifdev, ifmedia);

	return EINVAL;
}
/*
 * Change the Maximum Transmission Unit of an interface.  Return OK on
 * success, ENOTTY if the interface does not support changing its MTU, or
 * EINVAL if the given value is outside the accepted range or is refused by
 * the interface module.
 */
int
ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu)
{

	if (ifdev->ifdev_ops->iop_set_mtu == NULL)
		return ENOTTY;

	/* The value must fit in the 16-bit range and suffice for IPv6. */
	if (mtu < IFDEV_MIN_MTU || mtu > UINT16_MAX)
		return EINVAL;

	/* The interface module has the final say on the value. */
	if (!ifdev->ifdev_ops->iop_set_mtu(ifdev, mtu))
		return EINVAL;

	/* Keep lwIP's copy and our own copy of the MTU in sync. */
	ifdev->ifdev_netif.mtu = mtu;
	ifdev->ifdev_data.ifi_mtu = mtu;

	return OK;
}
/*
 * Change the IPv6 Neighbor Discovery related flags (ND6_IFF_) of an
 * interface.  Return OK on success, or EINVAL if the given set contains
 * unknown flags or the unsupported ND6_IFF_IFDISABLED flag.
 */
int
ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags)
{
	const uint32_t known = ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV |
	    ND6_IFF_IFDISABLED | ND6_IFF_OVERRIDE_RTADV |
	    ND6_IFF_AUTO_LINKLOCAL;

	/* For now, refuse setting any flags that are not even known. */
	if ((nd6flags & ~known) != 0)
		return EINVAL;

	/*
	 * Unfortunately, the mismatch between NetBSD and lwIP requires us to
	 * support but butcher ND6 flags.  The current status per flag:
	 *
	 * - ND6_IFF_PERFORMNUD: set by default as lwIP always implements NUD;
	 *   changes are disregarded but possible, for dhcpcd(8).
	 * - ND6_IFF_ACCEPT_RTADV: disregarded but settable, for dhcpcd(8); in
	 *   our case, lwIP always processes router advertisements but never
	 *   autoconfigures addresses, so this flag has no meaning for us.
	 * - ND6_IFF_IFDISABLED: not supported; can only be cleared; we could
	 *   probably do detection of link-local address collision and set
	 *   this flag (and disable the interface if set) when that happens;
	 *   TODO.
	 * - ND6_IFF_OVERRIDE_RTADV: same as _ACCEPT_ above.
	 * - ND6_IFF_AUTO_LINKLOCAL: supported, but not initialized based on
	 *   the corresponding sysctl(7) flag for reasons mentioned in ifaddr.
	 */
	if ((nd6flags & ND6_IFF_IFDISABLED) != 0)
		return EINVAL;

	ifdev->ifdev_nd6flags = nd6flags;

	return OK;
}
/*
 * Report an update to the interface's active hardware address that is *not*
 * the result of a user action.  If the 'is_factory' flag is set, the address
 * is the factory (driver-given) address.  This function is for use by
 * interface modules, to update the internal state to their current external
 * state.  The actual work is done by the data link address module, through
 * ifaddr_dl_update().
 */
void
ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
	int is_factory)
{

	/*
	 * This function returns void, so the call must not be wrapped in a
	 * return statement with an expression: standard C does not allow
	 * that, even when the expression itself has type void.
	 */
	ifaddr_dl_update(ifdev, hwaddr, is_factory);
}
/*
 * Insert a new interface device into the list of interface devices, at a
 * location determined by policy.  The interface's name must have been set
 * already, as the name determines the location.
 */
static void
ifdev_insert(struct ifdev * ifdev)
{
struct ifdev *ifdev2;
const char *p;
unsigned int unit, unit2;
size_t namelen;
int found;
/*
 * While NetBSD can set up all interfaces in the order it wants them to
 * appear in, we do not have such luxury: network device drivers come
 * up and report to us in no particular predefined order, and we have
 * no way to know how many and which will appear.  The result is that
 * we always have to create the loopback device first, something that
 * is explicitly said to be bad in NetBSD.  Instead, we create an
 * illusion of a reasonable order by performing insertion sort on the
 * interface list, using (for now) these rules, ordered by priority:
 *
 * 1. same-named devices are sorted by their unit number;
 * 2. loopback interfaces are inserted after all other interfaces;
 * 3. new devices are added at the end of their type category.
 *
 * In the future, other forms of real-vs-virtual sorting may be added.
 */
/*
 * First check for same-named devices (#1).  Split the new interface's
 * name into a prefix (everything before the first digit, 'namelen'
 * characters long) and a decimal unit number.
 */
for (p = ifdev->ifdev_name; *p != '\0' && (*p < '0' || *p > '9'); p++);
namelen = (size_t)(p - ifdev->ifdev_name);
for (unit = 0; *p >= '0' && *p <= '9'; p++)
unit = unit * 10 + *p - '0';
/*
 * Walk the list, looking for interfaces with the same prefix followed
 * directly by a digit.  The walk stops at the first such interface
 * with a higher unit number, or at the first other interface after at
 * least one same-named interface was seen.  Either way, 'ifdev2' is
 * then the entry to insert before.  If the walk runs to completion,
 * 'ifdev2' ends up being NULL.
 */
found = FALSE;
TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
if (!strncmp(ifdev->ifdev_name, ifdev2->ifdev_name, namelen) &&
*(p = &ifdev2->ifdev_name[namelen]) >= '0' && *p <= '9') {
for (unit2 = 0; *p >= '0' && *p <= '9'; p++)
unit2 = unit2 * 10 + *p - '0';
/* Two interfaces may never have the same full name. */
assert(unit != unit2);
found = TRUE;
if (unit2 > unit)
break;
} else if (found)
break;
}
if (found) {
/* A NULL 'ifdev2' means the same-named run ended the list. */
if (ifdev2 != NULL)
TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
else
TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
return;
}
/*
 * No same-named device found.  Is this a loopback interface?  If not,
 * insert before the first loopback device, if any (#2).
 */
if (!ifdev_is_loopback(ifdev)) {
TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
if (ifdev_is_loopback(ifdev2)) {
TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
return;
}
}
}
/*
 * The given device is a loopback device, or there was no loopback
 * device in the list, possibly because it was empty.  Add to the tail.
 */
TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
}
/*
 * Add and initialize an interface device.  The caller provides the storage
 * for the interface device object in 'ifdev'.  The other parameters are:
 *
 * - 'name': the interface name, copied (with truncation) into the object;
 * - 'ifflags': the initial interface flags (IFF_);
 * - 'iftype': the interface type, stored as ifi_type;
 * - 'hdrlen': the link-layer header length, stored as ifi_hdrlen;
 * - 'addrlen': the hardware address length, which must not exceed
 *   NETIF_MAX_HWADDR_LEN;
 * - 'dlt': the data link type, stored as ifdev_dlt;
 * - 'mtu': the initial MTU, which must be between IFDEV_MIN_MTU and
 *   UINT16_MAX inclusive;
 * - 'nd6flags': the initial IPv6 Neighbor Discovery flags;
 * - 'iop': the operations table of the interface module, a pointer to which
 *   is stored in the object.
 *
 * Out-of-range 'addrlen' and 'mtu' values are caught by assertions only.
 * This function panics if lwIP fails to add the netif or assigns an
 * unusable interface index.
 */
void
ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop)
{
unsigned int ifindex;
ip4_addr_t ip4addr_any, ip4addr_none;
/*
 * Since the call to netif_add() may end up invoking some of our
 * callbacks (the add-multicast-address ones in particular), make sure
 * that everything else is set up first.  We cannot set up the index
 * mapping until netif_add() returns, but this is currently no problem.
 */
strlcpy(ifdev->ifdev_name, name, sizeof(ifdev->ifdev_name));
ifdev->ifdev_ifflags = 0; /* will be updated below */
ifdev->ifdev_dlt = dlt;
ifdev->ifdev_nd6flags = nd6flags;
ifdev->ifdev_ops = iop;
/* Start with all interface statistics and properties zeroed out. */
memset(&ifdev->ifdev_data, 0, sizeof(ifdev->ifdev_data));
assert(addrlen <= NETIF_MAX_HWADDR_LEN);
assert(mtu >= IFDEV_MIN_MTU && mtu <= UINT16_MAX);
ifdev->ifdev_data.ifi_type = iftype;
ifdev->ifdev_data.ifi_hdrlen = hdrlen;
ifdev->ifdev_data.ifi_addrlen = addrlen;
ifdev->ifdev_data.ifi_link_state = LINK_STATE_UNKNOWN;
ifdev->ifdev_data.ifi_mtu = mtu;
/* No BPF devices are attached to the new interface yet. */
TAILQ_INIT(&ifdev->ifdev_bpf);
ifaddr_init(ifdev);
/*
 * We have to assign an IPv4 address at netif addition time, but we may
 * not have one yet, so pass in an "any" address for now.  Hopefully
 * lwIP will not mistake this for a real IPv4 address if we happen to
 * enable the interface with only an IPv6 address later on.
 */
ip4_addr_set_any(&ip4addr_any);
ip4_addr_set_u32(&ip4addr_none, PP_HTONL(INADDR_NONE));
/*
 * Insert the new interface device into a sensible place in the current
 * list of interfaces.
 */
ifdev_insert(ifdev);
/*
 * netif_add() can fail only as a result of the initialization callback
 * failing, which is something that should never happen in our case.
 * The callback, ifdev_init_netif(), relies on the fields set above.
 */
if (netif_add(&ifdev->ifdev_netif, &ip4addr_any, &ip4addr_none,
&ip4addr_any, ifdev, ifdev_init_netif, iop->iop_input) == NULL)
panic("unable to add netif");
/*
 * Set up the index mapping.  Since interface index zero is never
 * generated, table slot zero is always NULL.  We could shift all
 * elements by one to save four bytes, but there's no real point.
 */
ifindex = netif_get_index(&ifdev->ifdev_netif);
if (ifindex == 0 || ifindex >= __arraycount(ifdev_table))
panic("invalid lwIP-generated interface index %u", ifindex);
ifdev_table[ifindex] = ifdev;
/*
 * Set the initial interface flags.  Use the regular procedure for this
 * just in case the interface module is crazy enough to set the
 * interface up right away (which is never a good idea but still).
 */
ifdev_update_ifflags(ifdev, ifflags);
/*
 * If this is the first loopback interface to be registered, save it as
 * the loopback interface that we will use to loop back self-destined
 * packets on other interfaces.  Do this after setting the interface
 * flags, since those are what we use to perform this loopback check.
 */
if (ifdev_loopback == NULL && ifdev_is_loopback(ifdev))
ifdev_loopback = ifdev;
/* Finally, announce the new interface. */
rtsock_msg_ifannounce(ifdev, TRUE /*arrival*/);
}
/*
 * Remove an interface device.  Return OK on success, or EPERM if the
 * interface is the loopback interface that is used to loop back packets for
 * other interfaces.  No other interface is refused for removal here.  The
 * storage of the interface device object itself is not freed by this
 * function.
 */
int
ifdev_remove(struct ifdev * ifdev)
{
struct bpfdev_link *bpfl;
/*
 * If this is the loopback interface used to loop back packets for
 * other interfaces (typically lo0), we cannot afford to get rid of it.
 */
if (ifdev == ifdev_loopback)
return EPERM;
/*
 * Take down the interface for the purpose of sending a routing
 * message.  NetBSD sends a RTM_IFINFO even if the interface was down
 * already, and so we do not check whether IFF_UP was set at all here.
 * NOTE(review): ifdev_update_ifflags() generates the routing message
 * only if the netif was actually up, so no message is sent for an
 * interface that was down already.
 */
ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_UP);
/*
 * Report all associated addresses as deleted.  It is not necessary to
 * actually delete the addresses, nor is that even possible in all
 * cases.  In particular, the active hardware address cannot be
 * deleted.  Since the active hardware address is used in all address
 * change announcements, delete it at the very end.
 */
ifaddr_v4_clear(ifdev);
ifaddr_v6_clear(ifdev);
ifaddr_dl_clear(ifdev);
/*
 * Delete all remaining routes associated with the interface.  These
 * are reported as well.  We do this after clearing the addresses so as
 * not to confuse the route deletion part of clearing addresses.
 */
route_clear(ifdev);
/* Finally, announce the interface itself as gone. */
rtsock_msg_ifannounce(ifdev, FALSE /*arrival*/);
/*
 * Free up all per-socket multicast membership structures associated to
 * the interface.  There is no need to leave the multicast groups.
 */
mcast_clear(ifdev);
/*
 * Also tell attached BPF devices that the interface is now gone.  Do
 * not bother to reset the list.
 */
TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
bpfdev_detach(bpfl);
/* Then perform the actual interface removal. */
netif_remove(&ifdev->ifdev_netif);
TAILQ_REMOVE(&ifdev_list, ifdev, ifdev_next);
/* Release the interface's slot in the index-based lookup table. */
assert(ifdev_table[ifdev_get_index(ifdev)] == ifdev);
ifdev_table[ifdev_get_index(ifdev)] = NULL;
return OK;
}
/*
 * Return the loopback interface, that is, the first loopback interface that
 * was added through ifdev_add().  This is the interface used to loop back
 * packets for other interfaces.  It is asserted that such an interface
 * exists, so this function must not be called before one has been added.
 */
struct ifdev *
ifdev_get_loopback(void)
{
assert(ifdev_loopback != NULL);
return ifdev_loopback;
}
/*
 * Report a new link state for the given interface: 'unknown', 'up', or
 * 'down', expressed as one of NetBSD's LINK_STATE_ values.  The new state is
 * stored, applied to the associated lwIP netif, and, if the netif's link
 * state changed as a result, reported on monitoring routing sockets.  This
 * function is for use by interface modules, to update the internal state to
 * their current external state.
 */
void
ifdev_update_link(struct ifdev * ifdev, int iflink)
{
	struct netif *netif;
	int link_was_up, link_now_up;

	ifdev->ifdev_data.ifi_link_state = iflink;

	/*
	 * lwIP knows only two link states.  We treat 'unknown' the same as
	 * 'up': in both cases we simply try to send and receive packets.
	 * Thus, only transitions from and to 'down' matter to the netif.
	 */
	netif = ifdev_get_netif(ifdev);

	link_now_up = (iflink != LINK_STATE_DOWN);
	link_was_up = netif_is_link_up(netif);

	if (link_was_up == link_now_up)
		return;

	if (!link_now_up) {
		netif_set_link_down(netif);
	} else {
		netif_set_link_up(netif);

		/* See if we should also reset address states now. */
		if (ifdev_is_up(ifdev))
			ifaddr_v6_set_up(ifdev);
	}

	rtsock_msg_ifinfo(ifdev);
}
/*
 * Register a virtual interface type.  The given name is the name prefix of
 * the type (e.g. "lo"), and the given function is called with the full
 * interface name whenever creation of a virtual interface of this type is
 * requested.  The name string is not copied, so it must remain valid for as
 * long as the registration is in use.  Running out of slots results in a
 * panic; see MAX_VTYPE.
 */
void
ifdev_register(const char * name, int (* create)(const char *))
{
	unsigned int slot;

	/* The next free slot is the one right after the last used one. */
	slot = ifdev_vtypes;

	if (slot == __arraycount(ifdev_vtype))
		panic("too few slots for all virtual interface types");

	ifdev_vtype[slot].ifvt_create = create;
	ifdev_vtype[slot].ifvt_namelen = strlen(name);
	ifdev_vtype[slot].ifvt_name = name;

	ifdev_vtypes = slot + 1;
}
/*
 * Verify that the given name may be used for creating a new interface.  The
 * checks are, in order:
 *
 * 1. the name must consist of one or more lowercase letters (the interface
 *    type or driver name) followed by one or more digits (the unit or
 *    instance number), or EINVAL is returned;
 * 2. no interface with that name may exist already, or EEXIST is returned;
 * 3. the name prefix is matched against the registered virtual interface
 *    types.  If 'vtype_slot' is not NULL, the prefix must be that of a
 *    virtual interface type, and the slot number of that type is stored in
 *    'vtype_slot' on success.  If 'vtype_slot' is NULL, the prefix must not
 *    be that of a virtual interface type.  A mismatch yields EINVAL.
 *
 * Since vtype slot numbers are meaningless outside of this module, external
 * callers must always pass in NULL.  This function returns OK on success.
 */
int
ifdev_check_name(const char * name, unsigned int * vtype_slot)
{
	const char *cp;
	size_t prefixlen;
	unsigned int slot;
	int is_virtual;

	/*
	 * First see if the name is valid at all.  TODO: decide if we want to
	 * allow uppercase letters, dashes, and/or underscores.
	 */
	cp = name;
	while (*cp >= 'a' && *cp <= 'z')
		cp++;

	/* Both the alphabetic part and the unit number are mandatory. */
	if (cp == name || *cp == '\0')
		return EINVAL;

	prefixlen = (size_t)(cp - name);

	while (*cp >= '0' && *cp <= '9')
		cp++;

	/* Nothing may follow the unit number. */
	if (*cp != '\0')
		return EINVAL;

	/* Then make sure that it does not already exist. */
	if (ifdev_find_by_name(name) != NULL)
		return EEXIST;

	/* See if there is a matching virtual interface type for the name. */
	for (slot = 0; slot < ifdev_vtypes; slot++) {
		if (ifdev_vtype[slot].ifvt_namelen != prefixlen)
			continue;

		if (strncmp(ifdev_vtype[slot].ifvt_name, name, prefixlen) == 0)
			break;
	}

	is_virtual = (slot < ifdev_vtypes);

	/* The interpretation of the result depends on 'vtype_slot'. */
	if (vtype_slot == NULL)
		return is_virtual ? EINVAL : OK;

	if (!is_virtual)
		return EINVAL;

	*vtype_slot = slot;

	return OK;
}
/*
 * Create a new virtual interface.  The virtual interface type is based on the
 * given name (without unit number).  Return OK if the virtual interface has
 * been successfully created.  Otherwise, return the error code from name
 * verification (EINVAL for a malformed name or a name that does not belong
 * to a virtual interface type, EEXIST for a name that is in use already), or
 * the error code returned by the virtual interface type's create function.
 * This function is used both for the SIOCIFCREATE ioctl and internally.
 */
int
ifdev_create(const char * name)
{
	unsigned int slot;
	int r;

	/*
	 * Verify that the given name is an acceptable interface name.  Pass
	 * on the exact error code, so that the caller can tell apart an
	 * already existing interface (EEXIST) from an invalid name (EINVAL).
	 */
	if ((r = ifdev_check_name(name, &slot)) != OK)
		return r;

	/* Let the virtual interface implementation handle the rest. */
	return ifdev_vtype[slot].ifvt_create(name);
}
/*
 * Destroy an interface, if possible.  Interfaces that have no destroy
 * handler cannot be destroyed, in which case EINVAL is returned.  Otherwise,
 * the result of the interface module's destroy handler is returned.
 */
int
ifdev_destroy(struct ifdev * ifdev)
{

	if (ifdev->ifdev_ops->iop_destroy != NULL)
		return ifdev->ifdev_ops->iop_destroy(ifdev);

	return EINVAL;
}
/*
 * Enumerate the name prefixes of the registered virtual interface types.
 * Given a zero-based number N, return a pointer to the null-terminated name
 * prefix of the Nth registered type, or NULL if N is not below the number of
 * registered types.
 */
const char *
ifdev_enum_vtypes(unsigned int num)
{

	return (num < ifdev_vtypes) ? ifdev_vtype[num].ifvt_name : NULL;
}