
This commit adds a new TCP/IP service to MINIX 3. As its core, the service uses the lwIP TCP/IP stack for maintenance reasons. The service aims to be compatible with NetBSD userland, including its low-level network management utilities. It also aims to support modern features such as IPv6. In summary, the new LWIP service has support for the following main features: - TCP, UDP, RAW sockets with mostly standard BSD API semantics; - IPv6 support: host mode (complete) and router mode (partial); - most of the standard BSD API socket options (SO_); - all of the standard BSD API message flags (MSG_); - the most used protocol-specific socket and control options; - a default loopback interface and the ability to create one more; - configuration-free ethernet interfaces and driver tracking; - queuing and multiple concurrent requests to each ethernet driver; - standard ioctl(2)-based BSD interface management; - radix tree backed, destination-based routing; - routing sockets for standard BSD route reporting and management; - multicast traffic and multicast group membership tracking; - Berkeley Packet Filter (BPF) devices; - standard and custom sysctl(7) nodes for many internals; - a slab allocation based, hybrid static/dynamic memory pool model. Many of its modules come with fairly elaborate comments that cover many aspects of what is going on. The service is primarily a socket driver built on top of the libsockdriver library, but for BPF devices it is at the same time also a character driver. Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
1065 lines
31 KiB
C
1065 lines
31 KiB
C
/* LWIP service - ifdev.c - network interface devices */
|
|
|
|
#include "lwip.h"
|
|
#include "mcast.h"
|
|
#include "ifaddr.h"
|
|
#include "rtsock.h"
|
|
#include "route.h"
|
|
#include "bpfdev.h"
|
|
|
|
#include <net/if_media.h>
|
|
|
|
/*
 * Size of the index-based interface lookup table, which is one more than the
 * highest interface index that can occur.  Interface indices are currently
 * chosen by lwIP, which stores them in an unsigned 8-bit field and generates
 * values between 1 and 255 inclusive.  Looking up an interface device object
 * through a plain array is cheap, but requires the array to be large enough to
 * be indexed by the largest index lwIP can generate.
 */
#define MAX_IFDEV	(UINT8_MAX + 1)
|
|
|
|
/* The table is indexed by the interface index minus one. */
|
|
static struct ifdev *ifdev_table[MAX_IFDEV]; /* index-based lookup table */
|
|
|
|
static TAILQ_HEAD(, ifdev) ifdev_list; /* list of active interfaces */
|
|
|
|
static struct ifdev *ifdev_loopback; /* loopback interface */
|
|
|
|
/*
 * Capacity of the virtual interface type table.  Virtual interface types are
 * the types for which interfaces may be created and destroyed dynamically;
 * the BSDs refer to these as "clones".  Every such type is registered by its
 * own module through ifdev_register(), so there must be at least as many slots
 * as there are types.  Increase as necessary.
 */
#define MAX_VTYPE	4

/* The registered virtual interface types; see ifdev_register(). */
static struct {
	/* interface name without digits (e.g. "lo") */
	const char *ifvt_name;
	/* length of the name, excluding null terminator */
	size_t ifvt_namelen;
	/* ifdev create function */
	int (*ifvt_create)(const char *);
} ifdev_vtype[MAX_VTYPE];

/* Number of slots in ifdev_vtype[] that are currently in use. */
static unsigned int ifdev_vtypes;

/* The minimum interface MTU, as required by IPv6. */
#define IFDEV_MIN_MTU	1280
|
|
|
|
/*
|
|
* Initialize the network interface devices module. This call must be issued
|
|
* before any virtual interfaces are initialized, because the virtual types
|
|
* array is initialized here.
|
|
*/
|
|
void
|
|
ifdev_init(void)
|
|
{
|
|
|
|
memset(ifdev_table, 0, sizeof(ifdev_table));
|
|
|
|
TAILQ_INIT(&ifdev_list);
|
|
|
|
memset(ifdev_vtype, 0, sizeof(ifdev_vtype));
|
|
ifdev_vtypes = 0;
|
|
}
|
|
|
|
/*
|
|
* Check all active interfaces to see if any tasks need to be performed. This
|
|
* function is called as part of each message loop iteration.
|
|
*/
|
|
void
|
|
ifdev_poll(void)
|
|
{
|
|
struct ifdev *ifdev;
|
|
|
|
/*
|
|
* Call the polling function of the active interfaces. Note that
|
|
* interfaces may not remove themselves as a result of polling!
|
|
*/
|
|
TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
|
|
if (ifdev->ifdev_ops->iop_poll != NULL)
|
|
ifdev->ifdev_ops->iop_poll(ifdev);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle an incoming packet on an interface. This function assumes ownership
|
|
* of the packet buffers: the caller must no longer refer to it afterward. For
|
|
* packets looped back for a non-loopback interface, 'ifdev' is the loopback
|
|
* interface and 'netif' is the original (non-loopback) interface's netif. For
|
|
* other packets, 'ifdev' is the actual interface and 'netif' is NULL. The
|
|
* packet is passed to BPF devices only if 'to_bpf' is set.
|
|
*/
|
|
void
|
|
ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
|
|
int to_bpf)
|
|
{
|
|
struct bpfdev_link *bpfl;
|
|
err_t err;
|
|
|
|
/*
|
|
* Looped-back packets are captured on the loopback device, not on the
|
|
* original interface. Similarly, we account the traffic to the
|
|
* loopback interface. This is a policy decision (inspired by NetBSD's
|
|
* behavior) and may be changed later.
|
|
*/
|
|
if (to_bpf) {
|
|
TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
|
|
bpfdev_input(bpfl, pbuf);
|
|
}
|
|
|
|
ifdev->ifdev_data.ifi_ipackets++;
|
|
ifdev->ifdev_data.ifi_ibytes += pbuf->tot_len;
|
|
|
|
if (pbuf->flags & PBUF_FLAG_LLMCAST)
|
|
ifdev->ifdev_data.ifi_imcasts++;
|
|
|
|
/*
|
|
* For looped-back packets, we must bypass the regular netif input
|
|
* function (as that one is for link-layer packet handling) and instead
|
|
* pass it directly to the IP-layer packet handling function of lwIP.
|
|
*/
|
|
if (netif != NULL)
|
|
err = ip_input(pbuf, netif);
|
|
else
|
|
err = ifdev->ifdev_netif.input(pbuf, &ifdev->ifdev_netif);
|
|
|
|
if (err != ERR_OK)
|
|
pbuf_free(pbuf);
|
|
}
|
|
|
|
/*
|
|
* Handle an outgoing packet on an interface. Return ERR_OK if the packet was
|
|
* transmitted or another lwIP ERR_ error code upon failure. Either way, the
|
|
* caller is responsible for freeing the packet buffers. If the packet is
|
|
* to be looped back to a non-loopback interface (because its destination is a
|
|
* local address), 'ifdev' is the loopback interface and 'netif' is set to the
|
|
* original interface's netif. In all other cases, 'ifdev' is the packet's
|
|
* source interface and 'netif' is NULL. The packet is passed to attached BPF
|
|
* devices only if 'to_bpf' is set. If 'hdrcmplt' is set, the source address
|
|
* of the data link header is already filled in; otherwise, the source address
|
|
* must be set to the device's source address, if applicable.
|
|
*/
|
|
err_t
|
|
ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
|
|
int to_bpf, int hdrcmplt)
|
|
{
|
|
struct bpfdev_link *bpfl;
|
|
|
|
/*
|
|
* If the interface and/or the link is down, discard the packet without
|
|
* reporting it to BPF or the actual interface module.
|
|
*/
|
|
if (!ifdev_is_up(ifdev) || !ifdev_is_link_up(ifdev))
|
|
return ERR_IF; /* this should translate to ENETDOWN */
|
|
|
|
/*
|
|
* If the link-layer header is not yet complete, fill in the source
|
|
* address now. This exception applies to BPF-generated packets only.
|
|
* Complete the header before passing the packet back to BPF, which
|
|
* should see the completed version of the packet.
|
|
*/
|
|
if (!hdrcmplt && ifdev->ifdev_ops->iop_hdrcmplt != NULL)
|
|
ifdev->ifdev_ops->iop_hdrcmplt(ifdev, pbuf);
|
|
|
|
/*
|
|
* As in ifdev_input(), we use the loopback interface for BPF and
|
|
* statistics even if the packet originates from a non-loopback device.
|
|
*/
|
|
if (to_bpf) {
|
|
TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
|
|
bpfdev_output(bpfl, pbuf);
|
|
}
|
|
|
|
ifdev->ifdev_data.ifi_opackets++;
|
|
ifdev->ifdev_data.ifi_obytes += pbuf->tot_len;
|
|
|
|
/*
|
|
* TODO: this is rather imprecise, because it works only when we set
|
|
* the pbuf flag explicitly ourselves. That happens only for UDP/RAW
|
|
* packets, and not for (e.g.) ND6 multicast traffic. We have reasons
|
|
* to set the flags ourselves anyway, namely to support MSG_MCAST and
|
|
* MSG_BCAST on loopback interfaces, but they should be complemented by
|
|
* additional checks here on, say, the destination ethernet address.
|
|
*/
|
|
if (pbuf->flags & PBUF_FLAG_LLMCAST)
|
|
ifdev->ifdev_data.ifi_omcasts++;
|
|
|
|
return ifdev->ifdev_ops->iop_output(ifdev, pbuf, netif);
|
|
}
|
|
|
|
/*
|
|
* Transmit an IPv4 packet on an interface, as requested by lwIP. Pass on the
|
|
* packet to the interface's link processor (e.g., etharp), unless the packet
|
|
* should be rejected or blackholed according to route information, or it is to
|
|
* be looped back into the interface. The latter may occur if the destination
|
|
* address belongs to the interface. In that case, we send the packet over a
|
|
* loopback interface instead. In addition, if this is a multicast packet that
|
|
* should be looped back, send a copy over a loopback interface as well.
|
|
* Loopback interfaces themselves are exempt from these special cases.
|
|
*/
|
|
static err_t
|
|
ifdev_output_v4(struct netif * netif, struct pbuf * pbuf,
|
|
const ip4_addr_t * ipaddr)
|
|
{
|
|
struct ifdev *ifdev = netif_get_ifdev(netif);
|
|
err_t err;
|
|
|
|
assert(ifdev_loopback != NULL);
|
|
|
|
/* Check for reject/blackhole routes. */
|
|
if (!route_output_v4(ifdev, ipaddr, &err))
|
|
return err;
|
|
|
|
/* Handle looping of multicast packets on non-loopback interfaces. */
|
|
if (!ifdev_is_loopback(ifdev) && (pbuf->flags & PBUF_FLAG_MCASTLOOP))
|
|
(void)ifdev_output(ifdev_loopback, pbuf, netif,
|
|
FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);
|
|
|
|
/* Divert packets sent to the local interface address. */
|
|
if (!ifdev_is_loopback(ifdev) && ifdev->ifdev_v4set &&
|
|
ip4_addr_cmp(netif_ip4_addr(&ifdev->ifdev_netif), ipaddr))
|
|
ifdev = ifdev_loopback;
|
|
else
|
|
netif = NULL;
|
|
|
|
if (ifdev->ifdev_ops->iop_output_v4 != NULL)
|
|
return ifdev->ifdev_ops->iop_output_v4(ifdev_get_netif(ifdev),
|
|
pbuf, ipaddr);
|
|
else
|
|
return ifdev_output(ifdev, pbuf, netif, TRUE /*to_bpf*/,
|
|
TRUE /*hdrcmplt*/);
|
|
}
|
|
|
|
/*
|
|
* Transmit an IPv6 packet on an interface, as requested by lwIP. As for IPv4.
|
|
*/
|
|
static err_t
|
|
ifdev_output_v6(struct netif * netif, struct pbuf * pbuf,
|
|
const ip6_addr_t * ipaddr)
|
|
{
|
|
struct ifdev *ifdev = netif_get_ifdev(netif);
|
|
err_t err;
|
|
|
|
assert(ifdev_loopback != NULL);
|
|
|
|
/* Check for reject/blackhole routes. */
|
|
if (!route_output_v6(ifdev, ipaddr, &err))
|
|
return err;
|
|
|
|
/* Handle looping of multicast packets on non-loopback interfaces. */
|
|
if (!ifdev_is_loopback(ifdev) && (pbuf->flags & PBUF_FLAG_MCASTLOOP))
|
|
(void)ifdev_output(ifdev_loopback, pbuf, netif,
|
|
FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);
|
|
|
|
/* Divert packets sent to the local interface address. */
|
|
if (!ifdev_is_loopback(ifdev) &&
|
|
(netif_get_ip6_addr_match(&ifdev->ifdev_netif, ipaddr) != -1 ||
|
|
ip6_addr_ismulticast_iflocal(ipaddr)))
|
|
ifdev = ifdev_loopback;
|
|
else
|
|
netif = NULL;
|
|
|
|
if (ifdev->ifdev_ops->iop_output_v6 != NULL)
|
|
return ifdev->ifdev_ops->iop_output_v6(ifdev_get_netif(ifdev),
|
|
pbuf, ipaddr);
|
|
else
|
|
return ifdev_output(ifdev, pbuf, netif, TRUE /*to_bpf*/,
|
|
TRUE /*hdrcmplt*/);
|
|
}
|
|
|
|
/*
 * netif status callback.  lwIP invokes this function upon certain status
 * changes on the netif, regardless of whether lwIP or we initiated the change.
 * The callback serves to detect lwIP-initiated state changes on local IPv6
 * addresses; shadow state is used to tell those apart from changes that we
 * made ourselves.
 *
 * lwIP also offers an extended netif callback mechanism, which we may adopt
 * one day.  For now there is no pressing need: netif state changes are rare,
 * and finding out whether anything changed takes little effort.
 */
static void
ifdev_status_callback(struct netif * netif)
{

	ifaddr_v6_check(netif_get_ifdev(netif));
}
|
|
|
|
/*
|
|
* Initialize the netif structure for a new interface. Most of this is handled
|
|
* by the specific interface module.
|
|
*/
|
|
static err_t
|
|
ifdev_init_netif(struct netif * netif)
|
|
{
|
|
struct ifdev *ifdev = netif_get_ifdev(netif);
|
|
|
|
assert(ifdev != NULL);
|
|
|
|
netif->output = ifdev_output_v4;
|
|
netif->output_ip6 = ifdev_output_v6;
|
|
|
|
netif->hwaddr_len = ifdev->ifdev_data.ifi_addrlen;
|
|
netif->mtu = ifdev->ifdev_data.ifi_mtu;
|
|
|
|
netif_set_status_callback(netif, ifdev_status_callback);
|
|
|
|
return ifdev->ifdev_ops->iop_init(ifdev, netif);
|
|
}
|
|
|
|
/*
|
|
* Retrieve an interface device by its interface index. Return a pointer to
|
|
* the interface device if found, or NULL otherwise. If the given interface
|
|
* index is zero, this function will always return NULL.
|
|
*/
|
|
struct ifdev *
|
|
ifdev_get_by_index(uint32_t ifindex)
|
|
{
|
|
|
|
if (ifindex >= __arraycount(ifdev_table))
|
|
return NULL;
|
|
|
|
return ifdev_table[ifindex];
|
|
}
|
|
|
|
/*
|
|
* Find an interface device by its name. Return a pointer to the interface
|
|
* device if found, or NULL otherwise.
|
|
*/
|
|
struct ifdev *
|
|
ifdev_find_by_name(const char * name)
|
|
{
|
|
struct ifdev *ifdev;
|
|
|
|
TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
|
|
if (!strcmp(ifdev->ifdev_name, name))
|
|
return ifdev;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Given either NULL or a previously returned interface device object pointer,
|
|
* return the first or next interface device object pointer, or NULL if there
|
|
* are no more.
|
|
*/
|
|
struct ifdev *
|
|
ifdev_enum(struct ifdev * last)
|
|
{
|
|
|
|
if (last == NULL)
|
|
return TAILQ_FIRST(&ifdev_list);
|
|
else
|
|
return TAILQ_NEXT(last, ifdev_next);
|
|
}
|
|
|
|
/*
|
|
* Attach a BPF device as listener to this interface.
|
|
*/
|
|
void
|
|
ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
|
|
{
|
|
|
|
TAILQ_INSERT_TAIL(&ifdev->ifdev_bpf, bpfl, bpfl_next);
|
|
}
|
|
|
|
/*
|
|
* Detach a previously attached BPF device from this interface.
|
|
*/
|
|
void
|
|
ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
|
|
{
|
|
|
|
TAILQ_REMOVE(&ifdev->ifdev_bpf, bpfl, bpfl_next);
|
|
}
|
|
|
|
/*
|
|
* Register the calling party as interested in putting the interface in
|
|
* promiscuous mode. There may be multiple such parties, each of which can
|
|
* call this function once, after which they must call ifdev_clear_promisc()
|
|
* later. If possible, the interface is put in promiscuous mode if there is at
|
|
* least one interested party. Return TRUE on success, or FALSE on failure.
|
|
*/
|
|
int
|
|
ifdev_set_promisc(struct ifdev * ifdev)
|
|
{
|
|
|
|
/*
|
|
* A bit silly, but we want to retain the ability to fail this call for
|
|
* other reasons in the future, with BPF handling that case properly.
|
|
*/
|
|
if (ifdev->ifdev_promisc == UINT_MAX)
|
|
return FALSE;
|
|
|
|
if (ifdev->ifdev_promisc++ == 0) {
|
|
ifdev_update_ifflags(ifdev,
|
|
ifdev->ifdev_ifflags | IFF_PROMISC);
|
|
|
|
if (ifdev->ifdev_ops->iop_set_promisc != NULL)
|
|
ifdev->ifdev_ops->iop_set_promisc(ifdev, TRUE);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
/*
|
|
* Deregister a previously registered party interested in putting the interface
|
|
* in promiscuous mode. Once the last party deregisters, the device is pulled
|
|
* out of promiscuous mode.
|
|
*/
|
|
void
|
|
ifdev_clear_promisc(struct ifdev * ifdev)
|
|
{
|
|
|
|
assert(ifdev->ifdev_promisc > 0);
|
|
|
|
if (--ifdev->ifdev_promisc == 0) {
|
|
if (ifdev->ifdev_ops->iop_set_promisc != NULL)
|
|
ifdev->ifdev_ops->iop_set_promisc(ifdev, FALSE);
|
|
|
|
ifdev_update_ifflags(ifdev,
|
|
ifdev->ifdev_ifflags & ~IFF_PROMISC);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set NetBSD-style interface flags (IFF_) for an interface.
|
|
*/
|
|
int
|
|
ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
|
|
{
|
|
int r;
|
|
|
|
/* Check and update only the subset of flags that may be changed. */
|
|
ifflags &= ~(IFF_CANTCHANGE | IFF_LOOPBACK);
|
|
|
|
/*
|
|
* Important: the callback function may call ifdev_update_ifflags()
|
|
* itself immediately, to update read-only flags such as IFF_RUNNING
|
|
* based on read-write flags such as IFF_UP. So as to make that work..
|
|
*
|
|
* 1) this function MUST succeed if the callback function succeeds;
|
|
* 2) this function MUST NOT make assumptions about the ifdev_ifflags
|
|
* field across the callback invocation.
|
|
*
|
|
* Conversely, the callback function should be aware that the flags
|
|
* field will still be updated with the flags. In this model, it is
|
|
* not possible for the callback function to silently change any of the
|
|
* given flags. If that is ever necessary, API changes are needed.
|
|
*/
|
|
if ((r = ifdev->ifdev_ops->iop_set_ifflags(ifdev, ifflags)) != OK)
|
|
return r;
|
|
|
|
/*
|
|
* On success, merge the updated subset with the subset that may not be
|
|
* changed.
|
|
*/
|
|
ifflags |= ifdev->ifdev_ifflags & (IFF_CANTCHANGE | IFF_LOOPBACK);
|
|
|
|
ifdev_update_ifflags(ifdev, ifflags);
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Update NetBSD-style interface flags (IFF_) for an interface, and perform any
|
|
* required operations as a result of certain flags changing. This function
|
|
* bypasses all input checks and directly changes the flags field to exactly
|
|
* the given set of flags.
|
|
*/
|
|
void
|
|
ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags)
|
|
{
|
|
struct netif *netif;
|
|
|
|
/*
|
|
* First update the flags field itself. The new value should be
|
|
* visible in the routing messages generated below, for example.
|
|
*/
|
|
ifdev->ifdev_ifflags = ifflags;
|
|
|
|
/*
|
|
* Then perform operations as a result of the flags field changing.
|
|
* For now, this is relevant for IFF_UP only.
|
|
*/
|
|
netif = ifdev_get_netif(ifdev);
|
|
|
|
if ((ifflags & IFF_UP) && !netif_is_up(netif)) {
|
|
netif_set_up(netif);
|
|
|
|
rtsock_msg_ifinfo(ifdev);
|
|
|
|
/*
|
|
* Check if all conditions are now met for link-local IPv6
|
|
* address assignment.
|
|
*/
|
|
ifaddr_v6_set_linklocal(ifdev);
|
|
|
|
/* See if we should also reset address states now. */
|
|
if (netif_is_link_up(netif))
|
|
ifaddr_v6_set_up(ifdev);
|
|
} else if (!(ifflags & IFF_UP) && netif_is_up(netif)) {
|
|
netif_set_down(netif);
|
|
|
|
rtsock_msg_ifinfo(ifdev);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Retrieve NetBSD-style interface capabilities (IFCAP_) for an interface: both
|
|
* the supported and the enabled capabilities.
|
|
*/
|
|
void
|
|
ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena)
|
|
{
|
|
|
|
*ifcap = 0;
|
|
*ifena = 0;
|
|
|
|
if (ifdev->ifdev_ops->iop_get_ifcap != NULL)
|
|
ifdev->ifdev_ops->iop_get_ifcap(ifdev, ifcap, ifena);
|
|
}
|
|
|
|
/*
|
|
* Set enabled NetBSD-style interface capabilities (IFCAP_) for an interface.
|
|
*/
|
|
int
|
|
ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena)
|
|
{
|
|
|
|
if (ifdev->ifdev_ops->iop_set_ifcap != NULL)
|
|
return ifdev->ifdev_ops->iop_set_ifcap(ifdev, ifena);
|
|
else
|
|
return EINVAL;
|
|
}
|
|
|
|
/*
|
|
* Retrieve NetBSD-style media type (IFM_) for an interface. Return OK on
|
|
* success, with the current media type selection stored in 'ifcurrent', the
|
|
* driver-reported active media type in 'ifactive', and the link status in
|
|
* 'ifstatus'. Return a negative error code on failure.
|
|
*/
|
|
int
|
|
ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive)
|
|
{
|
|
|
|
if (ifdev->ifdev_ops->iop_get_ifmedia == NULL)
|
|
return ENOTTY;
|
|
|
|
ifdev->ifdev_ops->iop_get_ifmedia(ifdev, ifcurrent, ifactive);
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Set NetBSD-style media type (IFM_) for an interface. Return OK on success,
|
|
* or a negative error code on failure.
|
|
*/
|
|
int
|
|
ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia)
|
|
{
|
|
|
|
if (ifdev->ifdev_ops->iop_set_ifmedia == NULL)
|
|
return ENOTTY;
|
|
|
|
if (ifmedia < 0)
|
|
return EINVAL;
|
|
|
|
return ifdev->ifdev_ops->iop_set_ifmedia(ifdev, ifmedia);
|
|
}
|
|
|
|
/*
|
|
* Set the Maximum Transmission Unit for an interface. Return OK on success,
|
|
* or a negative error code on failure.
|
|
*/
|
|
int
|
|
ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu)
|
|
{
|
|
|
|
if (ifdev->ifdev_ops->iop_set_mtu == NULL)
|
|
return ENOTTY;
|
|
|
|
if (mtu < IFDEV_MIN_MTU || mtu > UINT16_MAX ||
|
|
!ifdev->ifdev_ops->iop_set_mtu(ifdev, mtu))
|
|
return EINVAL;
|
|
|
|
ifdev->ifdev_data.ifi_mtu = mtu;
|
|
ifdev->ifdev_netif.mtu = mtu;
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Set IPv6 Neighbor Discovery related flags.
|
|
*/
|
|
int
|
|
ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags)
|
|
{
|
|
|
|
/* For now, refuse setting any flags that are not even known. */
|
|
if ((nd6flags & ~(ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV |
|
|
ND6_IFF_IFDISABLED | ND6_IFF_OVERRIDE_RTADV |
|
|
ND6_IFF_AUTO_LINKLOCAL)) != 0)
|
|
return EINVAL;
|
|
|
|
/*
|
|
* Unfortunately, the mismatch between NetBSD and lwIP requires us to
|
|
* support but butcher ND6 flags. The current status is as follows:
|
|
*
|
|
* - ND6_IFF_PERFORMNUD: set by default as lwIP always implements NUD;
|
|
* changes are disregarded but possible, for dhcpcd(8).
|
|
* - ND6_IFF_ACCEPT_RTADV: disregarded but settable, for dhcpcd(8); in
|
|
* our case, lwIP always processes router advertisements but never
|
|
* autoconfigures addresses, so this flag has no meaning for us.
|
|
* - ND6_IFF_IFDISABLED: not supported; can only be cleared; we could
|
|
* probably do detection of link-local address collision and set this
|
|
* flag (and disable the interface if set) when that happens; TODO.
|
|
* - ND6_IFF_OVERRIDE_RTADV: same as _ACCEPT_ above.
|
|
* - ND6_IFF_AUTO_LINKLOCAL: supported, but not initialized based on
|
|
* the corresponding sysctl(7) flag for reasons mentioned in ifaddr.
|
|
*/
|
|
if (nd6flags & ND6_IFF_IFDISABLED)
|
|
return EINVAL;
|
|
|
|
ifdev->ifdev_nd6flags = nd6flags;
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
 * Report an update to the interface's active hardware address that is *not*
 * the result of a user action.  If the 'is_factory' flag is set, the address
 * is the factory (driver-given) address.  This function is for use by
 * interface modules, to update the internal state to their current external
 * state.  The actual work is delegated to ifaddr_dl_update().
 */
void
ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
	int is_factory)
{

	/*
	 * This function returns void, so a "return <expression>;" statement
	 * would violate an ISO C constraint here.  Simply make the call.
	 */
	ifaddr_dl_update(ifdev, hwaddr, is_factory);
}
|
|
|
|
/*
|
|
* Insert a new interface device into the list of interface devices, at a
|
|
* location determined by policy.
|
|
*/
|
|
static void
|
|
ifdev_insert(struct ifdev * ifdev)
|
|
{
|
|
struct ifdev *ifdev2;
|
|
const char *p;
|
|
unsigned int unit, unit2;
|
|
size_t namelen;
|
|
int found;
|
|
|
|
/*
|
|
* While NetBSD can set up all interfaces in the order it wants them to
|
|
* appear in, we do not have such luxury: network device drivers come
|
|
* up and report to us in no particular predefined order, and we have
|
|
* no way to know how many and which will appear. The result is that
|
|
* we always have to create the loopback device first, something that
|
|
* is explicitly said to be bad in NetBSD. Instead, we create an
|
|
* illusion of a reasonable order by performing insertion sort on the
|
|
* interface list, using (for now) these rules, ordered by priority:
|
|
*
|
|
* 1. same-named devices are sorted by their unit number;
|
|
* 2. loopback interfaces are inserted after all other interfaces;
|
|
* 3. new devices are added at the end of their type category.
|
|
*
|
|
* In the future, other forms of real-vs-virtual sorting may be added.
|
|
*/
|
|
|
|
/* First check for same-named devices (#1). */
|
|
for (p = ifdev->ifdev_name; *p != '\0' && (*p < '0' || *p > '9'); p++);
|
|
|
|
namelen = (size_t)(p - ifdev->ifdev_name);
|
|
|
|
for (unit = 0; *p >= '0' && *p <= '9'; p++)
|
|
unit = unit * 10 + *p - '0';
|
|
|
|
found = FALSE;
|
|
TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
|
|
if (!strncmp(ifdev->ifdev_name, ifdev2->ifdev_name, namelen) &&
|
|
*(p = &ifdev2->ifdev_name[namelen]) >= '0' && *p <= '9') {
|
|
for (unit2 = 0; *p >= '0' && *p <= '9'; p++)
|
|
unit2 = unit2 * 10 + *p - '0';
|
|
|
|
assert(unit != unit2);
|
|
|
|
found = TRUE;
|
|
if (unit2 > unit)
|
|
break;
|
|
} else if (found)
|
|
break;
|
|
}
|
|
|
|
if (found) {
|
|
if (ifdev2 != NULL)
|
|
TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
|
|
else
|
|
TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* No same-named device found. Is this a loopback interface? If not,
|
|
* insert before the first loopback device, if any.
|
|
*/
|
|
if (!ifdev_is_loopback(ifdev)) {
|
|
TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
|
|
if (ifdev_is_loopback(ifdev2)) {
|
|
TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
|
|
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The given device is not a loopback device, or there was no loopback
|
|
* device in the list, possibly because it was empty. Add to the tail.
|
|
*/
|
|
TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
|
|
}
|
|
|
|
/*
|
|
* Add and initialize an interface device.
|
|
*/
|
|
void
|
|
ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
|
|
unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
|
|
unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop)
|
|
{
|
|
unsigned int ifindex;
|
|
ip4_addr_t ip4addr_any, ip4addr_none;
|
|
|
|
/*
|
|
* Since the call to netif_add() may end up invoking some of our
|
|
* callbacks (the add-multicast-address ones in particular), make sure
|
|
* that everything else is set up first. We cannot set up the index
|
|
* mapping until netif_add() returns, but this is currently no problem.
|
|
*/
|
|
strlcpy(ifdev->ifdev_name, name, sizeof(ifdev->ifdev_name));
|
|
ifdev->ifdev_ifflags = 0; /* will be updated below */
|
|
ifdev->ifdev_dlt = dlt;
|
|
ifdev->ifdev_nd6flags = nd6flags;
|
|
ifdev->ifdev_ops = iop;
|
|
|
|
memset(&ifdev->ifdev_data, 0, sizeof(ifdev->ifdev_data));
|
|
|
|
assert(addrlen <= NETIF_MAX_HWADDR_LEN);
|
|
assert(mtu >= IFDEV_MIN_MTU && mtu <= UINT16_MAX);
|
|
|
|
ifdev->ifdev_data.ifi_type = iftype;
|
|
ifdev->ifdev_data.ifi_hdrlen = hdrlen;
|
|
ifdev->ifdev_data.ifi_addrlen = addrlen;
|
|
ifdev->ifdev_data.ifi_link_state = LINK_STATE_UNKNOWN;
|
|
ifdev->ifdev_data.ifi_mtu = mtu;
|
|
|
|
TAILQ_INIT(&ifdev->ifdev_bpf);
|
|
|
|
ifaddr_init(ifdev);
|
|
|
|
/*
|
|
* We have to assign an IPv4 address at netif addition time, but we may
|
|
* not have one yet, so pass in an "any" address for now. Hopefully
|
|
* lwIP will not mistake this for a real IPv4 address if we happen to
|
|
* enable the interface with only an IPv6 address later on.
|
|
*/
|
|
ip4_addr_set_any(&ip4addr_any);
|
|
ip4_addr_set_u32(&ip4addr_none, PP_HTONL(INADDR_NONE));
|
|
|
|
/*
|
|
* Insert the new interface device into a sensible place in the current
|
|
* list of interfaces.
|
|
*/
|
|
ifdev_insert(ifdev);
|
|
|
|
/*
|
|
* netif_add() can fail only as a result of the initialization callback
|
|
* failing, which is something that should never happen in our case.
|
|
*/
|
|
if (netif_add(&ifdev->ifdev_netif, &ip4addr_any, &ip4addr_none,
|
|
&ip4addr_any, ifdev, ifdev_init_netif, iop->iop_input) == NULL)
|
|
panic("unable to add netif");
|
|
|
|
/*
|
|
* Set up the index mapping. Since interface index zero never
|
|
* generated, table slot zero is always NULL. We could shift all
|
|
* elements by one to save four bytes, but there's no real point.
|
|
*/
|
|
ifindex = netif_get_index(&ifdev->ifdev_netif);
|
|
|
|
if (ifindex == 0 || ifindex >= __arraycount(ifdev_table))
|
|
panic("invalid lwIP-generated interface index %u", ifindex);
|
|
|
|
ifdev_table[ifindex] = ifdev;
|
|
|
|
/*
|
|
* Set the initial interface flags. Use the regular procedure for this
|
|
* just in case the interface module is crazy enough to set the
|
|
* interface up right away (which is never a good idea but still).
|
|
*/
|
|
ifdev_update_ifflags(ifdev, ifflags);
|
|
|
|
/*
|
|
* If this is the first loopback interface to be registered, save it as
|
|
* the loopback interface that we will use to loop back self-destined
|
|
* packets on other interfaces. Do this after setting the interface
|
|
* flags, since those are what we use to perform this loopback check.
|
|
*/
|
|
if (ifdev_loopback == NULL && ifdev_is_loopback(ifdev))
|
|
ifdev_loopback = ifdev;
|
|
|
|
/* Finally, announce the new interface. */
|
|
rtsock_msg_ifannounce(ifdev, TRUE /*arrival*/);
|
|
}
|
|
|
|
/*
|
|
* Remove an interface device. Return OK on success, or a negative error code
|
|
* on failure. Only loopback interfaces may be refused for removal.
|
|
*/
|
|
int
|
|
ifdev_remove(struct ifdev * ifdev)
|
|
{
|
|
struct bpfdev_link *bpfl;
|
|
|
|
/*
|
|
* If this is the loopback interface used to loop back packets for
|
|
* other interfaces (typically lo0), we cannot afford to get rid of it.
|
|
*/
|
|
if (ifdev == ifdev_loopback)
|
|
return EPERM;
|
|
|
|
/*
|
|
* Take down the interface for the purpose of sending a routing
|
|
* message. NetBSD sends a RTM_IFINFO even if the interface was down
|
|
* already, and so we do not check whether IFF_UP was set at all here.
|
|
*/
|
|
ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_UP);
|
|
|
|
/*
|
|
* Report all associated addresses as deleted. It is not necessary to
|
|
* actually delete the addresses, nor is that even possible in all
|
|
* cases. In particular, the active hardware address cannot be
|
|
* deleted. Since the active hardware address is used in all address
|
|
* change announcements, delete it at the very end.
|
|
*/
|
|
ifaddr_v4_clear(ifdev);
|
|
ifaddr_v6_clear(ifdev);
|
|
ifaddr_dl_clear(ifdev);
|
|
|
|
/*
|
|
* Delete all remaining routes associated with the interface. These
|
|
* are reported as well. We do this after clearing the addresses so as
|
|
* not to confuse the route deletion part of clearing addresses.
|
|
*/
|
|
route_clear(ifdev);
|
|
|
|
/* Finally, announce the interface itself as gone. */
|
|
rtsock_msg_ifannounce(ifdev, FALSE /*arrival*/);
|
|
|
|
/*
|
|
* Free up all per-socket multicast membership structures associated to
|
|
* the interface. There is no need to leave the multicast groups.
|
|
*/
|
|
mcast_clear(ifdev);
|
|
|
|
/*
|
|
* Also tell attached BPF devices that the interface is now gone. Do
|
|
* not bother to reset the list.
|
|
*/
|
|
TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
|
|
bpfdev_detach(bpfl);
|
|
|
|
/* Then perform the actual interface removal. */
|
|
netif_remove(&ifdev->ifdev_netif);
|
|
|
|
TAILQ_REMOVE(&ifdev_list, ifdev, ifdev_next);
|
|
|
|
assert(ifdev_table[ifdev_get_index(ifdev)] == ifdev);
|
|
ifdev_table[ifdev_get_index(ifdev)] = NULL;
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Return the loopback interface.
|
|
*/
|
|
struct ifdev *
|
|
ifdev_get_loopback(void)
|
|
{
|
|
|
|
assert(ifdev_loopback != NULL);
|
|
|
|
return ifdev_loopback;
|
|
}
|
|
|
|
/*
|
|
* Report an update of the link state of the given interface, to 'unknown',
|
|
* 'up', or 'down', using NetBSD's LINK_STATE_ values. The link state is
|
|
* changed in the associated lwIP netif, and is reported on monitoring routing
|
|
* sockets. This function is for use by interface modules, to update the
|
|
* internal state to their current external state.
|
|
*/
|
|
void
|
|
ifdev_update_link(struct ifdev * ifdev, int iflink)
|
|
{
|
|
struct netif *netif;
|
|
int was_up, is_up;
|
|
|
|
ifdev->ifdev_data.ifi_link_state = iflink;
|
|
|
|
/*
|
|
* For netif, 'up' and 'unknown' are the same link state: we simply try
|
|
* to send and receive packets in both cases. Thus, transitions from
|
|
* and to the 'down' link state are the ones that matter.
|
|
*/
|
|
netif = ifdev_get_netif(ifdev);
|
|
|
|
was_up = netif_is_link_up(netif);
|
|
is_up = (iflink != LINK_STATE_DOWN);
|
|
|
|
if (was_up != is_up) {
|
|
if (is_up) {
|
|
netif_set_link_up(netif);
|
|
|
|
/* See if we should also reset address states now. */
|
|
if (ifdev_is_up(ifdev))
|
|
ifaddr_v6_set_up(ifdev);
|
|
} else
|
|
netif_set_link_down(netif);
|
|
|
|
rtsock_msg_ifinfo(ifdev);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Register a virtual interface type, using a name prefix and a function that
|
|
* is called when creation of a virtual interface of that type is requested.
|
|
*/
|
|
void
|
|
ifdev_register(const char * name, int (* create)(const char *))
|
|
{
|
|
|
|
if (ifdev_vtypes == __arraycount(ifdev_vtype))
|
|
panic("too few slots for all virtual interface types");
|
|
|
|
ifdev_vtype[ifdev_vtypes].ifvt_name = name;
|
|
ifdev_vtype[ifdev_vtypes].ifvt_namelen = strlen(name);
|
|
ifdev_vtype[ifdev_vtypes].ifvt_create = create;
|
|
ifdev_vtypes++;
|
|
}
|
|
|
|
/*
|
|
* Verify that the given name is a valid interface name that can be used for
|
|
* creating a new interface. In particular, check that the given name is a
|
|
* valid interface name, consisting of an alphabetic string (the interface type
|
|
* or driver name) followed by a number string (the unit or instance number).
|
|
* Furthermore, make sure that the name does not already exist. Finally, see
|
|
* if the name prefix is reserved for a virtual interface type. If the given
|
|
* 'vtype_slot' pointer is not NULL, the prefix must be, and the virtual type
|
|
* slot number is returned in 'vtype_slot' on success. If 'vtype_slot' is
|
|
* NULL, the name must not have a virtual interface prefix, and an error is
|
|
* returned if it is. Since vtype slot numbers are meaningless outside of this
|
|
* module, external callers must always pass in NULL. This function returns OK
|
|
* on succes or a negative error code on error.
|
|
*/
|
|
int
|
|
ifdev_check_name(const char * name, unsigned int * vtype_slot)
|
|
{
|
|
const char *p;
|
|
size_t namelen;
|
|
unsigned int slot;
|
|
|
|
/*
|
|
* First see if the name is valid at all. TODO: decide if we want to
|
|
* allow uppercase letters, dashes, and/or underscores.
|
|
*/
|
|
for (p = name; *p >= 'a' && *p <= 'z'; p++);
|
|
|
|
if (p == name || *p == '\0')
|
|
return EINVAL;
|
|
|
|
namelen = (size_t)(p - name);
|
|
|
|
for (; *p >= '0' && *p <= '9'; p++);
|
|
|
|
if (*p != '\0')
|
|
return EINVAL;
|
|
|
|
/* Then make sure that it does not already exist. */
|
|
if (ifdev_find_by_name(name) != NULL)
|
|
return EEXIST;
|
|
|
|
/* See if there is a matching virtual interface type for the name. */
|
|
for (slot = 0; slot < ifdev_vtypes; slot++) {
|
|
if (ifdev_vtype[slot].ifvt_namelen == namelen &&
|
|
!strncmp(ifdev_vtype[slot].ifvt_name, name, namelen))
|
|
break;
|
|
}
|
|
|
|
/* The interpretation of the result depends on 'vtype_slot'. */
|
|
if (vtype_slot != NULL) {
|
|
if (slot == ifdev_vtypes)
|
|
return EINVAL;
|
|
|
|
*vtype_slot = slot;
|
|
} else if (slot != ifdev_vtypes)
|
|
return EINVAL;
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*
|
|
* Create a new virtual interface. The virtual interface type is based on the
|
|
* given name (without unit number). Return OK if the virtual interface has
|
|
* been successfully created, or a negative error code otherwise. This
|
|
* function is used both for the SIOCIFCREATE ioctl and internally.
|
|
*/
|
|
int
|
|
ifdev_create(const char * name)
|
|
{
|
|
unsigned int slot;
|
|
int r;
|
|
|
|
/* Verify that the given name is an acceptable interface name. */
|
|
if ((r = ifdev_check_name(name, &slot)) != OK)
|
|
return EINVAL;
|
|
|
|
/* Let the virtual interface implementation handle the rest. */
|
|
return ifdev_vtype[slot].ifvt_create(name);
|
|
}
|
|
|
|
/*
|
|
* Destroy an interface, if possible.
|
|
*/
|
|
int
|
|
ifdev_destroy(struct ifdev * ifdev)
|
|
{
|
|
|
|
if (ifdev->ifdev_ops->iop_destroy == NULL)
|
|
return EINVAL;
|
|
|
|
return ifdev->ifdev_ops->iop_destroy(ifdev);
|
|
}
|
|
|
|
/*
|
|
* Enumerate the names of currently supported virtual interface types. Return
|
|
* a pointer to the null-terminated name prefix of the Nth virtual interface
|
|
* type if the (zero-based) N value is within range, or NULL otherwise.
|
|
*/
|
|
const char *
|
|
ifdev_enum_vtypes(unsigned int num)
|
|
{
|
|
|
|
if (num < ifdev_vtypes)
|
|
return ifdev_vtype[num].ifvt_name;
|
|
else
|
|
return NULL;
|
|
}
|