
This commit adds a new TCP/IP service to MINIX 3. As its core, the service
uses the lwIP TCP/IP stack for maintenance reasons. The service aims to be
compatible with NetBSD userland, including its low-level network management
utilities. It also aims to support modern features such as IPv6. In summary,
the new LWIP service has support for the following main features:

- TCP, UDP, RAW sockets with mostly standard BSD API semantics;
- IPv6 support: host mode (complete) and router mode (partial);
- most of the standard BSD API socket options (SO_);
- all of the standard BSD API message flags (MSG_);
- the most used protocol-specific socket and control options;
- a default loopback interface and the ability to create one more;
- configuration-free ethernet interfaces and driver tracking;
- queuing and multiple concurrent requests to each ethernet driver;
- standard ioctl(2)-based BSD interface management;
- radix tree backed, destination-based routing;
- routing sockets for standard BSD route reporting and management;
- multicast traffic and multicast group membership tracking;
- Berkeley Packet Filter (BPF) devices;
- standard and custom sysctl(7) nodes for many internals;
- a slab allocation based, hybrid static/dynamic memory pool model.

Many of its modules come with fairly elaborate comments that cover many
aspects of what is going on. The service is primarily a socket driver built
on top of the libsockdriver library, but for BPF devices it is at the same
time also a character driver.

Change-Id: Ib0c02736234b21143915e5fcc0fda8fe408f046f
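
As an illustration of the intended BSD API compatibility, the following
hypothetical userland sketch (not part of this change; names such as
'make_mcast_socket' and 'ifindex' are placeholders, error handling omitted)
creates an IPv6 UDP socket, joins an mDNS-style multicast group, and asks
for per-packet information, all of which the packet socket code below
supports:

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>
	#include <string.h>

	int
	make_mcast_socket(unsigned int ifindex)
	{
		struct ipv6_mreq mr;
		int fd, on = 1;

		fd = socket(AF_INET6, SOCK_DGRAM, 0);

		/* Ask for IPV6_PKTINFO ancillary data with each packet. */
		setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on,
		    sizeof(on));

		/* Join the mDNS multicast group on the given interface. */
		memset(&mr, 0, sizeof(mr));
		inet_pton(AF_INET6, "ff02::fb", &mr.ipv6mr_multiaddr);
		mr.ipv6mr_interface = ifindex;
		setsockopt(fd, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mr,
		    sizeof(mr));

		return fd;
	}
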
/* LWIP service - pktsock.c - packet code shared between UDP and RAW */

#include "lwip.h"
#include "pktsock.h"
#include "ifaddr.h"

/*
 * This buffer should be much bigger (at least 10KB, according to RFC 3542),
 * but we do not support the ancillary options that take so much space anyway.
 */
#define PKTSOCK_CTLBUF_SIZE	256

static char pktsock_ctlbuf[PKTSOCK_CTLBUF_SIZE];

/*
 * Header structures with ancillary data for received packets. The reason that
 * we do not simply use a generic pkthdr structure with ip_addr_t source and
 * destination addresses, is that for UDP packets, we put this structure in
 * place of the received (ethernet and IP) headers, and such a full structure
 * (including IPv6-size addresses) would not fit in the header space for IPv4
 * packets. So instead we use two address structures, one for IPv4 and one for
 * IPv6, and a generic header structure on top of it, which also identifies
 * which address structure is underneath. The combination of the address
 * structure and the header structure must fit in the IP header. The IPv6
 * packet header is already so close to the limit here that we have to use
 * packed addresses. For IPv4 we use the regular addresses for simplicity.
 */
struct pkthdr {
	uint16_t port;		/* source port number (UDP only) */
	uint8_t dstif;		/* interface that received the pkt */
	uint8_t addrif;		/* interface that accepted the pkt */
	uint8_t tos;		/* TOS/TC value from the IP header */
	uint8_t ttl;		/* TTL/HL value from the IP header */
	uint8_t flags;		/* packet flags (PKTHF_) */
	uint8_t _unused;	/* all that is still available.. */
};

#define PKTHF_IPV6	0x01	/* packet has IPv6 header */
#define PKTHF_MCAST	0x02	/* packet has multicast destination */
#define PKTHF_BCAST	0x04	/* packet has broadcast destination */

struct pktaddr4 {
	ip4_addr_t srcaddr;
	ip4_addr_t dstaddr;
};

struct pktaddr6 {
	ip6_addr_p_t srcaddr;
	ip6_addr_p_t dstaddr;
};
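
/*
 * Not part of the original code: a sketch of the size invariant described
 * above, expressed as compile-time checks (assuming C11 _Static_assert and
 * the lwIP IP_HLEN/IP6_HLEN definitions). The original relies on the runtime
 * asserts in pktsock_input() instead, so this block is kept disabled.
 */
#if 0
_Static_assert(sizeof(struct pktaddr4) + sizeof(struct pkthdr) <= IP_HLEN,
    "pktaddr4 plus pkthdr must fit in an IPv4 header");
_Static_assert(sizeof(struct pktaddr6) + sizeof(struct pkthdr) <= IP6_HLEN,
    "pktaddr6 plus pkthdr must fit in an IPv6 header");
#endif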

/*
 * Create a packet socket. Relay parameters and return values to and from the
 * IP module's socket creation function. This function must not allocate any
 * resources in any form, as socket creation may still fail later, in which
 * case no destruction function is called.
 */
int
pktsock_socket(struct pktsock * pkt, int domain, size_t sndbuf, size_t rcvbuf,
	struct sock ** sockp)
{

	pkt->pkt_rcvhead = NULL;
	pkt->pkt_rcvtailp = &pkt->pkt_rcvhead;
	pkt->pkt_rcvlen = 0;

	mcast_reset(&pkt->pkt_mcast);

	memset(&pkt->pkt_srcaddr, 0, sizeof(pkt->pkt_srcaddr));
	pkt->pkt_ifindex = 0;

	/*
	 * Any PKTF_ type flags should be initialized on the socket only after
	 * the following call, as this call will clear the flags field. For
	 * now, no PKTF_ flags need to be set by default, though.
	 */
	return ipsock_socket(&pkt->pkt_ipsock, domain, sndbuf, rcvbuf, sockp);
}

/*
 * Return TRUE if the given packet can and should be received on the given
 * socket, or FALSE if there is a reason not to receive the packet.
 */
static int
pktsock_may_recv(struct pktsock * pkt, struct pbuf * pbuf)
{

	/*
	 * By policy, multicast packets should not be received on sockets of
	 * which the owning application is not multicast aware.
	 */
	if (ip_addr_ismulticast(ip_current_dest_addr()) &&
	    !(ipsock_get_flag(&pkt->pkt_ipsock, PKTF_MCAWARE)))
		return FALSE;

	/*
	 * Due to fragment reassembly, we might end up with packets that take
	 * up more buffer space than their byte size, even after rounding up
	 * the latter. The user probably does not want packets to get dropped
	 * for that reason, e.g. when they set a 64K limit and the packet ends
	 * up being estimated as 65K and dropped. So, we test against
	 * 'pbuf->tot_len' rather than the rounded-up packet size. However,
	 * 'pkt->pkt_rcvlen' itself is increased by the rounded-up packet size
	 * when enqueuing the packet, so that we still count the memory
	 * consumption (generally) conservatively, which is what we want.
	 */
	return (pkt->pkt_rcvlen + pbuf->tot_len <=
	    ipsock_get_rcvbuf(&pkt->pkt_ipsock));
}

/*
 * Check whether the given packet can and should be received on the given
 * socket. If so, return the amount of space for ancillary information that
 * will be necessary for the packet. If not, return a negative value.
 */
int
pktsock_test_input(struct pktsock * pkt, struct pbuf * pbuf)
{

	/*
	 * This check will be done again in pktsock_input(), but this function
	 * is called for raw packets only (not for UDP packets) and, if this
	 * (cheap) check fails, we can avoid a (rather expensive) packet copy.
	 */
	if (!pktsock_may_recv(pkt, pbuf))
		return -1;

	if (ip_current_is_v6())
		return (int)(sizeof(struct pktaddr6) + sizeof(struct pkthdr));
	else
		return (int)(sizeof(struct pktaddr4) + sizeof(struct pkthdr));
}

/*
 * A packet has arrived on a packet socket. We own the given packet buffer,
 * and so we must free it if we do not want to keep it.
 */
void
pktsock_input(struct pktsock * pkt, struct pbuf * pbuf,
	const ip_addr_t * srcaddr, uint16_t port)
{
	struct pktaddr4 pktaddr4;
	struct pktaddr6 pktaddr6;
	struct pkthdr pkthdr;
	void *pktaddr;
	struct ifdev *ifdev;
	size_t pktaddrlen;

	/*
	 * We are going to mess with the packet's header and contents, so we
	 * must be the exclusive owner of the packet. For UDP packets, lwIP
	 * must have made a copy for us in case of non-exclusive delivery
	 * (e.g., multicast packets). For raw packets, we have made a copy of
	 * the packet ourselves just before the call to this function.
	 */
	if (pbuf->ref != 1)
		panic("input packet has multiple references!");

	/* If the packet should not be received on this socket, drop it. */
	if (!pktsock_may_recv(pkt, pbuf)) {
		pbuf_free(pbuf);

		return;
	}

	/*
	 * Enqueue the packet. Overwrite the leading IP header with packet
	 * information that is used at the time of receipt by userland. The
	 * data structures are such that the information always fits in what
	 * was the IP header. The reference count check earlier ensures that
	 * we never overwrite part of a packet that is still in use elsewhere.
	 */
	if (ip_current_is_v6()) {
		assert(IP_IS_V6(srcaddr));
		assert(ip6_current_dest_addr() != NULL);

		ip6_addr_copy_to_packed(pktaddr6.srcaddr, *ip_2_ip6(srcaddr));
		ip6_addr_copy_to_packed(pktaddr6.dstaddr,
		    *ip6_current_dest_addr());
		pktaddr = &pktaddr6;
		pktaddrlen = sizeof(pktaddr6);

		assert(pktaddrlen + sizeof(pkthdr) <= IP6_HLEN);

		pkthdr.tos = IP6H_TC(ip6_current_header());
		pkthdr.ttl = IP6H_HOPLIM(ip6_current_header());
		pkthdr.flags = PKTHF_IPV6;
	} else {
		assert(IP_IS_V4(srcaddr));
		assert(ip4_current_dest_addr() != NULL);

		memcpy(&pktaddr4.srcaddr, ip_2_ip4(srcaddr),
		    sizeof(pktaddr4.srcaddr));
		memcpy(&pktaddr4.dstaddr, ip4_current_dest_addr(),
		    sizeof(pktaddr4.dstaddr));
		pktaddr = &pktaddr4;
		pktaddrlen = sizeof(pktaddr4);

		assert(pktaddrlen + sizeof(pkthdr) <= IP_HLEN);

		pkthdr.tos = IPH_TOS(ip4_current_header());
		pkthdr.ttl = IPH_TTL(ip4_current_header());
		pkthdr.flags = 0;
	}

	/*
	 * Save both the interface on which the packet was received (for
	 * PKTINFO) and the interface that owns the destination address of the
	 * packet (for the source address's zone ID).
	 */
	assert(ip_current_input_netif() != NULL);
	ifdev = netif_get_ifdev(ip_current_input_netif());
	pkthdr.dstif = (uint8_t)ifdev_get_index(ifdev);

	assert(ip_current_netif() != NULL);
	ifdev = netif_get_ifdev(ip_current_netif());
	pkthdr.addrif = (uint8_t)ifdev_get_index(ifdev);

	if ((pbuf->flags & PBUF_FLAG_LLMCAST) ||
	    ip_addr_ismulticast(ip_current_dest_addr()))
		pkthdr.flags |= PKTHF_MCAST;
	else if ((pbuf->flags & PBUF_FLAG_LLBCAST) ||
	    ip_addr_isbroadcast(ip_current_dest_addr(), ip_current_netif()))
		pkthdr.flags |= PKTHF_BCAST;

	pkthdr.port = port;

	util_pbuf_header(pbuf, sizeof(pkthdr));

	memcpy(pbuf->payload, &pkthdr, sizeof(pkthdr));

	util_pbuf_header(pbuf, pktaddrlen);

	memcpy(pbuf->payload, pktaddr, pktaddrlen);

	util_pbuf_header(pbuf, -(int)(sizeof(pkthdr) + pktaddrlen));

	*pkt->pkt_rcvtailp = pbuf;
	pkt->pkt_rcvtailp = pchain_end(pbuf);
	pkt->pkt_rcvlen += pchain_size(pbuf);

	sockevent_raise(ipsock_get_sock(&pkt->pkt_ipsock), SEV_RECV);
}
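
/*
 * Illustration (not part of the original code): after pktsock_input() has
 * run, the buffer space that used to hold the received IP header contains
 * the two structures defined above, stored immediately in front of the
 * packet payload, while the payload pointer has been restored to the packet
 * data itself:
 *
 *	.......... hidden header space ..........   visible payload
 *	+----------------------+-----------------+--------------------- - -
 *	| struct pktaddr4 or 6 | struct pkthdr   | packet payload ...
 *	+----------------------+-----------------+--------------------- - -
 *
 * pktsock_recv() later walks this layout in reverse with util_pbuf_header():
 * it first re-exposes and reads the pkthdr, then the address structure, and
 * finally restores the payload pointer before copying out the packet data.
 */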

/*
 * Obtain interface and source address information for an outgoing packet. In
 * particular, parse any IPV6_PKTINFO options provided as either sticky options
 * on the socket 'pkt' or as ancillary options in the packet options 'pkto'.
 * On success, return OK, with 'ifdevp' set to either the outgoing interface to
 * use for the packet, or NULL if no outgoing interface was specified using
 * either of the aforementioned options. If, and only if, 'ifdevp' is set to
 * an actual interface (i.e., not NULL), then 'src_addrp' is filled with either
 * a locally owned, validated, unicast address to use as source of the packet,
 * or the unspecified ('any') address if no source address was specified using
 * the options. On failure, return a negative error code.
 */
int
pktsock_get_pktinfo(struct pktsock * pkt, struct pktopt * pkto,
	struct ifdev ** ifdevp, ip_addr_t * src_addrp)
{
	struct ifdev *ifdev, *ifdev2;
	ip_addr_t ipaddr;
	uint32_t ifindex;
	int r;

	/* We support only IPV6_PKTINFO. IP_PKTINFO is not supported. */
	if (!ipsock_is_ipv6(&pkt->pkt_ipsock)) {
		*ifdevp = NULL;
		return OK;
	}

	/*
	 * TODO: we are spending a lot of effort on initializing and copying
	 * stuff around, even just to find out whether there is anything to do
	 * at all here. See if this can be optimized.
	 */
	ip_addr_set_zero_ip6(&ipaddr);

	/*
	 * Ancillary data takes precedence over sticky options. We treat the
	 * source address and interface index fields as separate, overriding
	 * each earlier value only if non-zero. TODO: is that correct?
	 */
	if (pkto->pkto_flags & PKTOF_PKTINFO) {
		memcpy(ip_2_ip6(&ipaddr)->addr, &pkto->pkto_srcaddr.addr,
		    sizeof(ip_2_ip6(&ipaddr)->addr));
		ifindex = pkto->pkto_ifindex;
	} else
		ifindex = 0;

	if (ip6_addr_isany(ip_2_ip6(&ipaddr)))
		memcpy(ip_2_ip6(&ipaddr)->addr, &pkt->pkt_srcaddr.addr,
		    sizeof(ip_2_ip6(&ipaddr)->addr));
	if (ifindex == 0)
		ifindex = pkt->pkt_ifindex;

	/* If both fields are blank, there is nothing more to do. */
	if (ip6_addr_isany(ip_2_ip6(&ipaddr)) && ifindex == 0) {
		*ifdevp = NULL;
		return OK;
	}

	/* If an interface index is specified, it must be valid. */
	ifdev = NULL;

	if (ifindex != 0 && (ifdev = ifdev_get_by_index(ifindex)) == NULL)
		return ENXIO;

	/*
	 * Use the interface index to set a zone on the source address, if the
	 * source address has a scope.
	 */
	if (ip6_addr_has_scope(ip_2_ip6(&ipaddr), IP6_UNKNOWN)) {
		if (ifindex == 0)
			return EADDRNOTAVAIL;

		ip6_addr_set_zone(ip_2_ip6(&ipaddr), ifindex);
	}

	/*
	 * We need to validate the given address just as thoroughly as an
	 * address given through bind(). If we don't, we could allow forged
	 * source addresses etcetera. To be sure: this call may change the
	 * address to an IPv4 type address if needed.
	 */
	if ((r = ipsock_check_src_addr(pktsock_get_ipsock(pkt), &ipaddr,
	    FALSE /*allow_mcast*/, &ifdev2)) != OK)
		return r;

	if (ifdev2 != NULL) {
		if (ifdev == NULL)
			ifdev = ifdev2;
		else if (ifdev != ifdev2)
			return EADDRNOTAVAIL;
	} else {
		/*
		 * There should be no cases where the (non-multicast) address
		 * successfully parsed, is not unspecified, and yet did not map
		 * to an interface. Eliminate the possibility anyway by
		 * throwing an error for this case. As a result, we are left
		 * with one of two cases:
		 *
		 * 1) ifdevp is not NULL, and src_addrp is unspecified;
		 * 2) ifdevp is not NULL, and src_addrp is a locally assigned
		 *    (unicast) address.
		 *
		 * This is why we need not fill src_addrp when ifdevp is NULL.
		 */
		if (!ip_addr_isany(&ipaddr))
			return EADDRNOTAVAIL;
	}

	*ifdevp = ifdev;
	if (ifdev != NULL)
		*src_addrp = ipaddr;
	return OK;
}

/*
 * Parse a chunk of user-provided control data, on an IPv4 socket provided as
 * 'pkt'. The control chunk is given as 'cmsg', and the length of the data
 * following the control header (possibly zero) is given as 'len'. On success,
 * return OK, with any parsed options merged into the set of packet options
 * 'pkto'. On failure, return a negative error code.
 */
static int
pktsock_parse_ctl_v4(struct pktsock * pkt __unused, struct cmsghdr * cmsg,
	socklen_t len, struct pktopt * pkto)
{
	uint8_t byte;
	int val;

	if (cmsg->cmsg_level != IPPROTO_IP)
		return EAFNOSUPPORT;

	switch (cmsg->cmsg_type) {
	case IP_TOS:
		/*
		 * Some userland code (bind's libisc in particular) supplies
		 * a single byte instead of a full integer for this option.
		 * We go out of our way to accept that format, too.
		 */
		if (len != sizeof(val) && len != sizeof(byte))
			return EINVAL;

		if (len == sizeof(byte)) {
			memcpy(&byte, CMSG_DATA(cmsg), sizeof(byte));
			val = (int)byte;
		} else
			memcpy(&val, CMSG_DATA(cmsg), sizeof(val));

		if (val < 0 || val > UINT8_MAX)
			return EINVAL;

		pkto->pkto_flags |= PKTOF_TOS;
		pkto->pkto_tos = (uint8_t)val;

		return OK;

	case IP_TTL:
		if (len != sizeof(val))
			return EINVAL;

		memcpy(&val, CMSG_DATA(cmsg), sizeof(val));

		if (val < 0 || val > UINT8_MAX)
			return EINVAL;

		pkto->pkto_flags |= PKTOF_TTL;
		pkto->pkto_ttl = (uint8_t)val;

		return OK;

	/*
	 * Implementing IP_PKTINFO might be a bit harder than its IPV6_PKTINFO
	 * sibling, because it would require the use of zone IDs (interface
	 * indices) for IPv4, which is not supported yet.
	 */
	}

	return EINVAL;
}

/*
 * Parse a chunk of user-provided control data, on an IPv6 socket provided as
 * 'pkt'. The control chunk is given as 'cmsg', and the length of the data
 * following the control header (possibly zero) is given as 'len'. On success,
 * return OK, with any parsed options merged into the set of packet options
 * 'pkto'. On failure, return a negative error code.
 */
static int
pktsock_parse_ctl_v6(struct pktsock * pkt, struct cmsghdr * cmsg,
	socklen_t len, struct pktopt * pkto)
{
	struct in6_pktinfo ipi6;
	int val;

	if (cmsg->cmsg_level != IPPROTO_IPV6)
		return EAFNOSUPPORT;

	switch (cmsg->cmsg_type) {
	case IPV6_TCLASS:
		if (len != sizeof(val))
			return EINVAL;

		memcpy(&val, CMSG_DATA(cmsg), sizeof(val));

		if (val < -1 || val > UINT8_MAX)
			return EINVAL;

		if (val == -1)
			val = 0;

		pkto->pkto_flags |= PKTOF_TOS;
		pkto->pkto_tos = (uint8_t)val;

		return OK;

	case IPV6_HOPLIMIT:
		if (len != sizeof(val))
			return EINVAL;

		memcpy(&val, CMSG_DATA(cmsg), sizeof(val));

		if (val < -1 || val > UINT8_MAX)
			return EINVAL;

		if (val == -1)
			val = IP_DEFAULT_TTL;

		pkto->pkto_flags |= PKTOF_TTL;
		pkto->pkto_ttl = (uint8_t)val;

		return OK;

	case IPV6_PKTINFO:
		if (len != sizeof(ipi6))
			return EINVAL;

		memcpy(&ipi6, CMSG_DATA(cmsg), sizeof(ipi6));

		pkto->pkto_flags |= PKTOF_PKTINFO;
		memcpy(&pkto->pkto_srcaddr.addr, &ipi6.ipi6_addr,
		    sizeof(pkto->pkto_srcaddr.addr));
		pkto->pkto_ifindex = ipi6.ipi6_ifindex;

		return OK;

	case IPV6_USE_MIN_MTU:
		if (len != sizeof(int))
			return EINVAL;

		memcpy(&val, CMSG_DATA(cmsg), sizeof(val));

		if (val < -1 || val > 1)
			return EINVAL;

		/* TODO: not supported by lwIP, but needed by applications. */
		return OK;
	}

	return EINVAL;
}

/*
 * Copy in and parse control data, as part of sending a packet on socket 'pkt'.
 * The control data is accessible through 'ctl', with a user-provided length of
 * 'ctl_len'. On success, return OK, with any parsed packet options stored in
 * 'pkto'. On failure, return a negative error code.
 */
int
pktsock_get_ctl(struct pktsock * pkt, const struct sockdriver_data * ctl,
	socklen_t ctl_len, struct pktopt * pkto)
{
	struct msghdr msghdr;
	struct cmsghdr *cmsg;
	socklen_t left, len;
	int r;

	/* The default: no packet options are being overridden. */
	assert(pkto->pkto_flags == 0);

	/* If no control length is given, we are done here. */
	if (ctl_len == 0)
		return OK;

	/*
	 * For now, we put a rather aggressive limit on the size of the control
	 * data. We copy in and parse the whole thing in a single buffer.
	 */
	if (ctl_len > sizeof(pktsock_ctlbuf)) {
		printf("LWIP: too much control data given (%u bytes)\n",
		    ctl_len);

		return ENOBUFS;
	}

	if ((r = sockdriver_copyin(ctl, 0, pktsock_ctlbuf, ctl_len)) != OK)
		return r;

	memset(&msghdr, 0, sizeof(msghdr));
	msghdr.msg_control = pktsock_ctlbuf;
	msghdr.msg_controllen = ctl_len;

	for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
		/* Check for bogus lengths. */
		assert((socklen_t)((char *)cmsg - pktsock_ctlbuf) <= ctl_len);
		left = ctl_len - (socklen_t)((char *)cmsg - pktsock_ctlbuf);
		assert(left >= CMSG_LEN(0));	/* guaranteed by CMSG_xxHDR */

		if (cmsg->cmsg_len < CMSG_LEN(0) || cmsg->cmsg_len > left) {
			printf("LWIP: malformed control data rejected\n");

			return EINVAL;
		}

		len = cmsg->cmsg_len - CMSG_LEN(0);

		if (ipsock_is_ipv6(&pkt->pkt_ipsock))
			r = pktsock_parse_ctl_v6(pkt, cmsg, len, pkto);
		else
			r = pktsock_parse_ctl_v4(pkt, cmsg, len, pkto);

		if (r != OK)
			return r;
	}

	return OK;
}
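
/*
 * Not part of the original code: a sketch of the userland side that produces
 * the control data parsed above, assuming a standard BSD sendmsg(2)
 * environment. 'fd', 'iov', 'ifindex' and 'chosen_source_address' are
 * placeholders, and error handling is omitted. A process could select the
 * outgoing interface and source address for one UDP packet like this:
 *
 *	char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
 *	struct in6_pktinfo ipi6;
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *
 *	memset(&ipi6, 0, sizeof(ipi6));
 *	ipi6.ipi6_ifindex = ifindex;
 *	ipi6.ipi6_addr = chosen_source_address;
 *
 *	memset(buf, 0, sizeof(buf));
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = buf;
 *	msg.msg_controllen = sizeof(buf);
 *
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = IPPROTO_IPV6;
 *	cmsg->cmsg_type = IPV6_PKTINFO;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(ipi6));
 *	memcpy(CMSG_DATA(cmsg), &ipi6, sizeof(ipi6));
 *
 *	sendmsg(fd, &msg, 0);
 *
 * The loop in pktsock_get_ctl() above finds this chunk with CMSG_FIRSTHDR()
 * and CMSG_NXTHDR() and hands it to pktsock_parse_ctl_v6(), which stores the
 * address and interface index in 'pkto' for pktsock_get_pktinfo() to use.
 */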

/*
 * Copy in the packet data from the calling user process, and store it in the
 * buffer 'pbuf' that must already have been allocated with the appropriate
 * size.
 */
int
pktsock_get_data(struct pktsock * pkt, const struct sockdriver_data * data,
	size_t len, struct pbuf * pbuf)
{

	return util_copy_data(data, len, 0, pbuf, 0, TRUE /*copy_in*/);
}

/*
 * Dequeue and free the head of the receive queue of a packet socket.
 */
static void
pktsock_dequeue(struct pktsock * pkt)
{
	struct pbuf *pbuf, **pnext;
	size_t size;

	pbuf = pkt->pkt_rcvhead;
	assert(pbuf != NULL);

	pnext = pchain_end(pbuf);
	size = pchain_size(pbuf);

	if ((pkt->pkt_rcvhead = *pnext) == NULL)
		pkt->pkt_rcvtailp = &pkt->pkt_rcvhead;

	assert(pkt->pkt_rcvlen >= size);
	pkt->pkt_rcvlen -= size;

	*pnext = NULL;
	pbuf_free(pbuf);
}

/*
 * Perform preliminary checks on a receive request.
 */
int
pktsock_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused,
	int flags)
{

	/*
	 * We accept the same flags across all socket types in LWIP, and then
	 * simply ignore the ones we do not support for packet sockets.
	 */
	if ((flags & ~(MSG_PEEK | MSG_WAITALL)) != 0)
		return EOPNOTSUPP;

	return OK;
}

/*
 * Add a chunk of control data to the global control buffer, starting from
 * offset 'off'. The chunk has the given level and type, and its data is given
 * in the buffer 'ptr' with size 'len'. Return the (padded) size of the chunk
 * that was generated as a result.
 */
static size_t
pktsock_add_ctl(int level, int type, void * ptr, socklen_t len, size_t off)
{
	struct cmsghdr cmsg;
	size_t size;

	size = CMSG_SPACE(len);

	/*
	 * The global control buffer must be large enough to store one chunk
	 * of each of the supported options. If this panic triggers, increase
	 * PKTSOCK_CTLBUF_SIZE by as much as needed.
	 */
	if (off + size > sizeof(pktsock_ctlbuf))
		panic("control buffer too small, increase "
		    "PKTSOCK_CTLBUF_SIZE");

	memset(&cmsg, 0, sizeof(cmsg));
	cmsg.cmsg_len = CMSG_LEN(len);
	cmsg.cmsg_level = level;
	cmsg.cmsg_type = type;

	/*
	 * Clear any padding space. This can be optimized, but in any case we
	 * must be careful not to copy out any bytes that have not been
	 * initialized at all.
	 */
	memset(&pktsock_ctlbuf[off], 0, size);

	memcpy(&pktsock_ctlbuf[off], &cmsg, sizeof(cmsg));
	memcpy(CMSG_DATA((struct cmsghdr *)&pktsock_ctlbuf[off]), ptr, len);

	return size;
}

/*
 * Generate and copy out control data, as part of delivering a packet from
 * socket 'pkt' to userland. The control data buffer is given as 'ctl', with
 * a user-given length of 'ctl_len' bytes. The packet's header information is
 * provided as 'pkthdr', and its source and destination addresses as 'pktaddr',
 * which may be a pktaddr4 or pktaddr6 structure depending on the value of the
 * PKTHF_IPV6 flag in the 'flags' field in 'pkthdr'. Note that we support
 * dual-stack sockets, and as such it is possible that the socket is of domain
 * AF_INET6 while the received packet is an IPv4 packet. On success, return
 * the size of the control data copied out (possibly zero). If more control
 * data were generated than copied out, also merge the MSG_CTRUNC flag into
 * 'rflags'. On failure, return a negative error code.
 */
static int
pktsock_put_ctl(struct pktsock * pkt, const struct sockdriver_data * ctl,
	socklen_t ctl_len, struct pkthdr * pkthdr, void * pktaddr,
	int * rflags)
{
	struct pktaddr6 *pktaddr6;
	struct pktaddr4 *pktaddr4;
	struct in_pktinfo ipi;
	struct in6_pktinfo ipi6;
	ip_addr_t ipaddr;
	unsigned int flags;
	uint8_t byte;
	size_t off;
	int r, val;

	flags = ipsock_get_flags(&pkt->pkt_ipsock);

	if (!(flags & (PKTF_RECVINFO | PKTF_RECVTOS | PKTF_RECVTTL)))
		return 0;

	/*
	 * Important: all generated control chunks must fit in the global
	 * control buffer together. When adding more options here, ensure that
	 * the control buffer remains large enough to receive all options at
	 * once. See also the panic in pktsock_add_ctl().
	 */
	off = 0;

	/*
	 * IPv6 sockets may receive IPv4 packets. The ancillary data is in the
	 * format corresponding to the socket, which means we may have to
	 * convert any IPv4 addresses from the packet to IPv4-mapped IPv6
	 * addresses for the ancillary data, just like the source address.
	 */
	if (ipsock_is_ipv6(&pkt->pkt_ipsock)) {
		if (flags & PKTF_RECVTTL) {
			val = pkthdr->ttl;

			off += pktsock_add_ctl(IPPROTO_IPV6, IPV6_HOPLIMIT,
			    &val, sizeof(val), off);
		}

		if (flags & PKTF_RECVTOS) {
			val = pkthdr->tos;

			off += pktsock_add_ctl(IPPROTO_IPV6, IPV6_TCLASS, &val,
			    sizeof(val), off);
		}

		if (flags & PKTF_RECVINFO) {
			memset(&ipi6, 0, sizeof(ipi6));

			if (pkthdr->flags & PKTHF_IPV6) {
				pktaddr6 = (struct pktaddr6 *)pktaddr;
				memcpy(&ipi6.ipi6_addr, &pktaddr6->dstaddr,
				    sizeof(ipi6.ipi6_addr));
			} else {
				pktaddr4 = (struct pktaddr4 *)pktaddr;

				addr_make_v4mapped_v6(&ipaddr,
				    &pktaddr4->dstaddr);

				memcpy(&ipi6.ipi6_addr,
				    ip_2_ip6(&ipaddr)->addr,
				    sizeof(ipi6.ipi6_addr));
			}
			ipi6.ipi6_ifindex = pkthdr->dstif;

			off += pktsock_add_ctl(IPPROTO_IPV6, IPV6_PKTINFO,
			    &ipi6, sizeof(ipi6), off);
		}
	} else {
		if (flags & PKTF_RECVTTL) {
			byte = pkthdr->ttl;

			off += pktsock_add_ctl(IPPROTO_IP, IP_TTL, &byte,
			    sizeof(byte), off);
		}

		if (flags & PKTF_RECVINFO) {
			assert(!(pkthdr->flags & PKTHF_IPV6));
			pktaddr4 = (struct pktaddr4 *)pktaddr;

			memset(&ipi, 0, sizeof(ipi));
			memcpy(&ipi.ipi_addr, &pktaddr4->dstaddr,
			    sizeof(ipi.ipi_addr));
			ipi.ipi_ifindex = pkthdr->dstif;

			off += pktsock_add_ctl(IPPROTO_IP, IP_PKTINFO, &ipi,
			    sizeof(ipi), off);
		}
	}

	assert(off > 0);

	if (ctl_len >= off)
		ctl_len = off;
	else
		*rflags |= MSG_CTRUNC;

	if (ctl_len > 0 &&
	    (r = sockdriver_copyout(ctl, 0, pktsock_ctlbuf, ctl_len)) != OK)
		return r;

	return ctl_len;
}

/*
 * Receive data on a packet socket.
 */
int
pktsock_recv(struct sock * sock, const struct sockdriver_data * data,
	size_t len, size_t * off, const struct sockdriver_data * ctl,
	socklen_t ctl_len, socklen_t * ctl_off, struct sockaddr * addr,
	socklen_t * addr_len, endpoint_t user_endpt __unused, int flags,
	size_t min __unused, int * rflags)
{
	struct pktsock *pkt = (struct pktsock *)sock;
	struct pktaddr4 pktaddr4;
	struct pktaddr6 pktaddr6;
	struct pkthdr pkthdr;
	void *pktaddr;
	struct pbuf *pbuf;
	ip_addr_t srcaddr;
	int r;

	if ((pbuf = pkt->pkt_rcvhead) == NULL)
		return SUSPEND;

	/*
	 * Get the ancillary data for the packet. The format of the ancillary
	 * data depends on the received packet type, which may be different
	 * from the socket type.
	 */
	util_pbuf_header(pbuf, sizeof(pkthdr));

	memcpy(&pkthdr, pbuf->payload, sizeof(pkthdr));

	if (pkthdr.flags & PKTHF_IPV6) {
		util_pbuf_header(pbuf, sizeof(pktaddr6));

		memcpy(&pktaddr6, pbuf->payload, sizeof(pktaddr6));
		pktaddr = &pktaddr6;

		ip_addr_copy_from_ip6_packed(srcaddr, pktaddr6.srcaddr);
		if (ip6_addr_has_scope(ip_2_ip6(&srcaddr), IP6_UNICAST))
			ip6_addr_set_zone(ip_2_ip6(&srcaddr), pkthdr.addrif);

		util_pbuf_header(pbuf,
		    -(int)(sizeof(pkthdr) + sizeof(pktaddr6)));
	} else {
		util_pbuf_header(pbuf, sizeof(pktaddr4));

		memcpy(&pktaddr4, pbuf->payload, sizeof(pktaddr4));
		pktaddr = &pktaddr4;

		ip_addr_copy_from_ip4(srcaddr, pktaddr4.srcaddr);

		util_pbuf_header(pbuf,
		    -(int)(sizeof(pkthdr) + sizeof(pktaddr4)));
	}

	/* Copy out the packet data to the calling user process. */
	if (len >= pbuf->tot_len)
		len = pbuf->tot_len;
	else
		*rflags |= MSG_TRUNC;

	r = util_copy_data(data, len, 0, pbuf, 0, FALSE /*copy_in*/);

	if (r != OK)
		return r;

	/* Generate and copy out ancillary (control) data, if requested. */
	if ((r = pktsock_put_ctl(pkt, ctl, ctl_len, &pkthdr, pktaddr,
	    rflags)) < 0)
		return r;

	/* Store the source IP address. */
	ipsock_put_addr(&pkt->pkt_ipsock, addr, addr_len, &srcaddr,
	    pkthdr.port);

	/* Set multicast or broadcast message flag, if applicable. */
	if (pkthdr.flags & PKTHF_MCAST)
		*rflags |= MSG_MCAST;
	else if (pkthdr.flags & PKTHF_BCAST)
		*rflags |= MSG_BCAST;

	/* Discard the packet now, unless we were instructed to peek only. */
	if (!(flags & MSG_PEEK))
		pktsock_dequeue(pkt);

	/* Return the received part of the packet length. */
	*off = len;
	*ctl_off = r;
	return OK;
}
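
/*
 * Not part of the original code: a sketch of how a userland process would
 * consume what pktsock_recv() and pktsock_put_ctl() deliver, assuming a
 * standard BSD recvmsg(2) environment. After enabling IPV6_RECVPKTINFO and
 * IPV6_RECVHOPLIMIT with setsockopt(2), the receiving side could look like
 * this ('fd' is a placeholder, error handling omitted):
 *
 *	char data[2048], ctl[256];
 *	struct sockaddr_in6 sin6;
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *	struct in6_pktinfo ipi6;
 *	int hlim;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_name = &sin6;
 *	msg.msg_namelen = sizeof(sin6);
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = ctl;
 *	msg.msg_controllen = sizeof(ctl);
 *
 *	recvmsg(fd, &msg, 0);
 *
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
 *	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level != IPPROTO_IPV6)
 *			continue;
 *		if (cmsg->cmsg_type == IPV6_PKTINFO)
 *			memcpy(&ipi6, CMSG_DATA(cmsg), sizeof(ipi6));
 *		else if (cmsg->cmsg_type == IPV6_HOPLIMIT)
 *			memcpy(&hlim, CMSG_DATA(cmsg), sizeof(hlim));
 *	}
 *
 * MSG_TRUNC and MSG_CTRUNC in msg.msg_flags report data and control data
 * truncation, respectively, matching what this function sets in 'rflags'.
 */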

/*
 * Test whether data can be received on a packet socket, and if so, how many
 * bytes of data.
 */
int
pktsock_test_recv(struct sock * sock, size_t min __unused, size_t * size)
{
	struct pktsock *pkt = (struct pktsock *)sock;

	if (pkt->pkt_rcvhead == NULL)
		return SUSPEND;

	if (size != NULL)
		*size = pkt->pkt_rcvhead->tot_len;
	return OK;
}

/*
 * The caller has performed a multicast operation on the given socket. Thus,
 * the caller is multicast aware. Remember this, because that means the socket
 * may also receive traffic to multicast destinations.
 */
void
pktsock_set_mcaware(struct pktsock * pkt)
{

	ipsock_set_flag(&pkt->pkt_ipsock, PKTF_MCAWARE);
}

/*
 * Set socket options on a packet socket.
 */
int
pktsock_setsockopt(struct pktsock * pkt, int level, int name,
	const struct sockdriver_data * data, socklen_t len,
	struct ipopts * ipopts)
{
	struct ip_mreq imr;
	struct ipv6_mreq ipv6mr;
	struct in6_pktinfo ipi6;
	ip_addr_t ipaddr, ifaddr;
	struct ifdev *ifdev;
	unsigned int flag;
	uint32_t ifindex;
	int r, val, has_scope;

	switch (level) {
	case IPPROTO_IP:
		if (ipsock_is_ipv6(&pkt->pkt_ipsock))
			break;

		switch (name) {
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
			pktsock_set_mcaware(pkt);

			if ((r = sockdriver_copyin_opt(data, &imr, sizeof(imr),
			    len)) != OK)
				return r;

			ip_addr_set_ip4_u32(&ipaddr, imr.imr_multiaddr.s_addr);
			ip_addr_set_ip4_u32(&ifaddr, imr.imr_interface.s_addr);

			if (!ip_addr_isany(&ifaddr)) {
				ifdev = ifaddr_map_by_addr(&ifaddr);

				if (ifdev == NULL)
					return EADDRNOTAVAIL;
			} else
				ifdev = NULL;

			if (name == IP_ADD_MEMBERSHIP)
				r = mcast_join(&pkt->pkt_mcast, &ipaddr,
				    ifdev);
			else
				r = mcast_leave(&pkt->pkt_mcast, &ipaddr,
				    ifdev);

			return r;

		case IP_RECVTTL:
		case IP_RECVPKTINFO:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			switch (name) {
			case IP_RECVTTL: flag = PKTF_RECVTTL; break;
			case IP_RECVPKTINFO: flag = PKTF_RECVINFO; break;
			default: flag = 0; assert(0); break;
			}

			if (val)
				ipsock_set_flag(&pkt->pkt_ipsock, flag);
			else
				ipsock_clear_flag(&pkt->pkt_ipsock, flag);

			return OK;
		}

		break;

	case IPPROTO_IPV6:
		if (!ipsock_is_ipv6(&pkt->pkt_ipsock))
			break;

		switch (name) {
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
			pktsock_set_mcaware(pkt);

			if ((r = sockdriver_copyin_opt(data, &ipv6mr,
			    sizeof(ipv6mr), len)) != OK)
				return r;

			ip_addr_set_zero_ip6(&ipaddr);
			memcpy(ip_2_ip6(&ipaddr)->addr,
			    &ipv6mr.ipv6mr_multiaddr,
			    sizeof(ip_2_ip6(&ipaddr)->addr));

			/*
			 * We currently do not support joining IPv4 multicast
			 * groups on IPv6 sockets. The reason for this is that
			 * this would require decisions on what to do if the
			 * socket is set to V6ONLY later, as well as various
			 * additional exceptions for a case that hopefully
			 * doesn't occur in practice anyway.
			 */
			if (ip6_addr_isipv4mappedipv6(ip_2_ip6(&ipaddr)))
				return EADDRNOTAVAIL;

			has_scope = ip6_addr_has_scope(ip_2_ip6(&ipaddr),
			    IP6_UNKNOWN);

			if ((ifindex = ipv6mr.ipv6mr_interface) != 0) {
				ifdev = ifdev_get_by_index(ifindex);

				if (ifdev == NULL)
					return ENXIO;

				if (has_scope)
					ip6_addr_set_zone(ip_2_ip6(&ipaddr),
					    ifindex);
			} else {
				if (has_scope)
					return EADDRNOTAVAIL;

				ifdev = NULL;
			}

			if (name == IPV6_JOIN_GROUP)
				r = mcast_join(&pkt->pkt_mcast, &ipaddr,
				    ifdev);
			else
				r = mcast_leave(&pkt->pkt_mcast, &ipaddr,
				    ifdev);

			return r;

		case IPV6_USE_MIN_MTU:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			if (val < -1 || val > 1)
				return EINVAL;

			/*
			 * lwIP does not support path MTU discovery, so do
			 * nothing. TODO: see if this is actually good enough.
			 */
			return OK;

		case IPV6_PKTINFO:
			if ((r = sockdriver_copyin_opt(data, &ipi6,
			    sizeof(ipi6), len)) != OK)
				return r;

			/*
			 * Simply copy in what is given. The values will be
			 * parsed only once a packet is sent, in
			 * pktsock_get_pktinfo(). Otherwise, if we perform
			 * checks here, they may be outdated by the time the
			 * values are actually used.
			 */
			memcpy(&pkt->pkt_srcaddr.addr, &ipi6.ipi6_addr,
			    sizeof(pkt->pkt_srcaddr.addr));
			pkt->pkt_ifindex = ipi6.ipi6_ifindex;

			return OK;

		case IPV6_RECVPKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_RECVTCLASS:
			if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
			    len)) != OK)
				return r;

			switch (name) {
			case IPV6_RECVPKTINFO: flag = PKTF_RECVINFO; break;
			case IPV6_RECVHOPLIMIT: flag = PKTF_RECVTTL; break;
			case IPV6_RECVTCLASS: flag = PKTF_RECVTOS; break;
			default: flag = 0; assert(0); break;
			}

			if (val)
				ipsock_set_flag(&pkt->pkt_ipsock, flag);
			else
				ipsock_clear_flag(&pkt->pkt_ipsock, flag);

			return OK;
		}

		break;
	}

	return ipsock_setsockopt(&pkt->pkt_ipsock, level, name, data, len,
	    ipopts);
}

/*
 * Retrieve socket options on a packet socket.
 */
int
pktsock_getsockopt(struct pktsock * pkt, int level, int name,
	const struct sockdriver_data * data, socklen_t * len,
	struct ipopts * ipopts)
{
	struct in6_pktinfo ipi6;
	unsigned int flag;
	int val;

	switch (level) {
	case IPPROTO_IP:
		if (ipsock_is_ipv6(&pkt->pkt_ipsock))
			break;

		switch (name) {
		case IP_RECVTTL:
		case IP_RECVPKTINFO:
			switch (name) {
			case IP_RECVTTL: flag = PKTF_RECVTTL; break;
			case IP_RECVPKTINFO: flag = PKTF_RECVINFO; break;
			default: flag = 0; assert(0); break;
			}

			val = !!(ipsock_get_flag(&pkt->pkt_ipsock, flag));

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;

	case IPPROTO_IPV6:
		if (!ipsock_is_ipv6(&pkt->pkt_ipsock))
			break;

		switch (name) {
		case IPV6_USE_MIN_MTU:
			/*
			 * TODO: sort out exactly what lwIP actually supports
			 * in the way of path MTU discovery. Value 1 means
			 * that path MTU discovery is disabled and packets are
			 * sent at the minimum MTU (RFC 3542).
			 */
			val = 1;

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);

		case IPV6_PKTINFO:
			memset(&ipi6, 0, sizeof(ipi6));

			/*
			 * Simply copy out whatever was given before. These
			 * fields are initialized to zero on socket creation.
			 */
			memcpy(&ipi6.ipi6_addr, &pkt->pkt_srcaddr.addr,
			    sizeof(ipi6.ipi6_addr));
			ipi6.ipi6_ifindex = pkt->pkt_ifindex;

			return sockdriver_copyout_opt(data, &ipi6,
			    sizeof(ipi6), len);

		case IPV6_RECVPKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_RECVTCLASS:
			switch (name) {
			case IPV6_RECVPKTINFO: flag = PKTF_RECVINFO; break;
			case IPV6_RECVHOPLIMIT: flag = PKTF_RECVTTL; break;
			case IPV6_RECVTCLASS: flag = PKTF_RECVTOS; break;
			default: flag = 0; assert(0); break;
			}

			val = !!(ipsock_get_flag(&pkt->pkt_ipsock, flag));

			return sockdriver_copyout_opt(data, &val, sizeof(val),
			    len);
		}

		break;
	}

	return ipsock_getsockopt(&pkt->pkt_ipsock, level, name, data, len,
	    ipopts);
}

/*
 * Drain the receive queue of a packet socket.
 */
static void
pktsock_drain(struct pktsock * pkt)
{

	while (pkt->pkt_rcvhead != NULL)
		pktsock_dequeue(pkt);

	assert(pkt->pkt_rcvlen == 0);
	assert(pkt->pkt_rcvtailp == &pkt->pkt_rcvhead);
}

/*
 * Shut down a packet socket for reading and/or writing.
 */
void
pktsock_shutdown(struct pktsock * pkt, unsigned int mask)
{

	if (mask & SFL_SHUT_RD)
		pktsock_drain(pkt);
}

/*
 * Close a packet socket.
 */
void
pktsock_close(struct pktsock * pkt)
{

	pktsock_drain(pkt);

	mcast_leave_all(&pkt->pkt_mcast);
}

/*
 * Return the rounded-up number of bytes in the packet socket's receive queue,
 * for sysctl(7). NetBSD returns the used portion of each buffer, but that
 * would be quite some extra effort for us (TODO).
 */
size_t
pktsock_get_recvlen(struct pktsock * pkt)
{

	return pkt->pkt_rcvlen;
}