VFS: add BSD socket API, socket driver support

This patch adds the implementation of the BSD socket system calls
which have been introduced in an earlier patch.  At the same time, it
adds support for communication with socket drivers, using a new
"socket device" (SDEV_) protocol.  These two parts, implemented in
socket.c and sdev.c respectively, form the upper and lower halves of
the new BSD socket support in VFS.  New mapping functionality for
socket domains and drivers is added as well, implemented in smap.c.

The rest of the changes mainly facilitate the separation of character
and socket driver calls, and do not make any fundamental alterations.
For example, while this patch changes VFS's select.c rather heavily,
the new select logic for socket drivers is the exact same as for
character drivers; the changes mainly separate the driver type
specific parts from the generic select logic further than before.

Change-Id: I2f13084dd3c8d3a68bfc69da0621120c8291f707
This commit is contained in:
David van Moolenbroek 2016-02-21 19:28:24 +00:00
parent 181fb1b2b5
commit e3b8d4bb58
27 changed files with 2785 additions and 211 deletions

View File

@ -132,7 +132,7 @@ pfs_newnode(mode_t mode, uid_t uid, gid_t gid, dev_t dev,
/* Check the file type. Do we support it at all? */
isfifo = S_ISFIFO(mode);
isdev = S_ISBLK(mode) || S_ISCHR(mode);
isdev = S_ISBLK(mode) || S_ISCHR(mode) || S_ISSOCK(mode);
if (!isfifo && !isdev)
return EINVAL; /* this means VFS is misbehaving.. */

View File

@ -30,6 +30,7 @@
* 0x1600 - 0x16FF VirtualBox (VBOX) requests (see vboxif.h)
* 0x1700 - 0x17FF PTYFS requests
* 0x1800 - 0x18FF Management Information Base (MIB) requests
* 0x1900 - 0x19FF Socket device requests and responses
*
* Zero and negative values are widely used for OK and error responses.
*/
@ -1027,6 +1028,54 @@
#define NR_MIB_CALLS 3 /* highest number from base plus one */
/*===========================================================================*
* Messages for socket devices *
*===========================================================================*/
/* Base type for socket device requests and responses.  The IS_SDEV_RQ and
 * IS_SDEV_RS tests mask off the low seven bits of the message type, so
 * request types occupy 0x1900 - 0x197F and response types 0x1980 - 0x19FF.
 */
#define SDEV_RQ_BASE 0x1900
#define SDEV_RS_BASE 0x1980
#define IS_SDEV_RQ(type) (((type) & ~0x7f) == SDEV_RQ_BASE)
#define IS_SDEV_RS(type) (((type) & ~0x7f) == SDEV_RS_BASE)
/* Message types for socket device requests.  Each value is an offset from
 * SDEV_RQ_BASE, so all of them satisfy IS_SDEV_RQ().
 */
#define SDEV_SOCKET (SDEV_RQ_BASE + 0) /* create socket */
#define SDEV_SOCKETPAIR (SDEV_RQ_BASE + 1) /* make socket pair */
#define SDEV_BIND (SDEV_RQ_BASE + 2) /* bind to address */
#define SDEV_CONNECT (SDEV_RQ_BASE + 3) /* start connection */
#define SDEV_LISTEN (SDEV_RQ_BASE + 4) /* enter listen mode */
#define SDEV_ACCEPT (SDEV_RQ_BASE + 5) /* accept connection */
#define SDEV_SEND (SDEV_RQ_BASE + 6) /* send data */
#define SDEV_RECV (SDEV_RQ_BASE + 7) /* receive data */
#define SDEV_IOCTL (SDEV_RQ_BASE + 8) /* I/O control */
#define SDEV_SETSOCKOPT (SDEV_RQ_BASE + 9) /* set socket option */
#define SDEV_GETSOCKOPT (SDEV_RQ_BASE + 10) /* get socket option */
#define SDEV_GETSOCKNAME (SDEV_RQ_BASE + 11) /* get socket name */
#define SDEV_GETPEERNAME (SDEV_RQ_BASE + 12) /* get peer name */
#define SDEV_SHUTDOWN (SDEV_RQ_BASE + 13) /* shut down I/O */
#define SDEV_CLOSE (SDEV_RQ_BASE + 14) /* close socket */
#define SDEV_CANCEL (SDEV_RQ_BASE + 15) /* cancel request */
#define SDEV_SELECT (SDEV_RQ_BASE + 16) /* select on socket */
/* Message types for socket device responses.  Each value is an offset from
 * SDEV_RS_BASE, so all of them satisfy IS_SDEV_RS().
 */
#define SDEV_REPLY (SDEV_RS_BASE + 0) /* generic reply */
#define SDEV_SOCKET_REPLY (SDEV_RS_BASE + 1) /* socket reply */
#define SDEV_ACCEPT_REPLY (SDEV_RS_BASE + 2) /* accept reply */
#define SDEV_RECV_REPLY (SDEV_RS_BASE + 3) /* receive reply */
#define SDEV_SELECT1_REPLY (SDEV_RS_BASE + 4) /* select reply 1 */
#define SDEV_SELECT2_REPLY (SDEV_RS_BASE + 5) /* select reply 2 */
/* Bits in the 'sflags' field of socket device transfer requests. */
# define SDEV_NOFLAGS 0x00 /* no flags are set */
# define SDEV_NONBLOCK 0x01 /* do not suspend I/O request */
/* Bits in the 'ops', 'status' fields of socket device select messages.  These
 * are single-bit values and may be OR'ed together.
 */
# define SDEV_OP_RD 0x01 /* selected for read operation */
# define SDEV_OP_WR 0x02 /* selected for write operation */
# define SDEV_OP_ERR 0x04 /* selected for error operation */
# define SDEV_NOTIFY 0x08 /* notification requested */
/*===========================================================================*
* Internal codes used by several services *
*===========================================================================*/

View File

@ -999,6 +999,52 @@ typedef struct {
} mess_linputdriver_input_event;
_ASSERT_MSG_SIZE(mess_linputdriver_input_event);
/* Socket driver -> VFS message payload for an accept reply (presumably the
 * payload of SDEV_ACCEPT_REPLY -- confirm against the sender/receiver code).
 */
typedef struct {
int32_t req_id; /* request ID (presumably echoes the request's req_id) */
int32_t sock_id; /* socket ID (presumably of the accepted socket; verify) */
int status; /* result of the call -- TODO confirm value convention */
unsigned int len; /* a length (presumably of the returned address; verify) */
uint8_t padding[40]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_lsockdriver_vfs_accept_reply;
_ASSERT_MSG_SIZE(mess_lsockdriver_vfs_accept_reply);
/* Socket driver -> VFS message payload for a receive reply (presumably the
 * payload of SDEV_RECV_REPLY -- confirm against the sender/receiver code).
 */
typedef struct {
int32_t req_id; /* request ID (presumably echoes the request's req_id) */
int status; /* result of the call -- TODO confirm value convention */
unsigned int ctl_len; /* control data length (presumably bytes produced) */
unsigned int addr_len; /* address length (presumably bytes produced) */
int flags; /* flags returned by the driver -- NOTE(review): meaning not shown here */
uint8_t padding[36]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_lsockdriver_vfs_recv_reply;
_ASSERT_MSG_SIZE(mess_lsockdriver_vfs_recv_reply);
/* Socket driver -> VFS generic reply payload (presumably the payload of
 * SDEV_REPLY -- confirm against the sender/receiver code).
 */
typedef struct {
int32_t req_id; /* request ID (presumably echoes the request's req_id) */
int status; /* result of the call -- TODO confirm value convention */
uint8_t padding[48]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_lsockdriver_vfs_reply;
_ASSERT_MSG_SIZE(mess_lsockdriver_vfs_reply);
/* Socket driver -> VFS select reply payload (presumably shared by
 * SDEV_SELECT1_REPLY and SDEV_SELECT2_REPLY -- confirm).  Unlike the other
 * replies, this one carries a socket ID rather than a request ID.
 */
typedef struct {
int32_t sock_id; /* socket the select status applies to */
int status; /* select status; the protocol header describes SDEV_OP_* bits
             * for the 'status' field of select messages */
uint8_t padding[48]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_lsockdriver_vfs_select_reply;
_ASSERT_MSG_SIZE(mess_lsockdriver_vfs_select_reply);
/* Socket driver -> VFS socket creation reply payload (presumably the payload
 * of SDEV_SOCKET_REPLY -- confirm against the sender/receiver code).
 */
typedef struct {
int32_t req_id; /* request ID (presumably echoes the request's req_id) */
int32_t sock_id; /* socket ID (presumably of the newly created socket) */
int32_t sock_id2; /* second socket ID (presumably for socket pairs; verify) */
uint8_t padding[44]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_lsockdriver_vfs_socket_reply;
_ASSERT_MSG_SIZE(mess_lsockdriver_vfs_socket_reply);
typedef struct {
cp_grant_id_t gid;
size_t size;
@ -2131,6 +2177,86 @@ typedef struct {
} mess_vfs_lchardriver_select;
_ASSERT_MSG_SIZE(mess_vfs_lchardriver_select);
/* VFS -> socket driver request payload carrying a granted buffer and length
 * (presumably used for address-based calls such as bind/connect/accept and
 * getsockname/getpeername -- confirm against sdev.c).
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int32_t sock_id; /* socket the request applies to */
cp_grant_id_t grant; /* grant for the buffer (presumably the address) */
unsigned int len; /* length of the granted buffer */
endpoint_t user_endpt; /* endpoint of the user process (presumably the caller) */
int sflags; /* SDEV_NOFLAGS or SDEV_NONBLOCK transfer flags */
uint8_t padding[32]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_addr;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_addr);
/* VFS -> socket driver request payload for getting or setting a socket option
 * (presumably used with SDEV_SETSOCKOPT and SDEV_GETSOCKOPT -- confirm).
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int32_t sock_id; /* socket the request applies to */
int level; /* option level */
int name; /* option name */
cp_grant_id_t grant; /* grant for the option value buffer */
unsigned int len; /* length of the option value buffer */
uint8_t padding[32]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_getset;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_getset);
/* VFS -> socket driver request payload for an I/O control call (presumably
 * the payload of SDEV_IOCTL -- confirm against sdev.c).
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int32_t sock_id; /* socket the request applies to */
unsigned long request; /* ioctl request code */
cp_grant_id_t grant; /* grant for the ioctl argument buffer */
endpoint_t user_endpt; /* endpoint of the user process (presumably the caller) */
int sflags; /* SDEV_NOFLAGS or SDEV_NONBLOCK transfer flags */
uint8_t padding[32]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_ioctl;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_ioctl);
/* VFS -> socket driver select request payload (presumably the payload of
 * SDEV_SELECT -- confirm).  It carries no request ID; the reply is matched by
 * socket ID instead (see the select reply struct, which has only sock_id).
 */
typedef struct {
int32_t sock_id; /* socket to select on */
int ops; /* SDEV_OP_* bits, optionally with SDEV_NOTIFY */
uint8_t padding[48]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_select;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_select);
/* VFS -> socket driver request payload for data transfer (presumably shared by
 * SDEV_SEND and SDEV_RECV -- confirm against sdev.c).  Three separately granted
 * buffers are described: data, control data, and address.
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int32_t sock_id; /* socket the request applies to */
cp_grant_id_t data_grant; /* grant for the data buffer */
size_t data_len; /* length of the data buffer */
cp_grant_id_t ctl_grant; /* grant for the control data buffer */
unsigned int ctl_len; /* length of the control data buffer */
cp_grant_id_t addr_grant; /* grant for the address buffer */
unsigned int addr_len; /* length of the address buffer */
endpoint_t user_endpt; /* endpoint of the user process (presumably the caller) */
int flags; /* transfer flags -- NOTE(review): encoding not shown here; confirm */
uint8_t padding[16]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_sendrecv;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_sendrecv);
/* VFS -> socket driver request payload for calls that need at most one integer
 * argument (presumably listen, shutdown, close and cancel -- confirm).
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int32_t sock_id; /* socket the request applies to */
int param; /* call-specific parameter (presumably backlog or 'how'; verify) */
uint8_t padding[44]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_simple;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_simple);
/* VFS -> socket driver request payload for socket creation (presumably shared
 * by SDEV_SOCKET and SDEV_SOCKETPAIR -- confirm).  There is no sock_id field:
 * the socket does not exist yet when this request is sent.
 */
typedef struct {
int32_t req_id; /* request ID chosen by VFS */
int domain; /* socket domain */
int type; /* socket type */
int protocol; /* socket protocol */
endpoint_t user_endpt; /* endpoint of the user process (presumably the caller) */
uint8_t padding[36]; /* filler; total size is checked by _ASSERT_MSG_SIZE */
} mess_vfs_lsockdriver_socket;
_ASSERT_MSG_SIZE(mess_vfs_lsockdriver_socket);
typedef struct {
cp_grant_id_t grant;
size_t size;
@ -2301,6 +2427,15 @@ typedef struct noxfer_message {
mess_li2cdriver_i2c_busc_i2c_exec m_li2cdriver_i2c_busc_i2c_exec;
mess_li2cdriver_i2c_busc_i2c_reserve m_li2cdriver_i2c_busc_i2c_reserve;
mess_linputdriver_input_event m_linputdriver_input_event;
mess_lsockdriver_vfs_accept_reply
m_lsockdriver_vfs_accept_reply;
mess_lsockdriver_vfs_recv_reply
m_lsockdriver_vfs_recv_reply;
mess_lsockdriver_vfs_reply m_lsockdriver_vfs_reply;
mess_lsockdriver_vfs_select_reply
m_lsockdriver_vfs_select_reply;
mess_lsockdriver_vfs_socket_reply
m_lsockdriver_vfs_socket_reply;
mess_lsys_fi_ctl m_lsys_fi_ctl;
mess_lsys_fi_reply m_lsys_fi_reply;
mess_lsys_getsysinfo m_lsys_getsysinfo;
@ -2423,6 +2558,13 @@ typedef struct noxfer_message {
mess_vfs_lchardriver_openclose m_vfs_lchardriver_openclose;
mess_vfs_lchardriver_readwrite m_vfs_lchardriver_readwrite;
mess_vfs_lchardriver_select m_vfs_lchardriver_select;
mess_vfs_lsockdriver_addr m_vfs_lsockdriver_addr;
mess_vfs_lsockdriver_getset m_vfs_lsockdriver_getset;
mess_vfs_lsockdriver_ioctl m_vfs_lsockdriver_ioctl;
mess_vfs_lsockdriver_select m_vfs_lsockdriver_select;
mess_vfs_lsockdriver_sendrecv m_vfs_lsockdriver_sendrecv;
mess_vfs_lsockdriver_simple m_vfs_lsockdriver_simple;
mess_vfs_lsockdriver_socket m_vfs_lsockdriver_socket;
mess_vfs_lsys_gcov m_vfs_lsys_gcov;
mess_vfs_utimens m_vfs_utimens;
mess_vm_vfs_mmap m_vm_vfs_mmap;

View File

@ -51,6 +51,7 @@ fproc_dmp(void)
);
if (fp->fp_blocked_on == FP_BLOCKED_ON_CDEV)
printf("%4d\n", fp->fp_cdev.endpt);
/* TODO: for FP_BLOCKED_ON_SDEV we do not have the endpoint.. */
else
printf(" nil\n");
}

View File

@ -306,10 +306,11 @@ get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
wmesg = "select";
break;
case FP_BLOCKED_ON_CDEV:
case FP_BLOCKED_ON_SDEV:
/*
* Add the task (= character driver) endpoint to the
* wchan value, and use the driver's process name,
* without parentheses, as wmesg text.
* Add the task (= character or socket driver) endpoint
* to the wchan value, and use the driver's process
* name, without parentheses, as wmesg text.
*/
wchan |= (uint64_t)fp->fpl_task << 16;
fill_wmesg(wmptr, wmsz, fp->fpl_task, FALSE /*ipc*/);

View File

@ -8,7 +8,7 @@ SRCS= main.c open.c read.c write.c pipe.c dmap.c \
lock.c misc.c utility.c select.c table.c \
vnode.c vmnt.c request.c \
tll.c comm.c worker.c coredump.c \
bdev.c cdev.c socket.c
bdev.c cdev.c sdev.c smap.c socket.c
.if ${MKCOVERAGE} != "no"
SRCS+= gcov.c

View File

@ -47,9 +47,9 @@ spread out over the kernel, VM, PM, and VFS). For example, it maintains state
for select(2) calls, file descriptors and file positions. Also, it cooperates
with the Process Manager to handle the fork, exec, and exit system calls.
Third, VFS keeps track of endpoints that are supposed to be drivers for
character or block special files. File Servers can be regarded as drivers for
block special files, although they are handled entirely different compared
to other drivers.
character or block special files, as well as for socket protocol families.
File Servers can be regarded as drivers for block special files, although they
are handled entirely differently compared to other drivers.
The following diagram depicts how a read() on a file in /home is being handled:
{{{
@ -88,10 +88,10 @@ fetches a message (internally referred to as a job in some cases), executes
the request embedded in the message, returns a reply, and fetches the next
job. There are several sources for new jobs: from user processes, from PM, from
the kernel, and from suspended jobs inside VFS itself (suspended operations
on pipes, locks, or character special files). File Servers are regarded as
normal user processes in this case, but their abilities are limited. This
is to prevent deadlocks. Once a job is received, a worker thread starts
executing it. During the lifetime of a job, the worker thread might need
on pipes, locks, character special files, or sockets). File Servers are
regarded as normal user processes in this case, but their abilities are
limited. This is to prevent deadlocks. Once a job is received, a worker thread
starts executing it. During the lifetime of a job, the worker thread might need
to talk to several File Servers. The protocol VFS speaks with File Servers
is fully documented on the Wiki at [0]. The protocol fields are defined in
<minix/vfsif.h>. If the job is an operation on a character or block special
@ -122,10 +122,10 @@ Driver replies are processed directly from the main thread. As a consequence,
these processing routines may not block their calling thread. In some cases,
these routines may resume a thread that is blocked waiting for the reply. This
is always the case for block driver replies, and may or may not be the case for
character driver replies. The character driver reply processing routines may
also unblock suspended processes which in turn generate new jobs to be handled
by the main loop (e.g., suspended reads and writes on pipes). So depending
on the reply a new thread may have to be started.
character and socket driver replies. The character and socket driver reply
processing routines may also unblock suspended processes which in turn generate
new jobs to be handled by the main loop (e.g., suspended reads and writes on
pipes). So depending on the reply a new thread may have to be started.
Worker threads are strictly tied to a process, and each process can have at
most one worker thread running for it. Generally speaking, there are two types
@ -655,9 +655,9 @@ Table 7: VFS-FS requests locking guarantees
== Recovery from driver crashes ==
## 5 Recovery from driver crashes
VFS can recover from block special file and character special file driver
crashes. It can recover to some degree from a crashed File Server (which we
can regard as a driver).
VFS can recover from block, character, and socket driver crashes. It can
recover to some degree from a crashed File Server (which we can regard as a
driver).
=== Recovery from block drivers crashes ===
## 5.1 Recovery from block drivers crashes
@ -672,17 +672,18 @@ files can cause the block driver to crash again. When that happens, VFS will
stop the recovery. A driver can return ERESTART to VFS to tell it to retry
a request. VFS does this with an arbitrary maximum of 5 attempts.
=== Recovery from character driver crashes ===
## 5.2 Recovery from character driver crashes
=== Recovery from character and socket driver crashes ===
## 5.2 Recovery from character and socket driver crashes
While VFS used to support minimal recovery from character driver crashes, the
added complexity has so far proven to outweigh the benefits, especially since
such crash recovery can never be fully transparent: it depends entirely on the
character device as to whether repeating an I/O request makes sense at all.
Currently, all operations except close(2) on a file descriptor that identifies
a device on a crashed character driver, will result in an EIO error. It is up
to the application to reopen the character device and retry whatever it was
doing in the appropriate manner. In the future, automatic reopen and I/O
restart may be reintroduced for a limited subset of character drivers.
a device on a crashed character or socket driver, will result in an EIO error.
It is up to the application to reopen the character device or socket and retry
whatever it was doing in the appropriate manner. In the future, automatic
reopen and I/O restart may be reintroduced for a limited subset of character
drivers.
=== Recovery from File Server crashes ===
## 5.3 Recovery from File Server crashes

View File

@ -195,7 +195,7 @@ bdev_reply(void)
struct worker_thread *wp;
struct dmap *dp;
if ((dp = get_dmap(who_e)) == NULL) {
if ((dp = get_dmap_by_endpt(who_e)) == NULL) {
printf("VFS: ignoring block dev reply from unknown driver "
"%d\n", who_e);
return;

View File

@ -481,7 +481,7 @@ void
cdev_reply(void)
{
if (get_dmap(who_e) == NULL) {
if (get_dmap_by_endpt(who_e) == NULL) {
printf("VFS: ignoring char dev reply from unknown driver %d\n",
who_e);
return;
@ -492,11 +492,13 @@ cdev_reply(void)
cdev_generic_reply(&m_in);
break;
case CDEV_SEL1_REPLY:
select_reply1(m_in.m_source, m_in.m_lchardriver_vfs_sel1.minor,
select_cdev_reply1(m_in.m_source,
m_in.m_lchardriver_vfs_sel1.minor,
m_in.m_lchardriver_vfs_sel1.status);
break;
case CDEV_SEL2_REPLY:
select_reply2(m_in.m_source, m_in.m_lchardriver_vfs_sel2.minor,
select_cdev_reply2(m_in.m_source,
m_in.m_lchardriver_vfs_sel2.minor,
m_in.m_lchardriver_vfs_sel2.status);
break;
default:

View File

@ -101,7 +101,7 @@ int drv_sendrec(endpoint_t drv_e, message *reqmp)
return EIO;
}
if ((dp = get_dmap(drv_e)) == NULL)
if ((dp = get_dmap_by_endpt(drv_e)) == NULL)
panic("driver endpoint %d invalid", drv_e);
lock_dmap(dp);

View File

@ -7,6 +7,7 @@
#define NR_MNTS 16 /* # slots in mount table */
#define NR_VNODES 1024 /* # slots in vnode table */
#define NR_WTHREADS 9 /* # slots in worker thread table */
#define NR_SOCKDEVS 8 /* # slots in smap table */
#define NR_NONEDEVS NR_MNTS /* # slots in nonedev bitmap */
@ -21,6 +22,7 @@
#define FP_BLOCKED_ON_POPEN 3 /* susp'd on pipe open */
#define FP_BLOCKED_ON_SELECT 4 /* susp'd on select */
#define FP_BLOCKED_ON_CDEV 5 /* blocked on character device I/O */
#define FP_BLOCKED_ON_SDEV 6 /* blocked on socket I/O */
/* test if the process is blocked on something */
#define fp_is_blocked(fp) ((fp)->fp_blocked_on != FP_BLOCKED_ON_NONE)
@ -40,6 +42,11 @@
#define SEL_WR CDEV_OP_WR
#define SEL_ERR CDEV_OP_ERR
#define SEL_NOTIFY CDEV_NOTIFY /* not a real select operation */
/* If these constants diverge, VFS must be extended to perform mapping. */
#if (CDEV_OP_RD != SDEV_OP_RD || CDEV_OP_WR != SDEV_OP_WR || \
CDEV_OP_ERR != SDEV_OP_ERR || CDEV_NOTIFY != SDEV_NOTIFY)
#error "CDEV and SDEV select constants are different"
#endif
/* special driver endpoint for CTTY_MAJOR; must be able to pass isokendpt() */
#define CTTY_ENDPT VFS_PROC_NR

View File

@ -45,6 +45,10 @@ do_ioctl(void)
f->filp_flags);
break;
case S_IFSOCK:
r = sdev_ioctl(vp->v_sdev, request, arg, f->filp_flags);
break;
default:
r = ENOTTY;
}

View File

@ -110,6 +110,7 @@ int do_mapdriver(void)
* etc), and its label. This label is registered with DS, and allows us to
* retrieve the driver's endpoint.
*/
const int *domains;
int r, slot, ndomains;
devmajor_t major;
endpoint_t endpoint;
@ -125,7 +126,7 @@ int do_mapdriver(void)
label_len = job_m_in.m_lsys_vfs_mapdriver.labellen;
major = job_m_in.m_lsys_vfs_mapdriver.major;
ndomains = job_m_in.m_lsys_vfs_mapdriver.ndomains;
/* domains = job_m_in.m_lsys_vfs_mapdriver.domains; */
domains = job_m_in.m_lsys_vfs_mapdriver.domains;
/* Get the label */
if (label_len > sizeof(label)) { /* Can we store this label? */
@ -164,8 +165,7 @@ int do_mapdriver(void)
return r;
}
if (ndomains != 0) {
r = EINVAL; /* TODO: add support for mapping socket drivers */
if (r != OK) {
if ((r = smap_map(label, endpoint, domains, ndomains)) != OK) {
if (major != NO_DEV)
map_driver(NULL, major, NONE); /* undo */
return r;
@ -314,7 +314,7 @@ void dmap_endpt_up(endpoint_t proc_e, int is_blk)
/*===========================================================================*
* get_dmap_by_endpt *
*===========================================================================*/
struct dmap *get_dmap(endpoint_t proc_e)
struct dmap *get_dmap_by_endpt(endpoint_t proc_e)
{
/* See if 'proc_e' endpoint belongs to a valid dmap entry. If so, return a
* pointer */

View File

@ -28,8 +28,8 @@ EXTERN struct filp {
int filp_select_flags; /* Select flags for the filp */
/* following are for fd-type-specific select() */
int filp_pipe_select_ops;
dev_t filp_char_select_dev;
int filp_pipe_select_ops; /* used for pipes */
dev_t filp_select_dev; /* used for character and socket devices */
} filp[NR_FILPS];
#define FILP_CLOSED 0 /* filp_mode: associated device closed/gone */

View File

@ -82,6 +82,28 @@ void init_filps(void)
}
/*===========================================================================*
* check_fds *
*===========================================================================*/
int check_fds(struct fproc *rfp, int nfds)
{
/* Verify that process 'rfp' still has room for 'nfds' additional file
 * descriptors.  The descriptor table is scanned from slot zero upward and the
 * scan stops as soon as enough free slots have been seen.  Return OK if the
 * descriptors would fit, or EMFILE if the table has too few free slots.
 * Nothing is allocated or reserved by this call.
 */
	int slot, remaining;

	assert(nfds >= 1);

	remaining = nfds;	/* free slots still to be found */

	for (slot = 0; slot < OPEN_MAX; slot++) {
		if (rfp->fp_filp[slot] != NULL)
			continue;	/* slot is in use */

		remaining--;
		if (remaining == 0)
			return OK;
	}

	return EMFILE;
}
/*===========================================================================*
* get_fd *
*===========================================================================*/
@ -119,7 +141,7 @@ int get_fd(struct fproc *rfp, int start, mode_t bits, int *k, struct filp **fpt)
f->filp_selectors = 0;
f->filp_select_ops = 0;
f->filp_pipe_select_ops = 0;
f->filp_char_select_dev = NO_DEV;
f->filp_select_dev = NO_DEV;
f->filp_flags = 0;
f->filp_select_flags = 0;
f->filp_softlock = NULL;
@ -201,6 +223,27 @@ struct filp *find_filp(struct vnode *vp, mode_t bits)
return(NULL);
}
/*===========================================================================*
* find_filp_by_sock_dev *
*===========================================================================*/
struct filp *find_filp_by_sock_dev(dev_t dev)
{
/* Look through the filp table for an in-use file pointer that refers to a
 * socket with socket device number 'dev' and that has not been closed yet.
 * Return the first matching entry, or NULL if there is none.
 */
	struct filp *cand;
	int slot;

	for (slot = 0; slot < NR_FILPS; slot++) {
		cand = &filp[slot];

		if (cand->filp_count == 0 || cand->filp_vno == NULL)
			continue;	/* unused slot, or no vnode attached */
		if (!S_ISSOCK(cand->filp_vno->v_mode))
			continue;	/* not a socket */
		if (cand->filp_vno->v_sdev != dev)
			continue;	/* a different socket */
		if (cand->filp_mode == FILP_CLOSED)
			continue;	/* already closed/invalidated */

		return cand;
	}

	return NULL;
}
/*===========================================================================*
* invalidate_filp *
*===========================================================================*/
@ -228,6 +271,27 @@ void invalidate_filp_by_char_major(devmajor_t major)
}
}
/*===========================================================================*
* invalidate_filp_by_sock_drv *
*===========================================================================*/
void invalidate_filp_by_sock_drv(unsigned int num)
{
/* Walk the filp table and invalidate every in-use file pointer that refers to
 * a socket whose device number maps to the socket driver with smap number
 * 'num'.  Sockets whose device number has no smap entry are left alone.
 */
	struct filp *cur;
	struct smap *owner;
	int slot;

	for (slot = 0; slot < NR_FILPS; slot++) {
		cur = &filp[slot];

		if (cur->filp_count == 0 || cur->filp_vno == NULL)
			continue;	/* unused slot, or no vnode attached */
		if (!S_ISSOCK(cur->filp_vno->v_mode))
			continue;	/* not a socket */

		/* Find the driver that owns this socket, if any. */
		owner = get_smap_by_dev(cur->filp_vno->v_sdev, NULL);
		if (owner == NULL || owner->smap_num != num)
			continue;

		invalidate_filp(cur);
	}
}
/*===========================================================================*
* invalidate_filp_by_endpt *
*===========================================================================*/
@ -363,7 +427,8 @@ close_filp(struct filp *f)
if (f->filp_count - 1 == 0 && f->filp_mode != FILP_CLOSED) {
/* Check to see if the file is special. */
if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode)) {
if (S_ISCHR(vp->v_mode) || S_ISBLK(vp->v_mode) ||
S_ISSOCK(vp->v_mode)) {
dev = vp->v_sdev;
if (S_ISBLK(vp->v_mode)) {
lock_bsf();
@ -377,8 +442,23 @@ close_filp(struct filp *f)
unlock_bsf();
(void) bdev_close(dev); /* Ignore errors */
} else {
} else if (S_ISCHR(vp->v_mode)) {
(void) cdev_close(dev); /* Ignore errors */
} else {
/*
* TODO: this should be completely redone. Sockets may
* take a while to be closed (SO_LINGER etc), and thus,
* we should be able to issue a suspending close to a
* socket driver. Getting this working for close(2) is
* the easy case, but there's also eg dup2(2), which if
* interrupted by a signal should fail without closing
* the file descriptor. Then there are cases where the
* close should probably never block: close-on-exec,
* exit, and UDS closing in-flight FDs (currently just
* using close(2), but it could set the FD to non-
* blocking) for instance. There is much to do here.
*/
(void) sdev_close(dev); /* Ignore errors */
}
f->filp_mode = FILP_CLOSED;

View File

@ -49,6 +49,15 @@ EXTERN struct fproc {
endpoint_t endpt; /* driver endpoint */
cp_grant_id_t grant; /* data grant */
} u_cdev;
struct { /* FP_BLOCKED_ON_SDEV */
dev_t dev; /* socket number for blocking call */
int callnr; /* user call: a VFS_ socket call */
cp_grant_id_t grant[3]; /* data grant(s) */
union ixfer_u_aux {
int fd; /* listener file descr. (VFS_ACCEPT) */
vir_bytes buf; /* user buffer address (VFS_RECVMSG) */
} aux; /* call-specific auxiliary data */
} u_sdev;
} fp_u;
uid_t fp_realuid; /* real user id */
@ -77,6 +86,7 @@ EXTERN struct fproc {
#define fp_popen fp_u.u_popen
#define fp_flock fp_u.u_flock
#define fp_cdev fp_u.u_cdev
#define fp_sdev fp_u.u_sdev
/* fp_flags */
#define FP_NOFLAGS 0000

View File

@ -18,10 +18,14 @@
#include <minix/dmap.h>
#include <minix/ds.h>
#include <minix/rs.h>
#include <minix/callnr.h>
#include <limits.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <minix/syslib.h>
#include <minix/sysutil.h>

View File

@ -38,7 +38,6 @@ static void do_reply(struct worker_thread *wp);
static void do_work(void);
static void do_init_root(void);
static void handle_work(void (*func)(void));
static void reply(message *m_out, endpoint_t whom, int result);
static int get_work(void);
static void service_pm(void);
@ -130,6 +129,9 @@ int main(void)
} else if (IS_CDEV_RS(call_nr)) {
/* We've got results for a character device request. */
cdev_reply();
} else if (IS_SDEV_RS(call_nr)) {
/* We've got results for a socket driver request. */
sdev_reply();
} else {
/* Normal syscall. This spawns a new thread. */
handle_work(do_work);
@ -447,6 +449,7 @@ static int sef_cb_init_fresh(int UNUSED(type), sef_init_info_t *info)
panic("VFS: couldn't initialize block special file lock");
init_dmap(); /* Initialize device table. */
init_smap(); /* Initialize socket table. */
/* Map all the services in the boot image. */
if ((s = sys_safecopyfrom(RS_PROC_NR, info->rproctab_gid, 0,
@ -632,7 +635,7 @@ static int get_work(void)
/*===========================================================================*
* reply *
*===========================================================================*/
static void reply(message *m_out, endpoint_t whom, int result)
void reply(message *m_out, endpoint_t whom, int result)
{
/* Send a reply to a user process. If the send fails, just ignore it. */
int r;

View File

@ -53,6 +53,7 @@ int do_getsysinfo(void)
{
struct fproc *rfp;
struct fproc_light *rfpl;
struct smap *sp;
vir_bytes src_addr, dst_addr;
size_t len, buf_size;
int what;
@ -85,6 +86,9 @@ int do_getsysinfo(void)
rfpl->fpl_blocked_on = rfp->fp_blocked_on;
if (rfp->fp_blocked_on == FP_BLOCKED_ON_CDEV)
rfpl->fpl_task = rfp->fp_cdev.endpt;
else if (rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV &&
(sp = get_smap_by_dev(rfp->fp_sdev.dev, NULL)) != NULL)
rfpl->fpl_task = sp->smap_endpt;
else
rfpl->fpl_task = NONE;
}
@ -656,10 +660,11 @@ static void free_proc(int flags)
/* Check if any process is SUSPENDed on this driver.
* If a driver exits, unmap its entries in the dmap table.
* (unmapping has to be done after the first step, because the
* dmap table is used in the first step.)
* dmap/smap tables are used in the first step.)
*/
unsuspend_by_endpt(fp->fp_endpoint);
dmap_unmap_by_endpt(fp->fp_endpoint);
smap_unmap_by_endpt(fp->fp_endpoint);
worker_stop_by_endpt(fp->fp_endpoint); /* Unblock waiting threads */
vmnt_unmap_by_endpt(fp->fp_endpoint); /* Invalidate open files if this
@ -939,17 +944,20 @@ ds_event(void)
char key[DS_MAX_KEYLEN];
char *blkdrv_prefix = "drv.blk.";
char *chrdrv_prefix = "drv.chr.";
char *sckdrv_prefix = "drv.sck.";
u32_t value;
int type, r, is_blk;
int type, ftype, r;
endpoint_t owner_endpoint;
/* Get the event and the owner from DS. */
while ((r = ds_check(key, &type, &owner_endpoint)) == OK) {
/* Only check for block and character driver up events. */
/* Only check for block, character, socket driver up events. */
if (!strncmp(key, blkdrv_prefix, strlen(blkdrv_prefix))) {
is_blk = TRUE;
ftype = S_IFBLK;
} else if (!strncmp(key, chrdrv_prefix, strlen(chrdrv_prefix))) {
is_blk = FALSE;
ftype = S_IFCHR;
} else if (!strncmp(key, sckdrv_prefix, strlen(sckdrv_prefix))) {
ftype = S_IFSOCK;
} else {
continue;
}
@ -961,7 +969,10 @@ ds_event(void)
if (value != DS_DRIVER_UP) continue;
/* Perform up. */
dmap_endpt_up(owner_endpoint, is_blk);
if (ftype == S_IFBLK || ftype == S_IFCHR)
dmap_endpt_up(owner_endpoint, (ftype == S_IFBLK));
else
smap_endpt_up(owner_endpoint);
}
if (r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r);

View File

@ -337,12 +337,17 @@ void unsuspend_by_endpt(endpoint_t proc_e)
* return code EIO.
*/
struct fproc *rp;
struct smap *sp;
for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) {
if (rp->fp_pid == PID_FREE) continue;
if (rp->fp_blocked_on == FP_BLOCKED_ON_CDEV &&
rp->fp_cdev.endpt == proc_e)
revive(rp->fp_endpoint, EIO);
else if (rp->fp_blocked_on == FP_BLOCKED_ON_SDEV &&
(sp = get_smap_by_dev(rp->fp_sdev.dev, NULL)) != NULL &&
sp->smap_endpt == proc_e)
sdev_stop(rp);
}
/* Revive processes waiting in drivers on select()s with EAGAIN too */
@ -430,8 +435,8 @@ void release(struct vnode * vp, int op, int count)
void revive(endpoint_t proc_e, int returned)
{
/* Revive a previously blocked process. When a process hangs on tty, this
* is the way it is eventually released. For processes blocked on _SELECT and
* _CDEV, this function MUST NOT block its calling thread.
* is the way it is eventually released. For processes blocked on _SELECT,
* _CDEV, or _SDEV, this function MUST NOT block its calling thread.
*/
struct fproc *rfp;
int blocked_on;
@ -454,13 +459,15 @@ void revive(endpoint_t proc_e, int returned)
reviving++; /* process was waiting on pipe or lock */
} else {
rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
if (blocked_on == FP_BLOCKED_ON_POPEN) {
switch (blocked_on) {
case FP_BLOCKED_ON_POPEN:
/* process blocked in open or create */
replycode(proc_e, rfp->fp_popen.fd);
} else if (blocked_on == FP_BLOCKED_ON_SELECT) {
break;
case FP_BLOCKED_ON_SELECT:
replycode(proc_e, returned);
} else {
assert(blocked_on == FP_BLOCKED_ON_CDEV);
break;
case FP_BLOCKED_ON_CDEV:
/* If a grant has been issued by FS for this I/O, revoke
* it again now that I/O is done.
*/
@ -471,6 +478,15 @@ void revive(endpoint_t proc_e, int returned)
}
}
replycode(proc_e, returned);/* unblock the process */
break;
case FP_BLOCKED_ON_SDEV:
/*
* Cleaning up socket requests is too complex to put here, and
* neither sdev_reply() nor sdev_stop() call revive().
*/
panic("revive should not be used for socket calls");
default:
panic("unknown block state %d", blocked_on);
}
}
}
@ -491,8 +507,9 @@ void unpause(void)
blocked_on = fp->fp_blocked_on;
/* Clear the block status now. The procedure below might make blocking calls
* and it is imperative that while at least cdev_cancel() is executing, other
* parts of VFS do not perceive this process as blocked on something.
* and it is imperative that while at least cdev_cancel() or sdev_cancel()
* are executing, other parts of VFS do not perceive this process as blocked
* on something.
*/
fp->fp_blocked_on = FP_BLOCKED_ON_NONE;
@ -526,6 +543,11 @@ void unpause(void)
fp->fp_cdev.grant);
break;
case FP_BLOCKED_ON_SDEV: /* process blocked on socket I/O */
sdev_cancel();
return; /* sdev_cancel() sends its own reply */
default :
panic("VFS: unknown block reason: %d", blocked_on);
}

View File

@ -9,6 +9,7 @@
#include "request.h"
#include "threads.h"
#include "tll.h"
#include "type.h"
/* Structs used in prototypes must be declared as such first. */
struct filp;
@ -58,7 +59,7 @@ int do_mapdriver(void);
void init_dmap(void);
int dmap_driver_match(endpoint_t proc, devmajor_t major);
void dmap_endpt_up(endpoint_t proc_nr, int is_blk);
struct dmap *get_dmap(endpoint_t proc_e);
struct dmap *get_dmap_by_endpt(endpoint_t proc_e);
struct dmap *get_dmap_by_major(devmajor_t major);
void dmap_unmap_by_endpt(endpoint_t proc_nr);
int map_service(struct rprocpub *rpub);
@ -75,6 +76,8 @@ void check_filp_locks(void);
void check_filp_locks_by_me(void);
void init_filps(void);
struct filp *find_filp(struct vnode *vp, mode_t bits);
struct filp *find_filp_by_sock_dev(dev_t dev);
int check_fds(struct fproc *rfp, int nfds);
int get_fd(struct fproc *rfp, int start, mode_t bits, int *k,
struct filp **fpt);
struct filp *get_filp(int fild, tll_access_t locktype);
@ -85,6 +88,7 @@ void unlock_filps(struct filp *filp1, struct filp *filp2);
void invalidate_filp(struct filp *);
void invalidate_filp_by_endpt(endpoint_t proc_e);
void invalidate_filp_by_char_major(devmajor_t major);
void invalidate_filp_by_sock_drv(unsigned int num);
void close_filp(struct filp *fp);
int do_copyfd(void);
@ -108,6 +112,7 @@ void lock_revive(void);
int main(void);
void lock_proc(struct fproc *rfp);
void unlock_proc(struct fproc *rfp);
void reply(message *m_out, endpoint_t whom, int result);
void replycode(endpoint_t whom, int result);
void service_pm_postponed(void);
void thread_cleanup(void);
@ -254,6 +259,45 @@ int req_utime(endpoint_t fs_e, ino_t inode_nr, struct timespec * actv,
struct timespec * modtv);
int req_newdriver(endpoint_t fs_e, dev_t dev, char *label);
/* sdev.c */
int sdev_socket(int domain, int type, int protocol, dev_t *dev, int pair);
int sdev_bind(dev_t dev, vir_bytes addr, unsigned int addr_len,
int filp_flags);
int sdev_connect(dev_t dev, vir_bytes addr, unsigned int addr_len,
int filp_flags);
int sdev_listen(dev_t dev, int backlog);
int sdev_accept(dev_t dev, vir_bytes addr, unsigned int addr_len,
int filp_flags, int fd);
int sdev_readwrite(dev_t dev, vir_bytes data_buf, size_t data_len,
vir_bytes ctl_buf, unsigned int ctl_len, vir_bytes addr_buf,
unsigned int addr_len, int flags, int rw_flag, int filp_flags,
vir_bytes user_buf);
int sdev_ioctl(dev_t dev, unsigned long request, vir_bytes buf,
int filp_flags);
int sdev_setsockopt(dev_t dev, int level, int name, vir_bytes addr,
unsigned int len);
int sdev_getsockopt(dev_t dev, int level, int name, vir_bytes addr,
unsigned int *len);
int sdev_getsockname(dev_t dev, vir_bytes addr, unsigned int *addr_len);
int sdev_getpeername(dev_t dev, vir_bytes addr, unsigned int *addr_len);
int sdev_shutdown(dev_t dev, int how);
int sdev_close(dev_t dev);
int sdev_select(dev_t dev, int ops);
void sdev_stop(struct fproc *rfp);
void sdev_cancel(void);
void sdev_reply(void);
/* smap.c */
void init_smap(void);
int smap_map(const char *label, endpoint_t endpt, const int *domains,
unsigned int ndomains);
void smap_unmap_by_endpt(endpoint_t endpt);
void smap_endpt_up(endpoint_t endpt);
dev_t make_smap_dev(struct smap *sp, sockid_t sockid);
struct smap *get_smap_by_endpt(endpoint_t endpt);
struct smap *get_smap_by_domain(int domain);
struct smap *get_smap_by_dev(dev_t dev, sockid_t * sockidp);
/* socket.c */
int do_socket(void);
int do_socketpair(void);
@ -356,8 +400,10 @@ int do_select(void);
void init_select(void);
void select_callback(struct filp *, int ops);
void select_forget(void);
void select_reply1(endpoint_t driver_e, devminor_t minor, int status);
void select_reply2(endpoint_t driver_e, devminor_t minor, int status);
void select_cdev_reply1(endpoint_t driver_e, devminor_t minor, int status);
void select_cdev_reply2(endpoint_t driver_e, devminor_t minor, int status);
void select_sdev_reply1(dev_t dev, int status);
void select_sdev_reply2(dev_t dev, int status);
void select_unsuspend_by_endpt(endpoint_t proc);
void select_dump(void);

View File

@ -199,6 +199,17 @@ int read_write(struct fproc *rfp, int rw_flag, int fd, struct filp *f,
*/
position += size;
}
} else if (S_ISSOCK(vp->v_mode)) {
if (rw_flag == PEEKING) {
printf("VFS: read_write tries to peek on sock dev\n");
return EINVAL;
}
if (vp->v_sdev == NO_DEV)
panic("VFS: read_write tries to access sock dev NO_DEV");
r = sdev_readwrite(vp->v_sdev, buf, size, 0, 0, 0, 0, 0, rw_flag,
f->filp_flags, 0);
} else if (S_ISBLK(vp->v_mode)) { /* Block special files. */
if (vp->v_sdev == NO_DEV)
panic("VFS: read_write tries to access block dev NO_DEV");

1090
minix/servers/vfs/sdev.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -60,11 +60,14 @@ static void ops2tab(int ops, int fd, struct selectentry *e);
static int is_regular_file(struct filp *f);
static int is_pipe(struct filp *f);
static int is_char_device(struct filp *f);
static int is_sock_device(struct filp *f);
static void select_lock_filp(struct filp *f, int ops);
static int select_request_file(struct filp *f, int *ops, int block,
struct fproc *rfp);
static int select_request_char(struct filp *f, int *ops, int block,
struct fproc *rfp);
static int select_request_sock(struct filp *f, int *ops, int block,
struct fproc *rfp);
static int select_request_pipe(struct filp *f, int *ops, int block,
struct fproc *rfp);
static void select_cancel_all(struct selectentry *e);
@ -81,6 +84,7 @@ static struct fdtype {
int (*type_match)(struct filp *f);
} fdtypes[] = {
{ select_request_char, is_char_device },
{ select_request_sock, is_sock_device },
{ select_request_file, is_regular_file },
{ select_request_pipe, is_pipe },
};
@ -198,8 +202,8 @@ int do_select(void)
* other types of file is unspecified."
*
* In our case, terminal and pseudo-terminal devices are handled by the
* TTY major and sockets by either INET major (socket type AF_INET) or
* UDS major (socket type AF_UNIX). Additionally, we give other
* TTY and PTY character drivers respectively. Sockets are handled by
* their respective socket drivers. Additionally, we give other
* character drivers the chance to handle select for any of their
* device nodes. Some may not implement support for select and let
* libchardriver return EBADF, which we then pass to the calling
@ -361,6 +365,71 @@ static int is_char_device(struct filp *f)
return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
}
/*===========================================================================*
* is_sock_device *
*===========================================================================*/
static int is_sock_device(struct filp *f)
{
/* Tell whether the given filp refers to a socket device, returning 1 if so and
 * 0 otherwise (including when the filp or its vnode pointer is NULL). This
 * function MUST NOT block its calling thread. The given filp may or may not be
 * locked.
 */

  if (f == NULL || f->filp_vno == NULL)
	return 0;

  return S_ISSOCK(f->filp_vno->v_mode) ? 1 : 0;
}
/*===========================================================================*
* select_filter *
*===========================================================================*/
static int select_filter(struct filp *f, int *ops, int block)
{
/* Work out which of the requested select operations still have to be sent to
 * the driver. Shared by the character and socket device request paths. On
 * entry, '*ops' holds the requested operations; it is always reset to zero.
 * The return value is 0 if there is nothing to request, SUSPEND if a query for
 * this filp is already in progress, or otherwise the set of operations to
 * request from the driver. This function MUST NOT block its calling thread.
 */
  int wanted, stable;

  wanted = *ops;

  /* Unless decided otherwise, report no operations as ready. */
  *ops = 0;

  /*
   * Operations for which we already asked the driver to notify us, and for
   * which no notification has arrived yet, can be assumed not to be ready at
   * this time. A nonblocking call may therefore drop those operations without
   * asking the driver again. This is only valid if the flags are "stable":
   * we must be neither waiting to query the driver (FSF_UPDATE) nor in the
   * middle of querying it (FSF_BUSY). This is a dangerous case of premature
   * optimization and may be removed altogether if it proves to continue to be
   * a source of bugs.
   */
  stable = !(f->filp_select_flags & (FSF_UPDATE | FSF_BUSY));

  if (!block && stable && (f->filp_select_flags & FSF_BLOCKED)) {
	/* Clearing a bit that was not requested is harmless. */
	if (f->filp_select_flags & FSF_RD_BLOCK)
		wanted &= ~SEL_RD;
	if (f->filp_select_flags & FSF_WR_BLOCK)
		wanted &= ~SEL_WR;
	if (f->filp_select_flags & FSF_ERR_BLOCK)
		wanted &= ~SEL_ERR;

	if ((wanted & (SEL_RD | SEL_WR | SEL_ERR)) == 0)
		return(0);
  }

  /* From here on, the driver has to be (re)queried for this filp. */
  f->filp_select_flags |= FSF_UPDATE;

  if (block) {
	/* Ask for a later notification, and remember what we wait for. */
	wanted |= SEL_NOTIFY;

	if (wanted & SEL_RD)
		f->filp_select_flags |= FSF_RD_BLOCK;
	if (wanted & SEL_WR)
		f->filp_select_flags |= FSF_WR_BLOCK;
	if (wanted & SEL_ERR)
		f->filp_select_flags |= FSF_ERR_BLOCK;
  }

  /* A query for this filp is still in progress; try again afterwards. */
  if (f->filp_select_flags & FSF_BUSY)
	return(SUSPEND);

  return wanted;
}
/*===========================================================================*
* select_request_char *
*===========================================================================*/
@ -394,7 +463,7 @@ static int select_request_char(struct filp *f, int *ops, int block,
if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
return(ENXIO);
if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) {
if (f->filp_select_dev != NO_DEV && f->filp_select_dev != dev) {
/* Currently, this case can occur as follows: a process with a
* controlling terminal opens /dev/tty and forks, the new child starts
* a new session, opens a new controlling terminal, and both parent and
@ -405,46 +474,10 @@ static int select_request_char(struct filp *f, int *ops, int block,
printf("VFS: file pointer has multiple controlling TTYs!\n");
return(EIO);
}
f->filp_char_select_dev = dev; /* set before possibly suspending */
f->filp_select_dev = dev; /* set before possibly suspending */
rops = *ops;
/* By default, nothing to do */
*ops = 0;
/*
* If we have previously asked the driver to notify us about certain ready
* operations, but it has not notified us yet, then we can safely assume that
* those operations are not ready right now. Therefore, if this call is not
* supposed to block, we can disregard the pending operations as not ready.
* We must make absolutely sure that the flags are "stable" right now though:
* we are neither waiting to query the driver about them (FSF_UPDATE) nor
* querying the driver about them right now (FSF_BUSY). This is a dangerous
* case of premature optimization and may be removed altogether if it proves
* to continue to be a source of bugs.
*/
if (!block && !(f->filp_select_flags & (FSF_UPDATE | FSF_BUSY)) &&
(f->filp_select_flags & FSF_BLOCKED)) {
if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
rops &= ~SEL_RD;
if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
rops &= ~SEL_WR;
if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
rops &= ~SEL_ERR;
if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
return(OK);
}
f->filp_select_flags |= FSF_UPDATE;
if (block) {
rops |= SEL_NOTIFY;
if (rops & SEL_RD) f->filp_select_flags |= FSF_RD_BLOCK;
if (rops & SEL_WR) f->filp_select_flags |= FSF_WR_BLOCK;
if (rops & SEL_ERR) f->filp_select_flags |= FSF_ERR_BLOCK;
}
if (f->filp_select_flags & FSF_BUSY)
return(SUSPEND);
if ((rops = select_filter(f, ops, block)) <= 0)
return(rops); /* OK or suspend: nothing to do for now */
dp = &dmap[major(dev)];
if (dp->dmap_sel_busy)
@ -462,6 +495,46 @@ static int select_request_char(struct filp *f, int *ops, int block,
return(SUSPEND);
}
/*===========================================================================*
* select_request_sock *
*===========================================================================*/
static int select_request_sock(struct filp *f, int *ops, int block,
	struct fproc *rfp __unused)
{
/* Start checking the readiness status of a socket device. Unless the outcome
 * is known right away, this merely sends off (or queues) a select query to
 * the socket driver, and the result is processed later. Returns ENXIO if the
 * socket has no valid driver, the (nonpositive) result of select_filter() if
 * nothing needs to be sent now, an error from sdev_select(), or SUSPEND once
 * the query is pending. This function MUST NOT block its calling thread. The
 * given filp may or may not be locked.
 */
  struct smap *drv;
  dev_t sock_dev;
  int wanted, res;

  sock_dev = f->filp_vno->v_sdev;

  drv = get_smap_by_dev(sock_dev, NULL);
  if (drv == NULL)
	return(ENXIO);	/* this should not happen */

  /* Record the device before any path that may suspend. */
  f->filp_select_dev = sock_dev;

  wanted = select_filter(f, ops, block);
  if (wanted <= 0)
	return(wanted);	/* OK or suspend: nothing to do for now */

  /* The driver is still processing another select query: wait our turn. */
  if (drv->smap_sel_busy)
	return(SUSPEND);

  f->filp_select_flags &= ~FSF_UPDATE;

  if ((res = sdev_select(sock_dev, wanted)) != OK)
	return(res);

  /* The query is on its way; remember which filp it was sent for. */
  drv->smap_sel_busy = TRUE;
  drv->smap_sel_filp = f;
  f->filp_select_flags |= FSF_BUSY;

  return(SUSPEND);
}
/*===========================================================================*
* select_request_file *
*===========================================================================*/
@ -644,6 +717,7 @@ static void select_cancel_filp(struct filp *f)
* its calling thread.
*/
devmajor_t major;
struct smap *sp;
assert(f);
assert(f->filp_selectors > 0);
@ -657,16 +731,22 @@ static void select_cancel_filp(struct filp *f)
f->filp_pipe_select_ops = 0;
/* If this filp is the subject of an ongoing select query to a
* character device, mark the query as stale, so that this filp will
* not be checked when the result arrives. The filp select device may
* still be NO_DEV if do_select fails on the initial fd check.
* character or socket device, mark the query as stale, so that this
* filp will not be checked when the result arrives. The filp select
* device may still be NO_DEV if do_select fails on the initial fd
* check.
*/
if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) {
major = major(f->filp_char_select_dev);
if (is_char_device(f) && f->filp_select_dev != NO_DEV) {
major = major(f->filp_select_dev);
if (dmap[major].dmap_sel_busy &&
dmap[major].dmap_sel_filp == f)
dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
f->filp_char_select_dev = NO_DEV;
f->filp_select_dev = NO_DEV;
} else if (is_sock_device(f) && f->filp_select_dev != NO_DEV) {
if ((sp = get_smap_by_dev(f->filp_select_dev, NULL)) != NULL &&
sp->smap_sel_busy && sp->smap_sel_filp == f)
sp->smap_sel_filp = NULL; /* leave _busy set */
f->filp_select_dev = NO_DEV;
}
}
}
@ -778,11 +858,19 @@ void select_timeout_check(int s)
void select_unsuspend_by_endpt(endpoint_t proc_e)
{
/* Revive blocked processes when a driver has disappeared */
struct dmap *dp;
struct smap *sp;
devmajor_t major;
int fd, s;
int fd, s, is_driver;
struct selectentry *se;
struct filp *f;
/* Either or both of these may be NULL. */
dp = get_dmap_by_endpt(proc_e);
sp = get_smap_by_endpt(proc_e);
is_driver = (dp != NULL || sp != NULL);
for (s = 0; s < MAXSELECTS; s++) {
int wakehim = 0;
se = &selecttab[s];
@ -793,31 +881,102 @@ void select_unsuspend_by_endpt(endpoint_t proc_e)
continue;
}
for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL || !is_char_device(f))
continue;
/* Skip the more expensive "driver died" checks for non-drivers. */
if (!is_driver)
continue;
assert(f->filp_char_select_dev != NO_DEV);
major = major(f->filp_char_select_dev);
if (dmap_driver_match(proc_e, major)) {
se->filps[fd] = NULL;
se->error = EIO;
select_cancel_filp(f);
wakehim = 1;
for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL)
continue;
if (is_char_device(f)) {
assert(f->filp_select_dev != NO_DEV);
major = major(f->filp_select_dev);
if (dmap_driver_match(proc_e, major)) {
se->filps[fd] = NULL;
se->error = EIO;
select_cancel_filp(f);
wakehim = 1;
}
} else if (sp != NULL && is_sock_device(f)) {
assert(f->filp_select_dev != NO_DEV);
if (get_smap_by_dev(f->filp_select_dev, NULL) == sp) {
se->filps[fd] = NULL;
se->error = EIO;
select_cancel_filp(f);
wakehim = 1;
}
}
}
if (wakehim && !is_deferred(se))
select_return(se);
}
/* Any outstanding queries will never be answered, so forget about them. */
if (dp != NULL) {
assert(dp->dmap_sel_filp == NULL);
dp->dmap_sel_busy = FALSE;
}
if (sp != NULL) {
assert(sp->smap_sel_filp == NULL);
sp->smap_sel_busy = FALSE;
}
}
/*===========================================================================*
* select_reply1 *
*===========================================================================*/
void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
static void select_reply1(struct filp *f, int status)
{
/* Handle the initial reply to CDEV_SELECT request. This function MUST NOT
/* Handle the initial reply to a character or socket select request. This
* function MUST NOT block its calling thread.
*/
assert(f->filp_count >= 1);
assert(f->filp_select_flags & FSF_BUSY);
f->filp_select_flags &= ~FSF_BUSY;
/* The select call is done now, except when
* - another process started a select on the same filp with possibly a
* different set of operations.
* - a process does a select on the same filp but using different file
* descriptors.
* - the select has a timeout. Upon receiving this reply the operations
* might not be ready yet, so we want to wait for that to ultimately
* happen.
* Therefore we need to keep remembering what the operations are.
*/
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
/* there may be operations pending */
f->filp_select_ops &= ~status;
/* Record new filp status */
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
if (status > 0) { /* operations ready */
if (status & SEL_RD)
f->filp_select_flags &= ~FSF_RD_BLOCK;
if (status & SEL_WR)
f->filp_select_flags &= ~FSF_WR_BLOCK;
if (status & SEL_ERR)
f->filp_select_flags &= ~FSF_ERR_BLOCK;
} else if (status < 0) { /* error */
/* Always unblock upon error */
f->filp_select_flags &= ~FSF_BLOCKED;
}
}
filp_status(f, status); /* Tell filp owners about the results */
}
/*===========================================================================*
* select_cdev_reply1 *
*===========================================================================*/
void select_cdev_reply1(endpoint_t driver_e, devminor_t minor, int status)
{
/* Handle the initial reply to a CDEV_SELECT request. This function MUST NOT
* block its calling thread.
*/
devmajor_t major;
@ -826,7 +985,7 @@ void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
struct dmap *dp;
/* Figure out which device is replying */
if ((dp = get_dmap(driver_e)) == NULL) return;
if ((dp = get_dmap_by_endpt(driver_e)) == NULL) return;
major = dp-dmap;
dev = makedev(major, minor);
@ -845,13 +1004,13 @@ void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
if ((f = dp->dmap_sel_filp) != NULL) {
/* Find vnode and check we got a reply from the device we expected */
assert(is_char_device(f));
assert(f->filp_char_select_dev != NO_DEV);
if (f->filp_char_select_dev != dev) {
assert(f->filp_select_dev != NO_DEV);
if (f->filp_select_dev != dev) {
/* This should never happen. The driver may be misbehaving.
* For now we assume that the reply we want will arrive later..
*/
printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
__FILE__, __LINE__, f->filp_char_select_dev, dev);
__FILE__, __LINE__, f->filp_select_dev, dev);
return;
}
}
@ -860,83 +1019,78 @@ void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
dp->dmap_sel_busy = FALSE;
dp->dmap_sel_filp = NULL;
/* Process the select result only if the filp is valid. */
if (f != NULL) {
assert(f->filp_count >= 1);
assert(f->filp_select_flags & FSF_BUSY);
f->filp_select_flags &= ~FSF_BUSY;
/* The select call is done now, except when
* - another process started a select on the same filp with possibly a
* different set of operations.
* - a process does a select on the same filp but using different file
* descriptors.
* - the select has a timeout. Upon receiving this reply the operations
* might not be ready yet, so we want to wait for that to ultimately
* happen.
* Therefore we need to keep remembering what the operations are.
*/
if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
f->filp_select_ops = 0; /* done selecting */
else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
/* there may be operations pending */
f->filp_select_ops &= ~status;
/* Record new filp status */
if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
if (status > 0) { /* operations ready */
if (status & SEL_RD)
f->filp_select_flags &= ~FSF_RD_BLOCK;
if (status & SEL_WR)
f->filp_select_flags &= ~FSF_WR_BLOCK;
if (status & SEL_ERR)
f->filp_select_flags &= ~FSF_ERR_BLOCK;
} else if (status < 0) { /* error */
/* Always unblock upon error */
f->filp_select_flags &= ~FSF_BLOCKED;
}
}
filp_status(f, status); /* Tell filp owners about the results */
}
/* Process the status change, if still applicable. */
if (f != NULL)
select_reply1(f, status);
/* See if we should send a select request for another filp now. */
select_restart_filps();
}
/*===========================================================================*
* select_sdev_reply1 *
*===========================================================================*/
void select_sdev_reply1(dev_t dev, int status)
{
/* Handle the initial reply to a SDEV_SELECT request. 'dev' is the socket
 * device number the driver replied for, and 'status' is the select result
 * that is passed on to select_reply1(). Replies for unknown socket drivers,
 * unexpected replies, and replies for a different socket than the one that was
 * queried are dropped. This function MUST NOT block its calling thread.
 */
  struct smap *sp;
  struct filp *f;

  if ((sp = get_smap_by_dev(dev, NULL)) == NULL)
	return;

  /* Only accept a reply if we actually sent this driver a query. */
  if (!sp->smap_sel_busy) {
	printf("VFS: was not expecting a SDEV_SELECT reply from %d\n",
		sp->smap_endpt);
	return;
  }

  /* The select filp may have been set to NULL if the requestor has been
   * unpaused in the meantime. In that case, we ignore the result, but we do
   * look for other filps to restart later.
   */
  if ((f = sp->smap_sel_filp) != NULL) {
	/* Check that we got a reply from the device we expected */
	assert(is_sock_device(f));
	assert(f->filp_select_dev != NO_DEV);
	if (f->filp_select_dev != dev) {
		/* This should never happen. The driver may be misbehaving.
		 * For now we assume that the reply we want will arrive later..
		 * The explicit casts make the arguments match the "%llx"
		 * conversions regardless of how dev_t is defined.
		 */
		printf("VFS: expected reply from sock dev %llx, not %llx\n",
			(unsigned long long)f->filp_select_dev,
			(unsigned long long)dev);
		return;
	}
  }

  /* We are no longer waiting for a reply from this socket driver. */
  sp->smap_sel_busy = FALSE;
  sp->smap_sel_filp = NULL;

  /* Process the status change, if still applicable. */
  if (f != NULL)
	select_reply1(f, status);

  /* See if we should send a select request for another filp now. */
  select_restart_filps();
}
/*===========================================================================*
* select_reply2 *
*===========================================================================*/
void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
static void select_reply2(int is_char, dev_t dev, int status)
{
/* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
* the select request is 'blocking' until an operation becomes ready. This
* function MUST NOT block its calling thread.
/* Find all file descriptors selecting for the given character (is_char==TRUE)
* or socket (is_char==FALSE) device, update their statuses, and resume
* activities accordingly.
*/
int slot, found, fd;
devmajor_t major;
dev_t dev;
struct filp *f;
struct dmap *dp;
struct selectentry *se;
if (status == 0) {
printf("VFS (%s:%d): weird status (%d) to report\n",
__FILE__, __LINE__, status);
return;
}
/* Figure out which device is replying */
if ((dp = get_dmap(driver_e)) == NULL) {
printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
__FILE__, __LINE__, driver_e);
return;
}
major = dp-dmap;
dev = makedev(major, minor);
/* Find all file descriptors selecting for this device */
for (slot = 0; slot < MAXSELECTS; slot++) {
se = &selecttab[slot];
if (se->requestor == NULL) continue; /* empty slot */
@ -944,9 +1098,10 @@ void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
found = FALSE;
for (fd = 0; fd < se->nfds; fd++) {
if ((f = se->filps[fd]) == NULL) continue;
if (!is_char_device(f)) continue;
assert(f->filp_char_select_dev != NO_DEV);
if (f->filp_char_select_dev != dev) continue;
if (is_char && !is_char_device(f)) continue;
if (!is_char && !is_sock_device(f)) continue;
assert(f->filp_select_dev != NO_DEV);
if (f->filp_select_dev != dev) continue;
if (status > 0) { /* Operations ready */
/* Clear the replied bits from the request
@ -979,6 +1134,56 @@ void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
select_restart_filps();
}
/*===========================================================================*
* select_cdev_reply2 *
*===========================================================================*/
void select_cdev_reply2(endpoint_t driver_e, devminor_t minor, int status)
{
/* Handle a secondary reply to a CDEV_SELECT request. Such a reply is sent when
 * a 'blocking' select request was waiting for an operation to become ready.
 * The replying endpoint and minor number are converted to a device number,
 * after which the generic secondary reply code takes over. This function MUST
 * NOT block its calling thread.
 */
  struct dmap *drv;
  devmajor_t drv_major;

  /* A secondary reply must report either ready operations or an error. */
  if (status == 0) {
	printf("VFS (%s:%d): weird status (%d) to report\n",
		__FILE__, __LINE__, status);
	return;
  }

  /* Figure out which device is replying */
  drv = get_dmap_by_endpt(driver_e);
  if (drv == NULL) {
	printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
		__FILE__, __LINE__, driver_e);
	return;
  }

  /* The position of the entry in the dmap table is the major number. */
  drv_major = drv - dmap;

  select_reply2(TRUE /*is_char*/, makedev(drv_major, minor), status);
}
/*===========================================================================*
* select_sdev_reply2 *
*===========================================================================*/
void select_sdev_reply2(dev_t dev, int status)
{
/* Handle a secondary reply to a SDEV_SELECT request. Such a reply is sent when
 * a 'blocking' select request was waiting for an operation to become ready.
 * A zero status is reported and ignored; anything else is handed to the
 * generic secondary reply code. This function MUST NOT block its calling
 * thread.
 */

  if (status != 0) {
	select_reply2(FALSE /*is_char*/, dev, status);
	return;
  }

  printf("VFS: weird socket device status (%d)\n", status);
}
/*===========================================================================*
* select_restart_filps *
*===========================================================================*/
@ -1013,14 +1218,19 @@ static void select_restart_filps(void)
if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in */
continue; /* 'update' state */
/* This function is suitable only for character devices. In
* particular, checking pipes the same way would introduce a
* serious locking problem.
/* This function is suitable only for character and socket
* devices. In particular, checking pipes the same way would
* introduce a serious locking problem.
*/
assert(is_char_device(f));
assert(is_char_device(f) || is_sock_device(f));
wantops = ops = f->filp_select_ops;
r = select_request_char(f, &wantops, se->block, se->requestor);
if (is_char_device(f))
r = select_request_char(f, &wantops, se->block,
se->requestor);
else
r = select_request_sock(f, &wantops, se->block,
se->requestor);
if (r != OK && r != SUSPEND) {
se->error = r;
restart_proc(se);
@ -1122,7 +1332,9 @@ select_dump(void)
struct selectentry *se;
struct filp *f;
struct dmap *dp;
struct smap *sp;
dev_t dev;
sockid_t sockid;
int s, fd;
for (s = 0; s < MAXSELECTS; s++) {
@ -1158,6 +1370,18 @@ select_dump(void)
dp->dmap_sel_filp);
} else
printf("unknown)\n");
} else if (is_sock_device(f)) {
dev = f->filp_vno->v_sdev;
printf("sock (dev ");
sp = get_smap_by_dev(dev, &sockid);
if (sp != NULL) {
printf("<%d,%d>, smap busy %d filp "
"%p)\n", sp->smap_num, sockid,
sp->smap_sel_busy,
sp->smap_sel_filp);
} else
printf("<0x%"PRIx64">, smap "
"unknown)\n", dev);
} else
printf("unknown\n");
}

273
minix/servers/vfs/smap.c Normal file
View File

@ -0,0 +1,273 @@
/*
* This file contains the table with socket driver mappings. One socket driver
* may implement multiple domains (e.g., PF_INET and PF_INET6). For this
* reason, we assign a unique number to each socket driver, and use a "socket
* device map" table (smap) that maps from those numbers to information about
* socket drivers. This number is combined with a per-driver socket identifier
* to form a globally unique socket ID (64-bit, stored as dev_t). In addition,
* we use a table that maps from PF_xxx domains to socket drivers (pfmap).
*/
#include "fs.h"
#include <sys/socket.h>
#include <assert.h>
static struct smap smap[NR_SOCKDEVS];
static struct smap *pfmap[PF_MAX];
/*
* Initialize the socket device map table.
*/
void
init_smap(void)
{
unsigned int i;
for (i = 0; i < __arraycount(smap); i++) {
/*
* The smap numbers are one-based so as to ensure that no
* socket will have the device number NO_DEV, which would
* create problems with eg the select code.
*/
smap[i].smap_num = i + 1;
smap[i].smap_endpt = NONE;
}
memset(pfmap, 0, sizeof(pfmap));
}
/*
 * Register a socket driver.  This action can only be requested by RS.  The
 * process with DS label 'label' and endpoint 'endpt' becomes responsible for
 * sockets created in each of the 'ndomains' domains listed in the 'domains'
 * array.  Return OK upon successful registration.  Otherwise, return EINVAL
 * for an invalid domain count or domain number, EBUSY if one of the domains is
 * already held by a different driver, or ENOMEM if the table is full.
 */
int
smap_map(const char * label, endpoint_t endpt, const int * domains,
	unsigned int ndomains)
{
	struct smap *old_sp, *new_sp;
	unsigned int idx, slot;
	int pf;

	if (ndomains == 0 || ndomains > NR_DOMAIN)
		return EINVAL;

	/*
	 * Look for an in-use entry with the same label.  Finding one most
	 * likely means that the socket driver is being restarted, in which
	 * case its previous entry is to be overwritten.
	 */
	old_sp = NULL;
	for (idx = 0; idx < __arraycount(smap); idx++) {
		if (smap[idx].smap_endpt == NONE)
			continue;
		if (strcmp(smap[idx].smap_label, label) == 0) {
			old_sp = &smap[idx];
			break;
		}
	}

	/*
	 * Validate all domains before changing anything.  A domain may be
	 * taken only if it is currently free or held by the old instance of
	 * this very driver.
	 */
	for (idx = 0; idx < ndomains; idx++) {
		pf = domains[idx];

		if (pf < 0 || (unsigned int)pf >= __arraycount(pfmap) ||
		    pf == PF_UNSPEC)
			return EINVAL;
		if (pfmap[pf] != NULL && pfmap[pf] != old_sp)
			return EBUSY;
	}

	if (old_sp == NULL) {
		/* A new driver: claim the first unused entry, if any. */
		slot = 0;
		while (slot < __arraycount(smap) &&
		    smap[slot].smap_endpt != NONE)
			slot++;
		if (slot == __arraycount(smap))
			return ENOMEM;
	} else {
		slot = (unsigned int)(old_sp - smap);

		/*
		 * From here on the registration cannot fail, so the tables
		 * may be modified.  If the service endpoint has changed,
		 * cancel operations involving the previous endpoint and
		 * invalidate any preexisting sockets.  For stateless
		 * restarts, it is common that the new endpoint is made ready
		 * before the old endpoint has exited, so this cannot be left
		 * to the exit handling code: these steps rely on the old
		 * socket mapping still being around.  For stateful restarts,
		 * where the endpoint stays the same, leave things as is.
		 */
		if (old_sp->smap_endpt != endpt) {
			unsuspend_by_endpt(old_sp->smap_endpt);
			invalidate_filp_by_sock_drv(old_sp->smap_num);
		}

		/*
		 * Just to be sure, drop all domain mappings of the old
		 * instance, in case it was registered with a different set.
		 */
		for (idx = 0; idx < __arraycount(pfmap); idx++) {
			if (pfmap[idx] == old_sp)
				pfmap[idx] = NULL;
		}
	}

	/* Fill in the socket driver map entry. */
	new_sp = &smap[slot];
	new_sp->smap_endpt = endpt;
	strlcpy(new_sp->smap_label, label, sizeof(new_sp->smap_label));
	new_sp->smap_sel_busy = FALSE;
	new_sp->smap_sel_filp = NULL;

	/* Point each of the requested domains at the entry. */
	for (idx = 0; idx < ndomains; idx++)
		pfmap[domains[idx]] = new_sp;

	return OK;
}
/*
 * The process with the given endpoint has exited.  If it was registered as a
 * socket driver, invalidate the sockets it owned, mark its map entry as
 * unused, and remove its domain mappings.  Otherwise, do nothing.
 */
void
smap_unmap_by_endpt(endpoint_t endpt)
{
	struct smap *sp, **pfp;

	sp = get_smap_by_endpt(endpt);
	if (sp == NULL)
		return;

	/*
	 * The invalidation step needs the smap entry to still be in place, so
	 * it has to happen before the endpoint is cleared.
	 */
	invalidate_filp_by_sock_drv(sp->smap_num);

	sp->smap_endpt = NONE;

	for (pfp = &pfmap[0]; pfp < &pfmap[__arraycount(pfmap)]; pfp++) {
		if (*pfp == sp)
			*pfp = NULL;
	}
}
/*
 * The given endpoint has announced itself as a socket driver.  If it is not
 * registered as one, ignore the announcement.
 */
void
smap_endpt_up(endpoint_t endpt)
{
	struct smap *sp;

	sp = get_smap_by_endpt(endpt);

	/*
	 * An announcement means that the socket driver either started anew or
	 * restarted statelessly.  In the latter case none of its previously
	 * existing sockets have survived, so mark those as invalid.
	 */
	if (sp != NULL)
		invalidate_filp_by_sock_drv(sp->smap_num);
}
/*
 * Construct a unique identifier for a socket, by combining the entry number
 * of the given socket map (upper 32 bits) with the given per-driver socket
 * identifier (lower 32 bits).  The result has type dev_t because the value is
 * stored as special device number (sdev) on a socket node on PFS.  We use our
 * own bit division rather than the standard major/minor division, as that
 * simplifies using each half as a 32-bit value.  Block/character device
 * numbers and socket device numbers live in different namespaces and may
 * overlap (even though this is currently practically impossible), so one must
 * always test the file type first.
 */
dev_t
make_smap_dev(struct smap * sp, sockid_t sockid)
{
	uint64_t upper, lower;

	assert(sp->smap_endpt != NONE);
	assert(sockid >= 0);

	upper = (uint64_t)sp->smap_num << 32;
	lower = (uint32_t)sockid;

	return (dev_t)(upper | lower);
}
/*
 * Look up the socket driver that the given socket device number belongs to,
 * and return a pointer to its smap structure.  If 'sockidp' is not NULL, also
 * store the per-driver socket identifier in it.  Return NULL if the number
 * does not refer to a socket of a currently registered socket driver.
 */
struct smap *
get_smap_by_dev(dev_t dev, sockid_t * sockidp)
{
	struct smap *sp;
	unsigned int drv_num;
	sockid_t sock_id;

	/* Split the number into the driver half and the socket half. */
	drv_num = (unsigned int)(dev >> 32);
	sock_id = (sockid_t)(dev & ((1ULL << 32) - 1));

	/* Driver numbers are one-based; socket identifiers are nonnegative. */
	if (drv_num < 1 || drv_num > __arraycount(smap))
		return NULL;
	if (sock_id < 0)
		return NULL;

	sp = &smap[drv_num - 1];
	assert(sp->smap_num == drv_num);

	/* The entry exists, but no driver is registered in it right now. */
	if (sp->smap_endpt == NONE)
		return NULL;

	if (sockidp != NULL)
		*sockidp = sock_id;

	return sp;
}
/*
* Return a pointer to the smap structure for the socket driver with the given
* endpoint. Return NULL if the endpoint does not identify a socket driver.
*/
struct smap *
get_smap_by_endpt(endpoint_t endpt)
{
	unsigned int i;

	/*
	 * Free entries have their endpoint set to NONE.  A lookup for NONE
	 * must therefore never match: it would hand out a free slot as if it
	 * were a live socket driver.
	 */
	if (endpt == NONE)
		return NULL;

	/*
	 * TODO: this function is used rather frequently, so it would be nice
	 * to get rid of the O(n) loop here.  The get_dmap_by_endpt() function
	 * suffers from the same problem.  It might be worth adding an extra
	 * field to the fproc structure for this.
	 */
	for (i = 0; i < __arraycount(smap); i++)
		if (smap[i].smap_endpt == endpt)
			return &smap[i];

	/* No socket driver is registered under this endpoint. */
	return NULL;
}
/*
* Return a pointer to the smap structure for the socket driver handling the
* given domain (protocol family). Return NULL if there is no match.
*/
struct smap *
get_smap_by_domain(int domain)
{
	/*
	 * Only domains that fall within the protocol family table can have a
	 * driver.  The table slot itself may still be NULL, which means that
	 * no driver handles the domain.
	 */
	if (domain >= 0 && (unsigned int)domain < __arraycount(pfmap))
		return pfmap[domain];

	return NULL;
}

View File

@ -1,7 +1,4 @@
/*
* IMPORTANT NOTICE: THIS FILE CONTAINS STUBS ONLY RIGHT NOW, TO ENABLE A
* SEAMLESS TRANSITION TO THE NEW API FOR PROGRAMS STATICALLY LINKED TO LIBC!
*
* This file implements the upper socket layer of VFS: the BSD socket system
* calls, and any associated file descriptor, file pointer, vnode, and file
* system processing. In most cases, this layer will call into the lower
@ -35,17 +32,189 @@
*/
#include "fs.h"
#include "vnode.h"
#include "file.h"
#include <sys/socket.h>
/*
* Convert any SOCK_xx open flags to O_xx open flags.
*/
static int
get_sock_flags(int type)
{
	int oflags = 0;

	/* Each SOCK_xx flag in the socket type maps onto one O_xx flag. */
	oflags |= (type & SOCK_CLOEXEC) ? O_CLOEXEC : 0;
	oflags |= (type & SOCK_NONBLOCK) ? O_NONBLOCK : 0;
	oflags |= (type & SOCK_NOSIGPIPE) ? O_NOSIGPIPE : 0;

	return oflags;
}
/*
* Perform cheap pre-call checks to ensure that the given number of socket FDs
* can be created for the current process.
*/
static int
check_sock_fds(int nfds)
{
	int r;

	/*
	 * Only the number of free file descriptor slots in the calling process
	 * is checked at this point.  The process stays blocked on its socket
	 * call, so that number cannot change in the meantime.  File pointers,
	 * vnodes, and PFS nodes may come and go while the call is in progress;
	 * their availability has to be verified upon completion regardless,
	 * so checking them here would add little.
	 */
	r = check_fds(fp, nfds);

	return r;
}
/*
 * Create a new file descriptor, including supporting objects, for the open
 * socket identified by 'dev', in the current process, using the O_xx open
 * flags 'flags'. On success, return the file descriptor number. The results
 * of a successful call can be undone with close_fd(), which will also close
 * the socket itself. On failure, return a negative error code. In this case,
 * the socket will be left open.
 *
 * Parameters:
 *   dev    socket device number; it is passed to PFS when creating the
 *          backing node and stored on the new vnode as v_sdev
 *   flags  any combination of O_CLOEXEC, O_NONBLOCK, and O_NOSIGPIPE; any
 *          other bit trips the assertion at the top of the function
 *
 * Objects are obtained in the order: PFS vmnt lock, free vnode (then locked),
 * file descriptor plus file pointer, PFS node. Each failure path releases
 * the locks taken so far, in reverse order, before returning.
 */
static int
make_sock_fd(dev_t dev, int flags)
{
struct vmnt *vmp;
struct vnode *vp;
struct filp *filp;
struct node_details res;
int r, fd;
/* Callers must pass converted O_xx flags only (see get_sock_flags()). */
assert((flags & ~(O_CLOEXEC | O_NONBLOCK | O_NOSIGPIPE)) == 0);
#if !NDEBUG
/*
 * Check whether there is a socket object for the new device already.
 * This is an expensive check, but if the socket driver sends us a new
 * socket ID that is already in use, this is a sure sign of driver
 * misbehavior. So far it does seem like nothing would go wrong within
 * VFS in this case though, which is why this is a debug-only check.
 */
if (find_filp_by_sock_dev(dev) != NULL) {
printf("VFS: socket driver %d generated in-use socket ID!\n",
get_smap_by_dev(dev, NULL)->smap_endpt);
return EIO;
}
#endif /* !NDEBUG */
/*
 * Get a lock on PFS. TODO: it is not clear whether locking PFS is
 * needed at all, let alone which lock: map_vnode() uses a write lock,
 * create_pipe() uses a read lock, and cdev_clone() uses no lock at
 * all. As is, the README prescribes VMNT_READ, so that's what we use
 * here. The code below largely copies the create_pipe() code anyway.
 */
if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL)
panic("PFS gone");
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
return r;
/* Obtain a free vnode. */
if ((vp = get_free_vnode()) == NULL) {
unlock_vmnt(vmp);
/* NOTE(review): err_code is presumably set by get_free_vnode(). */
return err_code;
}
/* Lock the vnode before it is filled in and linked to the file pointer. */
lock_vnode(vp, VNODE_OPCL);
/* Acquire a file descriptor. */
if ((r = get_fd(fp, 0, R_BIT | W_BIT, &fd, &filp)) != OK) {
unlock_vnode(vp);
unlock_vmnt(vmp);
return r;
}
/* Create a PFS node for the socket. */
if ((r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid,
S_IFSOCK | ACCESSPERMS, dev, &res)) != OK) {
/* Nothing has been linked together yet; just drop all three locks. */
unlock_filp(filp);
unlock_vnode(vp);
unlock_vmnt(vmp);
return r;
}
/* Fill in the objects, and link them together. */
vp->v_fs_e = res.fs_e;
vp->v_inode_nr = res.inode_nr;
vp->v_mode = res.fmode;
/* The socket is identified through v_sdev; v_dev is left as NO_DEV. */
vp->v_sdev = dev;
vp->v_fs_count = 1;
vp->v_ref_count = 1;
vp->v_vmnt = NULL;
vp->v_dev = NO_DEV;
vp->v_size = 0;
filp->filp_vno = vp;
/* Sockets are always opened for both reading and writing. */
filp->filp_flags = O_RDWR | flags;
filp->filp_count = 1;
fp->fp_filp[fd] = filp;
/* Close-on-exec is tracked per descriptor, not per file pointer. */
if (flags & O_CLOEXEC)
FD_SET(fd, &fp->fp_cloexec_set);
/* Release locks, and return the new file descriptor. */
unlock_filp(filp); /* this also unlocks the vnode now! */
unlock_vmnt(vmp);
return fd;
}
/*
 * Create a socket.  The domain, type, and protocol are taken from the request
 * message of the current job.  On success, return the number of the newly
 * created file descriptor.  On failure, return a negative error code.
 */
int
do_socket(void)
{
	int domain, type, sock_type, protocol;
	dev_t dev;
	int r, flags;

	domain = job_m_in.m_lc_vfs_socket.domain;
	type = job_m_in.m_lc_vfs_socket.type;
	protocol = job_m_in.m_lc_vfs_socket.protocol;

	/* Is there a socket driver for this domain at all? */
	if (get_smap_by_domain(domain) == NULL)
		return EAFNOSUPPORT;

	/*
	 * Ensure that it is at least likely that after creating a socket, we
	 * will be able to create a file descriptor for it, along with all the
	 * necessary supporting objects.  While it would be slightly neater to
	 * allocate these objects before trying to create the socket, this is
	 * offset by the fact that that approach results in a downright mess in
	 * do_socketpair() below, and with the current approach we can reuse
	 * the same code for accepting sockets as well.  For newly created
	 * sockets, it is no big deal to close them right after creation; for
	 * newly accepted sockets, we have no choice but to do that anyway.
	 * Moreover, object creation failures should be rare and our approach
	 * does not cause significantly more overhead anyway, so the entire
	 * issue is largely philosophical anyway.  For now, this will do.
	 */
	if ((r = check_sock_fds(1)) != OK)
		return r;

	/* Separate the actual socket type from the SOCK_xx open flags. */
	sock_type = type & ~SOCK_FLAGS_MASK;
	flags = get_sock_flags(type);

	if ((r = sdev_socket(domain, sock_type, protocol, &dev,
	    FALSE /*pair*/)) != OK)
		return r;

	/*
	 * If no file descriptor can be created after all, the socket must be
	 * closed again, as make_sock_fd() leaves it open upon failure.
	 */
	if ((r = make_sock_fd(dev, flags)) < 0)
		(void)sdev_close(dev);

	return r;
}
/*
@ -54,8 +223,82 @@ do_socket(void)
int
do_socketpair(void)
{
	int domain, type, sock_type, protocol;
	dev_t dev[2];
	int r, fd0, fd1, flags;

	/*
	 * Create a pair of connected sockets.  On success, both new file
	 * descriptors are stored in the reply message and OK is returned.
	 */
	domain = job_m_in.m_lc_vfs_socket.domain;
	type = job_m_in.m_lc_vfs_socket.type;
	protocol = job_m_in.m_lc_vfs_socket.protocol;

	/* Is there a socket driver for this domain at all? */
	if (get_smap_by_domain(domain) == NULL)
		return EAFNOSUPPORT;

	/*
	 * See the lengthy comment in do_socket().  This time we need two of
	 * everything, though.
	 */
	if ((r = check_sock_fds(2)) != OK)
		return r;

	/* Separate the actual socket type from the SOCK_xx open flags. */
	sock_type = type & ~SOCK_FLAGS_MASK;
	flags = get_sock_flags(type);

	if ((r = sdev_socket(domain, sock_type, protocol, dev,
	    TRUE /*pair*/)) != OK)
		return r;

	/* Neither socket has a file descriptor yet: close both directly. */
	if ((fd0 = make_sock_fd(dev[0], flags)) < 0) {
		(void)sdev_close(dev[0]);
		(void)sdev_close(dev[1]);
		return fd0;
	}

	/*
	 * The first socket is owned by its file descriptor by now, so closing
	 * that descriptor closes the socket as well.  The second socket still
	 * has to be closed directly.
	 */
	if ((fd1 = make_sock_fd(dev[1], flags)) < 0) {
		close_fd(fp, fd0);
		(void)sdev_close(dev[1]);
		return fd1;
	}

	job_m_out.m_vfs_lc_fdpair.fd0 = fd0;
	job_m_out.m_vfs_lc_fdpair.fd1 = fd1;
	return OK;
}
/*
* Check whether the given file descriptor identifies an open socket in the
* current process. If so, return OK, with the socket device number stored in
* 'dev' and its file pointer flags stored in 'flags' (if not NULL). If not,
* return an appropriate error code.
*/
static int
get_sock(int fd, dev_t * dev, int * flags)
{
struct filp *filp;
if ((filp = get_filp(fd, VNODE_READ)) == NULL)
return err_code;
if (!S_ISSOCK(filp->filp_vno->v_mode)) {
unlock_filp(filp);
return ENOTSOCK;
}
*dev = filp->filp_vno->v_sdev;
if (flags != NULL)
*flags = filp->filp_flags;
/*
* It is safe to leave the file pointer object unlocked during the
* actual call. Since the current process is blocked for the duration
* of the socket call, we know the socket's file descriptor, and thus
* its file pointer, can not possibly be freed. In addition, we will
* not be accessing the file pointer anymore later, with the exception
* of accept calls, which reacquire the lock when the reply comes in.
*/
unlock_filp(filp);
return OK;
}
/*
@ -64,8 +307,16 @@ do_socketpair(void)
int
do_bind(void)
{
	dev_t dev;
	int r, fd, flags;

	/*
	 * Bind the socket identified by the file descriptor in the request to
	 * the address given in the request.  The result of the socket driver
	 * call is returned as is.
	 */
	fd = job_m_in.m_lc_vfs_sockaddr.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	return sdev_bind(dev, job_m_in.m_lc_vfs_sockaddr.addr,
	    job_m_in.m_lc_vfs_sockaddr.addr_len, flags);
}
/*
@ -74,8 +325,16 @@ do_bind(void)
int
do_connect(void)
{
	dev_t dev;
	int r, fd, flags;

	/*
	 * Connect the socket identified by the file descriptor in the request
	 * to the address given in the request.  The result of the socket
	 * driver call is returned as is.
	 */
	fd = job_m_in.m_lc_vfs_sockaddr.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	return sdev_connect(dev, job_m_in.m_lc_vfs_sockaddr.addr,
	    job_m_in.m_lc_vfs_sockaddr.addr_len, flags);
}
/*
@ -84,8 +343,19 @@ do_connect(void)
int
do_listen(void)
{
	dev_t dev;
	int r, fd, backlog;

	/*
	 * Put the socket identified by the file descriptor in the request in
	 * listening mode, using the backlog value from the request.
	 */
	fd = job_m_in.m_lc_vfs_listen.fd;
	backlog = job_m_in.m_lc_vfs_listen.backlog;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	/* A negative backlog is treated as zero rather than as an error. */
	if (backlog < 0)
		backlog = 0;

	return sdev_listen(dev, backlog);
}
/*
@ -94,8 +364,116 @@ do_listen(void)
int
do_accept(void)
{
	dev_t dev;
	int r, fd, flags;

	/*
	 * Start accepting a connection on the listening socket identified by
	 * the file descriptor in the request.  The descriptor number is passed
	 * along to the lower layer; resume_accept() receives it again as its
	 * 'listen_fd' parameter when the call is resumed.
	 */
	fd = job_m_in.m_lc_vfs_sockaddr.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	/*
	 * A successful accept needs one more file descriptor.  Refuse the
	 * call right away if the process has no free slot for it.
	 */
	if ((r = check_sock_fds(1)) != OK)
		return r;

	return sdev_accept(dev, job_m_in.m_lc_vfs_sockaddr.addr,
	    job_m_in.m_lc_vfs_sockaddr.addr_len, flags, fd);
}
/*
* Resume a previously suspended accept(2) system call. This routine must
* cover three distinct cases, depending on the 'status' and 'dev' values:
*
* #1. If the 'status' parameter is set to OK, the accept call succeeded. In
* that case, the function is guaranteed to be called from a worker thread,
* with 'fp' set to the user process that made the system call. In that
* case, this function may block its calling thread. The 'dev' parameter
* will contain the device number of the newly accepted socket.
* #2. If the 'status' parameter contains a negative error code, but 'dev' is
* *not* set to NO_DEV, then the same as above applies, except that the new
* socket must be closed immediately.
* #3. If 'status' is a negative error code and 'dev' is set to NO_DEV, then
* the accept call has failed and no new socket was ever created. In this
* case, the function MUST NOT block its calling thread.
*/
void
resume_accept(struct fproc * rfp, int status, dev_t dev, unsigned int addr_len,
	int listen_fd)
{
	message reply_msg;
	dev_t listen_dev;
	int new_fd, lflags;

	/*
	 * Case #3: the call failed and no socket was created.  All that can
	 * and should be done is to pass the error on to the user process.
	 */
	if (status != OK && dev == NO_DEV) {
		replycode(rfp->fp_endpoint, status);
		return;
	}

	/*
	 * From here on, a new socket exists.  The lower socket layer (sdev.c)
	 * ensures that in that case, this function runs in a worker thread
	 * associated with the original user process, so the current thread is
	 * allowed to block.  First verify that the listening socket is still
	 * around.  If it is not, it must have been invalidated because the
	 * socket driver died; an error is reported then, but the new socket
	 * need not be closed.  The lookup also yields the listening socket's
	 * flags, which on BSD systems are inherited by the accepted socket.
	 */
	assert(fp == rfp); /* needed for get_sock() and make_sock_fd() */

	if (get_sock(listen_fd, &listen_dev, &lflags) != OK) {
		replycode(rfp->fp_endpoint, EIO);
		return;
	}

	/* Both sockets are necessarily hosted by one and the same driver. */
	assert(get_smap_by_dev(listen_dev, NULL) == get_smap_by_dev(dev, NULL));

	if (status == OK) {
		/*
		 * Case #1: a socket was accepted successfully.  Try to create
		 * a file descriptor for it, with only the inheritable flags.
		 * There is no way to rule out failure here other than by
		 * preallocating all objects for the duration of the accept
		 * call, which would not be great either.
		 */
		new_fd = make_sock_fd(dev,
		    lflags & (O_CLOEXEC | O_NONBLOCK | O_NOSIGPIPE));

		if (new_fd >= 0) {
			/*
			 * Reply with the new file descriptor and the address
			 * length, the latter of which may be zero.
			 */
			memset(&reply_msg, 0, sizeof(reply_msg));
			reply_msg.m_vfs_lc_socklen.len = addr_len;
			reply(&reply_msg, rfp->fp_endpoint, new_fd);
			return;
		}

		status = new_fd;
	}

	/*
	 * Either the driver reported an error along with a new socket (case
	 * #2), which lets drivers deal with address copy failures cleanly, or
	 * no file descriptor could be created.  In both cases, the new socket
	 * must be closed, and the error is sent to the user process.
	 */
	(void)sdev_close(dev);
	replycode(rfp->fp_endpoint, status);
}
/*
@ -104,8 +482,19 @@ do_accept(void)
int
do_sendto(void)
{
	dev_t dev;
	int r, fd, flags;

	/*
	 * Send data on the socket identified by the file descriptor in the
	 * request, optionally to the address given in the request.
	 */
	fd = job_m_in.m_lc_vfs_sendrecv.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	/* No control data and no user msghdr are involved in this call. */
	return sdev_readwrite(dev, job_m_in.m_lc_vfs_sendrecv.buf,
	    job_m_in.m_lc_vfs_sendrecv.len, 0, 0,
	    job_m_in.m_lc_vfs_sendrecv.addr,
	    job_m_in.m_lc_vfs_sendrecv.addr_len,
	    job_m_in.m_lc_vfs_sendrecv.flags, WRITING, flags, 0);
}
/*
@ -114,8 +503,37 @@ do_sendto(void)
int
do_recvfrom(void)
{
	dev_t dev;
	int r, fd, flags;

	/*
	 * Receive data on the socket identified by the file descriptor in the
	 * request, optionally obtaining the source address as well.
	 */
	fd = job_m_in.m_lc_vfs_sendrecv.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	/* No control data and no user msghdr are involved in this call. */
	return sdev_readwrite(dev, job_m_in.m_lc_vfs_sendrecv.buf,
	    job_m_in.m_lc_vfs_sendrecv.len, 0, 0,
	    job_m_in.m_lc_vfs_sendrecv.addr,
	    job_m_in.m_lc_vfs_sendrecv.addr_len,
	    job_m_in.m_lc_vfs_sendrecv.flags, READING, flags, 0);
}
/*
* Resume a previously suspended recvfrom(2) system call. This function MUST
* NOT block its calling thread.
*/
void
resume_recvfrom(struct fproc * rfp, int status, unsigned int addr_len)
{
	message reply_msg;

	/* A negative status is an error code, sent back without any payload. */
	if (status < 0) {
		replycode(rfp->fp_endpoint, status);
		return;
	}

	/* Otherwise, send the status along with the resulting address length. */
	memset(&reply_msg, 0, sizeof(reply_msg));
	reply_msg.m_vfs_lc_socklen.len = addr_len;
	reply(&reply_msg, rfp->fp_endpoint, status);
}
/*
@ -124,8 +542,112 @@ do_recvfrom(void)
int
do_sockmsg(void)
{
	struct msghdr msg;
	struct iovec iov;
	vir_bytes msg_buf, data_buf;
	size_t data_len;
	dev_t dev;
	int r, fd, flags;

	/*
	 * Handle both the sendmsg(2) and the recvmsg(2) system call.  The two
	 * differ only in the transfer direction, and in that for receive
	 * calls the user address of the msghdr structure is passed on, so
	 * that resume_recvmsg() can update the structure afterwards.
	 */
	assert(job_call_nr == VFS_SENDMSG || job_call_nr == VFS_RECVMSG);

	fd = job_m_in.m_lc_vfs_sockmsg.fd;
	msg_buf = job_m_in.m_lc_vfs_sockmsg.msgbuf;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, &flags)) != OK)
		return r;

	/* Copy in the msghdr structure from the calling process. */
	if ((r = sys_datacopy_wrapper(who_e, msg_buf, SELF, (vir_bytes)&msg,
	    sizeof(msg))) != OK)
		return r;

	/* Without a (non-empty) I/O vector, there is no data buffer at all. */
	data_buf = 0;
	data_len = 0;
	if (msg.msg_iovlen > 0) {
		/*
		 * We do not yet support vectors with more than one element;
		 * for this reason, libc is currently expected to consolidate
		 * the entire vector into a single element.  Once we do add
		 * proper vector support, the ABI itself need not be changed.
		 */
		if (msg.msg_iovlen > 1)
			return EMSGSIZE;

		/* Copy in the single I/O vector element. */
		if ((r = sys_datacopy_wrapper(who_e, (vir_bytes)msg.msg_iov,
		    SELF, (vir_bytes)&iov, sizeof(iov))) != OK)
			return r;

		/* The transfer size must be representable as a ssize_t. */
		if (iov.iov_len > SSIZE_MAX)
			return EINVAL;

		if (iov.iov_len > 0) {
			data_buf = (vir_bytes)iov.iov_base;
			data_len = iov.iov_len;
		}
	}

	return sdev_readwrite(dev, data_buf, data_len,
	    (vir_bytes)msg.msg_control, msg.msg_controllen,
	    (vir_bytes)msg.msg_name, msg.msg_namelen,
	    job_m_in.m_lc_vfs_sockmsg.flags,
	    (job_call_nr == VFS_RECVMSG) ? READING : WRITING, flags,
	    (job_call_nr == VFS_RECVMSG) ? msg_buf : 0);
}
/*
* Resume a previously suspended recvmsg(2) system call. The 'status'
* parameter contains either the number of data bytes received or a negative
* error code. The 'msg_buf' parameter contains the user address of the msghdr
* structure. If a failure occurs in this function, the received data
* (including, in the worst case, references to received file descriptors) will
* be lost - while seriously ugly, this is always the calling process's fault,
* extremely hard to deal with, and on par with current behavior in other
* operating systems. This function MUST NOT block its calling thread.
*/
void
resume_recvmsg(struct fproc * rfp, int status, unsigned int ctl_len,
	unsigned int addr_len, int flags, vir_bytes msg_buf)
{
	struct msghdr hdr;
	int r;

	/* On failure, there are no results to store in the msghdr structure. */
	if (status < 0) {
		replycode(rfp->fp_endpoint, status);
		return;
	}

	/*
	 * A subset of the fields of the user's msghdr structure now has to be
	 * updated.  There are three ways to go about that: 1) copy in the
	 * whole structure once more, change the fields, and copy it out as a
	 * whole; 2) copy out each changed field individually; 3) keep a copy
	 * of the original structure around somewhere.  The third way is the
	 * most efficient, but would make the fproc structure quite a bit
	 * larger.  The first two differ mainly in the number of kernel calls.
	 * The first option is used here.
	 */
	r = sys_datacopy_wrapper(rfp->fp_endpoint, msg_buf, SELF,
	    (vir_bytes)&hdr, sizeof(hdr));

	if (r != OK) {
		/* We copied it in before, how could it fail now? */
		printf("VFS: resume_recvmsg cannot copy in msghdr? (%d)\n", r);
		replycode(rfp->fp_endpoint, r);
		return;
	}

	/* Store the results; the name length is left alone if zero. */
	hdr.msg_controllen = ctl_len;
	hdr.msg_flags = flags;
	if (addr_len > 0)
		hdr.msg_namelen = addr_len;

	/* Copy the structure back out; a copy failure replaces the status. */
	r = sys_datacopy_wrapper(SELF, (vir_bytes)&hdr, rfp->fp_endpoint,
	    msg_buf, sizeof(hdr));

	/* Wake up the caller. */
	replycode(rfp->fp_endpoint, (r != OK) ? r : status);
}
/*
@ -134,8 +656,17 @@ do_sockmsg(void)
int
do_setsockopt(void)
{
	dev_t dev;
	int r, fd;

	/*
	 * Set an option on the socket identified by the file descriptor in
	 * the request.  The option level, name, buffer, and length are passed
	 * on to the socket driver as is.
	 */
	fd = job_m_in.m_lc_vfs_sockopt.fd;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	return sdev_setsockopt(dev, job_m_in.m_lc_vfs_sockopt.level,
	    job_m_in.m_lc_vfs_sockopt.name, job_m_in.m_lc_vfs_sockopt.buf,
	    job_m_in.m_lc_vfs_sockopt.len);
}
/*
@ -144,8 +675,23 @@ do_setsockopt(void)
int
do_getsockopt(void)
{
	unsigned int len;
	dev_t dev;
	int r, fd;

	/*
	 * Retrieve an option from the socket identified by the file
	 * descriptor in the request.  The length from the request is passed
	 * to the lower layer by reference; on success, its resulting value is
	 * stored in the reply message.
	 */
	fd = job_m_in.m_lc_vfs_sockopt.fd;
	len = job_m_in.m_lc_vfs_sockopt.len;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	r = sdev_getsockopt(dev, job_m_in.m_lc_vfs_sockopt.level,
	    job_m_in.m_lc_vfs_sockopt.name, job_m_in.m_lc_vfs_sockopt.buf,
	    &len);

	/* The reply message is filled in only if the call succeeded. */
	if (r == OK)
		job_m_out.m_vfs_lc_socklen.len = len;

	return r;
}
/*
@ -154,8 +700,21 @@ do_getsockopt(void)
int
do_getsockname(void)
{
	unsigned int len;
	dev_t dev;
	int r, fd;

	/*
	 * Retrieve the local address of the socket identified by the file
	 * descriptor in the request.  The address length from the request is
	 * passed to the lower layer by reference; on success, its resulting
	 * value is stored in the reply message.
	 */
	fd = job_m_in.m_lc_vfs_sockaddr.fd;
	len = job_m_in.m_lc_vfs_sockaddr.addr_len;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	r = sdev_getsockname(dev, job_m_in.m_lc_vfs_sockaddr.addr, &len);

	/* The reply message is filled in only if the call succeeded. */
	if (r == OK)
		job_m_out.m_vfs_lc_socklen.len = len;

	return r;
}
/*
@ -164,8 +723,21 @@ do_getsockname(void)
int
do_getpeername(void)
{
	unsigned int len;
	dev_t dev;
	int r, fd;

	/*
	 * Retrieve the remote address of the socket identified by the file
	 * descriptor in the request.  The address length from the request is
	 * passed to the lower layer by reference; on success, its resulting
	 * value is stored in the reply message.
	 */
	fd = job_m_in.m_lc_vfs_sockaddr.fd;
	len = job_m_in.m_lc_vfs_sockaddr.addr_len;

	/* Fail early if the descriptor does not refer to an open socket. */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	r = sdev_getpeername(dev, job_m_in.m_lc_vfs_sockaddr.addr, &len);

	/* The reply message is filled in only if the call succeeded. */
	if (r == OK)
		job_m_out.m_vfs_lc_socklen.len = len;

	return r;
}
/*
@ -174,6 +746,17 @@ do_getpeername(void)
int
do_shutdown(void)
{
	dev_t dev;
	int r, fd, how;

	/*
	 * Shut down reading, writing, or both on the socket identified by the
	 * file descriptor in the request.
	 */
	fd = job_m_in.m_lc_vfs_shutdown.fd;
	how = job_m_in.m_lc_vfs_shutdown.how;

	/*
	 * The descriptor is checked before the 'how' value, so that a bad
	 * descriptor is reported in preference to a bad 'how' value.
	 */
	if ((r = get_sock(fd, &dev, NULL)) != OK)
		return r;

	/* Only the three SHUT_xx values are accepted. */
	if (how != SHUT_RD && how != SHUT_WR && how != SHUT_RDWR)
		return EINVAL;

	return sdev_shutdown(dev, how);
}

View File

@ -38,4 +38,14 @@ struct statvfs_cache {
unsigned long f_namemax; /* maximum filename length */
};
/*
 * Socket driver map entry. Each entry describes one socket driver slot. A
 * slot is free when its smap_endpt field is set to NONE. The smap_num field
 * is the slot's one-based entry number: it equals the slot's array index plus
 * one, and it makes up the upper 32 bits of the device numbers of the sockets
 * hosted by the driver.
 */
struct smap {
unsigned int smap_num; /* one-based number into smap array */
endpoint_t smap_endpt; /* driver endpoint, NONE if free */
char smap_label[LABEL_MAX]; /* driver label */
int smap_sel_busy; /* doing initial select on socket? */
struct filp * smap_sel_filp; /* socket being selected on */
};
/*
 * Per-driver socket identifier. Valid identifiers are never negative; an
 * identifier makes up the lower 32 bits of a socket device number.
 */
typedef int32_t sockid_t;
#endif