
This change effectively adds the VFS side of support for the SO_LINGER socket option, by allowing file descriptor close operations to be suspended (and later resumed) by socket drivers. Currently, support is limited to the close(2) system call--in all other cases where file descriptors are closed (dup2, close-on-exec, process exit..), the close operation still completes instantly. As a general policy, the close(2) return value will always indicate that the file descriptor has been closed: either 0, or -1 with errno set to EINPROGRESS. The latter error may be thrown only when a suspended close is interrupted by a signal. As necessary for UDS, this change also introduces a closenb(2) system call extension, allowing the caller to bypass blocking SO_LINGER close behavior. This extension allows UDS to avoid blocking on closing the last reference to an in-flight file descriptor, in an atomic fashion. The extension is currently part of libsys, but there is no reason why userland would not be allowed to make this call, so it is deliberately not protected from use by userland. Change-Id: Iec77d6665232110346180017fc1300b1614910b7
1000 lines
27 KiB
C
1000 lines
27 KiB
C
/* This file contains a collection of miscellaneous procedures. Some of them
|
|
* perform simple system calls. Some others do a little part of system calls
|
|
* that are mostly performed by the Memory Manager.
|
|
*
|
|
* The entry points into this file are
|
|
* do_fcntl: perform the FCNTL system call
|
|
* do_sync: perform the SYNC system call
|
|
* do_fsync: perform the FSYNC system call
|
|
* pm_setsid: perform VFS's side of setsid system call
|
|
* pm_reboot: sync disks and prepare for shutdown
|
|
* pm_fork: adjust the tables after PM has performed a FORK system call
|
|
* do_exec: handle files with FD_CLOEXEC on after PM has done an EXEC
|
|
* do_exit: a process has exited; note that in the tables
|
|
* do_set: set uid or gid for some process
|
|
* do_revive: revive a process that was waiting for something (e.g. TTY)
|
|
* do_svrctl: file system control
|
|
* do_getsysinfo: request copy of FS data structure
|
|
* pm_dumpcore: create a core dump
|
|
*/
|
|
|
|
#include "fs.h"
|
|
#include <fcntl.h>
|
|
#include <assert.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <minix/callnr.h>
|
|
#include <minix/safecopies.h>
|
|
#include <minix/endpoint.h>
|
|
#include <minix/com.h>
|
|
#include <minix/sysinfo.h>
|
|
#include <minix/u64.h>
|
|
#include <sys/ptrace.h>
|
|
#include <sys/svrctl.h>
|
|
#include <sys/resource.h>
|
|
#include "file.h"
|
|
#include <minix/vfsif.h>
|
|
#include "vnode.h"
|
|
#include "vmnt.h"
|
|
|
|
/* Base name of core dump files; pm_dumpcore() appends ".<pid>" to it. */
#define CORE_NAME "core"
#define CORE_MODE 0777	/* mode to use on core image files */

#if ENABLE_SYSCALL_STATS
/* Call statistics table; copied out by do_getsysinfo() for SI_CALL_STATS. */
unsigned long calls_stats[NR_VFS_CALLS];
#endif

/* Common cleanup path shared by pm_reboot(), pm_exit() and pm_dumpcore(). */
static void free_proc(int flags);
|
|
|
|
/*===========================================================================*
|
|
* do_getsysinfo *
|
|
*===========================================================================*/
|
|
int do_getsysinfo(void)
|
|
{
|
|
struct fproc *rfp;
|
|
struct fproc_light *rfpl;
|
|
struct smap *sp;
|
|
vir_bytes src_addr, dst_addr;
|
|
size_t len, buf_size;
|
|
int what;
|
|
|
|
what = job_m_in.m_lsys_getsysinfo.what;
|
|
dst_addr = job_m_in.m_lsys_getsysinfo.where;
|
|
buf_size = job_m_in.m_lsys_getsysinfo.size;
|
|
|
|
/* Only su may call do_getsysinfo. This call may leak information (and is not
|
|
* stable enough to be part of the API/ABI). In the future, requests from
|
|
* non-system processes should be denied.
|
|
*/
|
|
|
|
if (!super_user) return(EPERM);
|
|
|
|
switch(what) {
|
|
case SI_PROC_TAB:
|
|
src_addr = (vir_bytes) fproc;
|
|
len = sizeof(struct fproc) * NR_PROCS;
|
|
break;
|
|
case SI_DMAP_TAB:
|
|
src_addr = (vir_bytes) dmap;
|
|
len = sizeof(struct dmap) * NR_DEVICES;
|
|
break;
|
|
case SI_PROCLIGHT_TAB:
|
|
/* Fill the light process table for the MIB service upon request. */
|
|
rfpl = &fproc_light[0];
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++, rfpl++) {
|
|
rfpl->fpl_tty = rfp->fp_tty;
|
|
rfpl->fpl_blocked_on = rfp->fp_blocked_on;
|
|
if (rfp->fp_blocked_on == FP_BLOCKED_ON_CDEV)
|
|
rfpl->fpl_task = rfp->fp_cdev.endpt;
|
|
else if (rfp->fp_blocked_on == FP_BLOCKED_ON_SDEV &&
|
|
(sp = get_smap_by_dev(rfp->fp_sdev.dev, NULL)) != NULL)
|
|
rfpl->fpl_task = sp->smap_endpt;
|
|
else
|
|
rfpl->fpl_task = NONE;
|
|
}
|
|
src_addr = (vir_bytes) fproc_light;
|
|
len = sizeof(fproc_light);
|
|
break;
|
|
#if ENABLE_SYSCALL_STATS
|
|
case SI_CALL_STATS:
|
|
src_addr = (vir_bytes) calls_stats;
|
|
len = sizeof(calls_stats);
|
|
break;
|
|
#endif
|
|
default:
|
|
return(EINVAL);
|
|
}
|
|
|
|
if (len != buf_size)
|
|
return(EINVAL);
|
|
|
|
return sys_datacopy_wrapper(SELF, src_addr, who_e, dst_addr, len);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* do_fcntl *
|
|
*===========================================================================*/
|
|
int do_fcntl(void)
|
|
{
|
|
/* Perform the fcntl(fd, cmd, ...) system call. */
|
|
struct filp *f;
|
|
int fd, new_fd, fl, r = OK, fcntl_req, fcntl_argx;
|
|
vir_bytes addr;
|
|
tll_access_t locktype;
|
|
|
|
fd = job_m_in.m_lc_vfs_fcntl.fd;
|
|
fcntl_req = job_m_in.m_lc_vfs_fcntl.cmd;
|
|
fcntl_argx = job_m_in.m_lc_vfs_fcntl.arg_int;
|
|
addr = job_m_in.m_lc_vfs_fcntl.arg_ptr;
|
|
|
|
/* Is the file descriptor valid? */
|
|
locktype = (fcntl_req == F_FREESP) ? VNODE_WRITE : VNODE_READ;
|
|
if ((f = get_filp(fd, locktype)) == NULL)
|
|
return(err_code);
|
|
|
|
switch (fcntl_req) {
|
|
case F_DUPFD:
|
|
case F_DUPFD_CLOEXEC:
|
|
/* This replaces the old dup() system call. */
|
|
if (fcntl_argx < 0 || fcntl_argx >= OPEN_MAX) r = EINVAL;
|
|
else if ((r = get_fd(fp, fcntl_argx, 0, &new_fd, NULL)) == OK) {
|
|
f->filp_count++;
|
|
fp->fp_filp[new_fd] = f;
|
|
assert(!FD_ISSET(new_fd, &fp->fp_cloexec_set));
|
|
if (fcntl_req == F_DUPFD_CLOEXEC)
|
|
FD_SET(new_fd, &fp->fp_cloexec_set);
|
|
r = new_fd;
|
|
}
|
|
break;
|
|
|
|
case F_GETFD:
|
|
/* Get close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
|
|
r = 0;
|
|
if (FD_ISSET(fd, &fp->fp_cloexec_set))
|
|
r = FD_CLOEXEC;
|
|
break;
|
|
|
|
case F_SETFD:
|
|
/* Set close-on-exec flag (FD_CLOEXEC in POSIX Table 6-2). */
|
|
if (fcntl_argx & FD_CLOEXEC)
|
|
FD_SET(fd, &fp->fp_cloexec_set);
|
|
else
|
|
FD_CLR(fd, &fp->fp_cloexec_set);
|
|
break;
|
|
|
|
case F_GETFL:
|
|
/* Get file status flags (O_NONBLOCK and O_APPEND). */
|
|
fl = f->filp_flags & (O_NONBLOCK | O_APPEND | O_ACCMODE);
|
|
r = fl;
|
|
break;
|
|
|
|
case F_SETFL:
|
|
/* Set file status flags (O_NONBLOCK and O_APPEND). */
|
|
fl = O_NONBLOCK | O_APPEND;
|
|
f->filp_flags = (f->filp_flags & ~fl) | (fcntl_argx & fl);
|
|
break;
|
|
|
|
case F_GETLK:
|
|
case F_SETLK:
|
|
case F_SETLKW:
|
|
/* Set or clear a file lock. */
|
|
r = lock_op(fd, fcntl_req, addr);
|
|
break;
|
|
|
|
case F_FREESP:
|
|
{
|
|
/* Free a section of a file */
|
|
off_t start, end, offset;
|
|
struct flock flock_arg;
|
|
|
|
/* Check if it's a regular file. */
|
|
if (!S_ISREG(f->filp_vno->v_mode)) r = EINVAL;
|
|
else if (!(f->filp_mode & W_BIT)) r = EBADF;
|
|
else {
|
|
/* Copy flock data from userspace. */
|
|
r = sys_datacopy_wrapper(who_e, addr, SELF,
|
|
(vir_bytes)&flock_arg, sizeof(flock_arg));
|
|
}
|
|
|
|
if (r != OK) break;
|
|
|
|
/* Convert starting offset to signed. */
|
|
offset = (off_t) flock_arg.l_start;
|
|
|
|
/* Figure out starting position base. */
|
|
switch(flock_arg.l_whence) {
|
|
case SEEK_SET: start = 0; break;
|
|
case SEEK_CUR: start = f->filp_pos; break;
|
|
case SEEK_END: start = f->filp_vno->v_size; break;
|
|
default: r = EINVAL;
|
|
}
|
|
if (r != OK) break;
|
|
|
|
/* Check for overflow or underflow. */
|
|
if (offset > 0 && start + offset < start) r = EINVAL;
|
|
else if (offset < 0 && start + offset > start) r = EINVAL;
|
|
else {
|
|
start += offset;
|
|
if (start < 0) r = EINVAL;
|
|
}
|
|
if (r != OK) break;
|
|
|
|
if (flock_arg.l_len != 0) {
|
|
if (start >= f->filp_vno->v_size) r = EINVAL;
|
|
else if ((end = start + flock_arg.l_len) <= start) r = EINVAL;
|
|
else if (end > f->filp_vno->v_size) end = f->filp_vno->v_size;
|
|
} else {
|
|
end = 0;
|
|
}
|
|
if (r != OK) break;
|
|
|
|
r = req_ftrunc(f->filp_vno->v_fs_e, f->filp_vno->v_inode_nr,start,end);
|
|
|
|
if (r == OK && flock_arg.l_len == 0)
|
|
f->filp_vno->v_size = start;
|
|
|
|
break;
|
|
}
|
|
case F_GETNOSIGPIPE:
|
|
r = !!(f->filp_flags & O_NOSIGPIPE);
|
|
break;
|
|
case F_SETNOSIGPIPE:
|
|
if (fcntl_argx)
|
|
f->filp_flags |= O_NOSIGPIPE;
|
|
else
|
|
f->filp_flags &= ~O_NOSIGPIPE;
|
|
break;
|
|
case F_FLUSH_FS_CACHE:
|
|
{
|
|
struct vnode *vn = f->filp_vno;
|
|
mode_t mode = f->filp_vno->v_mode;
|
|
if (!super_user) {
|
|
r = EPERM;
|
|
} else if (S_ISBLK(mode)) {
|
|
/* Block device; flush corresponding device blocks. */
|
|
r = req_flush(vn->v_bfs_e, vn->v_sdev);
|
|
} else if (S_ISREG(mode) || S_ISDIR(mode)) {
|
|
/* Directory or regular file; flush hosting FS blocks. */
|
|
r = req_flush(vn->v_fs_e, vn->v_dev);
|
|
} else {
|
|
/* Remaining cases.. Meaning unclear. */
|
|
r = ENODEV;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
r = EINVAL;
|
|
}
|
|
|
|
unlock_filp(f);
|
|
return(r);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* do_sync *
|
|
*===========================================================================*/
|
|
int do_sync(void)
|
|
{
|
|
struct vmnt *vmp;
|
|
int r = OK;
|
|
|
|
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
|
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
|
break;
|
|
if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
|
|
vmp->m_root_node != NULL) {
|
|
req_sync(vmp->m_fs_e);
|
|
}
|
|
unlock_vmnt(vmp);
|
|
}
|
|
|
|
return(r);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* do_fsync *
|
|
*===========================================================================*/
|
|
int do_fsync(void)
|
|
{
|
|
/* Perform the fsync() system call. */
|
|
struct filp *rfilp;
|
|
struct vmnt *vmp;
|
|
dev_t dev;
|
|
int fd, r = OK;
|
|
|
|
fd = job_m_in.m_lc_vfs_fsync.fd;
|
|
|
|
if ((rfilp = get_filp(fd, VNODE_READ)) == NULL)
|
|
return(err_code);
|
|
|
|
dev = rfilp->filp_vno->v_dev;
|
|
unlock_filp(rfilp);
|
|
|
|
for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
|
|
if (vmp->m_dev != dev) continue;
|
|
if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
|
|
break;
|
|
if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
|
|
vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
|
|
|
|
req_sync(vmp->m_fs_e);
|
|
}
|
|
unlock_vmnt(vmp);
|
|
}
|
|
|
|
return(r);
|
|
}
|
|
|
|
int dupvm(struct fproc *rfp, int pfd, int *vmfd, struct filp **newfilp)
|
|
{
|
|
int result, procfd;
|
|
struct filp *f = NULL;
|
|
struct fproc *vmf = fproc_addr(VM_PROC_NR);
|
|
|
|
*newfilp = NULL;
|
|
|
|
if ((f = get_filp2(rfp, pfd, VNODE_READ)) == NULL) {
|
|
printf("VFS dupvm: get_filp2 failed\n");
|
|
return EBADF;
|
|
}
|
|
|
|
if(!(f->filp_vno->v_vmnt->m_fs_flags & RES_HASPEEK)) {
|
|
unlock_filp(f);
|
|
#if 0 /* Noisy diagnostic for mmap() by ld.so */
|
|
printf("VFS dupvm: no peek available\n");
|
|
#endif
|
|
return EINVAL;
|
|
}
|
|
|
|
assert(f->filp_vno);
|
|
assert(f->filp_vno->v_vmnt);
|
|
|
|
if (!S_ISREG(f->filp_vno->v_mode) && !S_ISBLK(f->filp_vno->v_mode)) {
|
|
printf("VFS: mmap regular/blockdev only; dev 0x%llx ino %llu has mode 0%o\n",
|
|
f->filp_vno->v_dev, f->filp_vno->v_inode_nr, f->filp_vno->v_mode);
|
|
unlock_filp(f);
|
|
return EINVAL;
|
|
}
|
|
|
|
/* get free FD in VM */
|
|
if((result=get_fd(vmf, 0, 0, &procfd, NULL)) != OK) {
|
|
unlock_filp(f);
|
|
printf("VFS dupvm: getfd failed\n");
|
|
return result;
|
|
}
|
|
|
|
*vmfd = procfd;
|
|
|
|
f->filp_count++;
|
|
assert(f->filp_count > 0);
|
|
vmf->fp_filp[procfd] = f;
|
|
|
|
*newfilp = f;
|
|
|
|
return OK;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* do_vm_call *
|
|
*===========================================================================*/
|
|
int do_vm_call(void)
|
|
{
|
|
/* A call that VM does to VFS.
|
|
* We must reply with the fixed type VM_VFS_REPLY (and put our result info
|
|
* in the rest of the message) so VM can tell the difference between a
|
|
* request from VFS and a reply to this call.
|
|
*/
|
|
int req = job_m_in.VFS_VMCALL_REQ;
|
|
int req_fd = job_m_in.VFS_VMCALL_FD;
|
|
u32_t req_id = job_m_in.VFS_VMCALL_REQID;
|
|
endpoint_t ep = job_m_in.VFS_VMCALL_ENDPOINT;
|
|
u64_t offset = job_m_in.VFS_VMCALL_OFFSET;
|
|
u32_t length = job_m_in.VFS_VMCALL_LENGTH;
|
|
int result = OK;
|
|
int slot;
|
|
struct fproc *rfp;
|
|
#if !defined(NDEBUG)
|
|
struct fproc *vmf;
|
|
#endif /* !defined(NDEBUG) */
|
|
struct filp *f = NULL;
|
|
int r;
|
|
|
|
if(job_m_in.m_source != VM_PROC_NR)
|
|
return ENOSYS;
|
|
|
|
if(isokendpt(ep, &slot) != OK) rfp = NULL;
|
|
else rfp = &fproc[slot];
|
|
|
|
#if !defined(NDEBUG)
|
|
vmf = fproc_addr(VM_PROC_NR);
|
|
#endif /* !defined(NDEBUG) */
|
|
assert(fp == vmf);
|
|
assert(rfp != vmf);
|
|
|
|
switch(req) {
|
|
case VMVFSREQ_FDLOOKUP:
|
|
{
|
|
int procfd;
|
|
|
|
/* Lookup fd in referenced process. */
|
|
|
|
if(!rfp) {
|
|
printf("VFS: why isn't ep %d here?!\n", ep);
|
|
result = ESRCH;
|
|
goto reqdone;
|
|
}
|
|
|
|
if((result = dupvm(rfp, req_fd, &procfd, &f)) != OK) {
|
|
#if 0 /* Noisy diagnostic for mmap() by ld.so */
|
|
printf("vfs: dupvm failed\n");
|
|
#endif
|
|
goto reqdone;
|
|
}
|
|
|
|
if(S_ISBLK(f->filp_vno->v_mode)) {
|
|
assert(f->filp_vno->v_sdev != NO_DEV);
|
|
job_m_out.VMV_DEV = f->filp_vno->v_sdev;
|
|
job_m_out.VMV_INO = VMC_NO_INODE;
|
|
job_m_out.VMV_SIZE_PAGES = LONG_MAX;
|
|
} else {
|
|
job_m_out.VMV_DEV = f->filp_vno->v_dev;
|
|
job_m_out.VMV_INO = f->filp_vno->v_inode_nr;
|
|
job_m_out.VMV_SIZE_PAGES =
|
|
roundup(f->filp_vno->v_size,
|
|
PAGE_SIZE)/PAGE_SIZE;
|
|
}
|
|
|
|
job_m_out.VMV_FD = procfd;
|
|
|
|
result = OK;
|
|
|
|
break;
|
|
}
|
|
case VMVFSREQ_FDCLOSE:
|
|
{
|
|
result = close_fd(fp, req_fd, FALSE /*may_suspend*/);
|
|
if(result != OK) {
|
|
printf("VFS: VM fd close for fd %d, %d (%d)\n",
|
|
req_fd, fp->fp_endpoint, result);
|
|
}
|
|
break;
|
|
}
|
|
case VMVFSREQ_FDIO:
|
|
{
|
|
result = actual_lseek(fp, req_fd, SEEK_SET, offset,
|
|
NULL);
|
|
|
|
if(result == OK) {
|
|
result = actual_read_write_peek(fp, PEEKING,
|
|
req_fd, /* vir_bytes */ 0, length);
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
panic("VFS: bad request code from VM\n");
|
|
break;
|
|
}
|
|
|
|
reqdone:
|
|
if(f)
|
|
unlock_filp(f);
|
|
|
|
/* fp is VM still. */
|
|
assert(fp == vmf);
|
|
job_m_out.VMV_ENDPOINT = ep;
|
|
job_m_out.VMV_RESULT = result;
|
|
job_m_out.VMV_REQID = req_id;
|
|
|
|
/* Reply asynchronously as VM may not be able to receive
|
|
* an ipc_sendnb() message.
|
|
*/
|
|
job_m_out.m_type = VM_VFS_REPLY;
|
|
r = asynsend3(VM_PROC_NR, &job_m_out, 0);
|
|
if(r != OK) printf("VFS: couldn't asynsend3() to VM\n");
|
|
|
|
/* VFS does not reply any further */
|
|
return SUSPEND;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_reboot *
|
|
*===========================================================================*/
|
|
void
|
|
pm_reboot(void)
|
|
{
|
|
/* Perform the VFS side of the reboot call. This call is performed from the PM
|
|
* process context.
|
|
*/
|
|
message m_out;
|
|
int i, r;
|
|
struct fproc *rfp, *pmfp;
|
|
|
|
pmfp = fp;
|
|
|
|
do_sync();
|
|
|
|
/* Do exit processing for all leftover processes and servers, but don't
|
|
* actually exit them (if they were really gone, PM will tell us about it).
|
|
* Skip processes that handle parts of the file system; we first need to give
|
|
* them the chance to unmount (which should be possible as all normal
|
|
* processes have no open files anymore).
|
|
*/
|
|
/* This is the only place where we allow special modification of "fp". The
|
|
* reboot procedure should really be implemented as a PM message broadcasted
|
|
* to all processes, so that each process will be shut down cleanly by a
|
|
* thread operating on its behalf. Doing everything here is simpler, but it
|
|
* requires an exception to the strict model of having "fp" be the process
|
|
* that owns the current worker thread.
|
|
*/
|
|
for (i = 0; i < NR_PROCS; i++) {
|
|
rfp = &fproc[i];
|
|
|
|
/* Don't just free the proc right away, but let it finish what it was
|
|
* doing first */
|
|
if (rfp != fp) lock_proc(rfp);
|
|
if (rfp->fp_endpoint != NONE && find_vmnt(rfp->fp_endpoint) == NULL) {
|
|
worker_set_proc(rfp); /* temporarily fake process context */
|
|
free_proc(0);
|
|
worker_set_proc(pmfp); /* restore original process context */
|
|
}
|
|
if (rfp != fp) unlock_proc(rfp);
|
|
}
|
|
|
|
do_sync();
|
|
unmount_all(0 /* Don't force */);
|
|
|
|
/* Try to exit all processes again including File Servers */
|
|
for (i = 0; i < NR_PROCS; i++) {
|
|
rfp = &fproc[i];
|
|
|
|
/* Don't just free the proc right away, but let it finish what it was
|
|
* doing first */
|
|
if (rfp != fp) lock_proc(rfp);
|
|
if (rfp->fp_endpoint != NONE) {
|
|
worker_set_proc(rfp); /* temporarily fake process context */
|
|
free_proc(0);
|
|
worker_set_proc(pmfp); /* restore original process context */
|
|
}
|
|
if (rfp != fp) unlock_proc(rfp);
|
|
}
|
|
|
|
do_sync();
|
|
unmount_all(1 /* Force */);
|
|
|
|
/* Reply to PM for synchronization */
|
|
memset(&m_out, 0, sizeof(m_out));
|
|
|
|
m_out.m_type = VFS_PM_REBOOT_REPLY;
|
|
|
|
if ((r = ipc_send(PM_PROC_NR, &m_out)) != OK)
|
|
panic("pm_reboot: ipc_send failed: %d", r);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_fork *
|
|
*===========================================================================*/
|
|
void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid)
{
/* Perform those aspects of the fork() system call that relate to files.
 * In particular, let the child inherit its parent's file descriptors.
 * The parent and child parameters tell who forked off whom. The file
 * system uses the same slot numbers as the kernel. Only PM makes this call.
 */
  struct fproc *parent, *child;
  mutex_t child_lock;
  int parentno, childno, fd;

  /* Check up-to-dateness of fproc. */
  okendpt(pproc, &parentno);

  /* PM gives child endpoint, which implies process slot information.
   * Don't call isokendpt, because that will verify if the endpoint
   * number is correct in fproc, which it won't be.
   */
  childno = _ENDPOINT_P(cproc);
  if (childno < 0 || childno >= NR_PROCS)
	panic("VFS: bogus child for forking: %d", cproc);
  if (fproc[childno].fp_pid != PID_FREE)
	panic("VFS: forking on top of in-use child: %d", childno);

  parent = &fproc[parentno];
  child = &fproc[childno];

  /* Copy the parent's fproc struct to the child. However, the mutex
   * variables belong to a slot and must stay the same.
   */
  child_lock = child->fp_lock;
  *child = *parent;
  child->fp_lock = child_lock;

  /* Increase the counters in the 'filp' table. */
  for (fd = 0; fd < OPEN_MAX; fd++) {
	if (child->fp_filp[fd] != NULL)
		child->fp_filp[fd]->filp_count++;
  }

  /* Fill in new process and endpoint id. */
  child->fp_pid = cpid;
  child->fp_endpoint = cproc;

  /* A forking process cannot possibly be suspended on anything. */
  assert(parent->fp_blocked_on == FP_BLOCKED_ON_NONE);

  /* A child is not a process leader, not being revived, etc. */
  child->fp_flags = FP_NOFLAGS;

  /* Record the fact that both root and working dir have another user. */
  if (child->fp_rd) dup_vnode(child->fp_rd);
  if (child->fp_wd) dup_vnode(child->fp_wd);
}
|
|
|
|
/*===========================================================================*
|
|
* free_proc *
|
|
*===========================================================================*/
|
|
static void free_proc(int flags)
|
|
{
|
|
int i;
|
|
register struct fproc *rfp;
|
|
register struct filp *rfilp;
|
|
register struct vnode *vp;
|
|
dev_t dev;
|
|
|
|
if (fp->fp_endpoint == NONE)
|
|
panic("free_proc: already free");
|
|
|
|
if (fp_is_blocked(fp))
|
|
unpause();
|
|
|
|
/* Loop on file descriptors, closing any that are open. */
|
|
for (i = 0; i < OPEN_MAX; i++) {
|
|
(void) close_fd(fp, i, FALSE /*may_suspend*/);
|
|
}
|
|
|
|
/* Release root and working directories. */
|
|
if (fp->fp_rd) { put_vnode(fp->fp_rd); fp->fp_rd = NULL; }
|
|
if (fp->fp_wd) { put_vnode(fp->fp_wd); fp->fp_wd = NULL; }
|
|
|
|
/* The rest of these actions is only done when processes actually exit. */
|
|
if (!(flags & FP_EXITING)) return;
|
|
|
|
fp->fp_flags |= FP_EXITING;
|
|
|
|
/* Check if any process is SUSPENDed on this driver.
|
|
* If a driver exits, unmap its entries in the dmap table.
|
|
* (unmapping has to be done after the first step, because the
|
|
* dmap/smap tables are used in the first step.)
|
|
*/
|
|
unsuspend_by_endpt(fp->fp_endpoint);
|
|
dmap_unmap_by_endpt(fp->fp_endpoint);
|
|
smap_unmap_by_endpt(fp->fp_endpoint);
|
|
|
|
worker_stop_by_endpt(fp->fp_endpoint); /* Unblock waiting threads */
|
|
vmnt_unmap_by_endpt(fp->fp_endpoint); /* Invalidate open files if this
|
|
* was an active FS */
|
|
|
|
/* If a session leader exits and it has a controlling tty, then revoke
|
|
* access to its controlling tty from all other processes using it.
|
|
*/
|
|
if ((fp->fp_flags & FP_SESLDR) && fp->fp_tty != 0) {
|
|
dev = fp->fp_tty;
|
|
for (rfp = &fproc[0]; rfp < &fproc[NR_PROCS]; rfp++) {
|
|
if(rfp->fp_pid == PID_FREE) continue;
|
|
if (rfp->fp_tty == dev) rfp->fp_tty = 0;
|
|
|
|
for (i = 0; i < OPEN_MAX; i++) {
|
|
if ((rfilp = rfp->fp_filp[i]) == NULL) continue;
|
|
if (rfilp->filp_mode == FILP_CLOSED) continue;
|
|
vp = rfilp->filp_vno;
|
|
if (!S_ISCHR(vp->v_mode)) continue;
|
|
if (vp->v_sdev != dev) continue;
|
|
lock_filp(rfilp, VNODE_READ);
|
|
(void) cdev_close(dev); /* Ignore any errors. */
|
|
/* FIXME: missing select check */
|
|
rfilp->filp_mode = FILP_CLOSED;
|
|
unlock_filp(rfilp);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Exit done. Mark slot as free. */
|
|
fp->fp_endpoint = NONE;
|
|
fp->fp_pid = PID_FREE;
|
|
fp->fp_flags = FP_NOFLAGS;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_exit *
|
|
*===========================================================================*/
|
|
void pm_exit(void)
|
|
{
|
|
/* Perform the file system portion of the exit(status) system call.
|
|
* This function is called from the context of the exiting process.
|
|
*/
|
|
|
|
free_proc(FP_EXITING);
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_setgid *
|
|
*===========================================================================*/
|
|
void
|
|
pm_setgid(endpoint_t proc_e, int egid, int rgid)
|
|
{
|
|
register struct fproc *tfp;
|
|
int slot;
|
|
|
|
okendpt(proc_e, &slot);
|
|
tfp = &fproc[slot];
|
|
|
|
tfp->fp_effgid = egid;
|
|
tfp->fp_realgid = rgid;
|
|
}
|
|
|
|
|
|
/*===========================================================================*
|
|
* pm_setgroups *
|
|
*===========================================================================*/
|
|
void
|
|
pm_setgroups(endpoint_t proc_e, int ngroups, gid_t *groups)
|
|
{
|
|
struct fproc *rfp;
|
|
int slot;
|
|
|
|
okendpt(proc_e, &slot);
|
|
rfp = &fproc[slot];
|
|
if (ngroups * sizeof(gid_t) > sizeof(rfp->fp_sgroups))
|
|
panic("VFS: pm_setgroups: too much data to copy");
|
|
if (sys_datacopy_wrapper(who_e, (vir_bytes) groups, SELF, (vir_bytes) rfp->fp_sgroups,
|
|
ngroups * sizeof(gid_t)) == OK) {
|
|
rfp->fp_ngroups = ngroups;
|
|
} else
|
|
panic("VFS: pm_setgroups: datacopy failed");
|
|
}
|
|
|
|
|
|
/*===========================================================================*
|
|
* pm_setuid *
|
|
*===========================================================================*/
|
|
void
|
|
pm_setuid(endpoint_t proc_e, int euid, int ruid)
|
|
{
|
|
struct fproc *tfp;
|
|
int slot;
|
|
|
|
okendpt(proc_e, &slot);
|
|
tfp = &fproc[slot];
|
|
|
|
tfp->fp_effuid = euid;
|
|
tfp->fp_realuid = ruid;
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_setsid *
|
|
*===========================================================================*/
|
|
void pm_setsid(endpoint_t proc_e)
{
/* Perform the VFS side of the SETSID call, i.e. get rid of the controlling
 * terminal of a process, and make the process a session leader.
 */
  struct fproc *leader;
  int slot;

  okendpt(proc_e, &slot);
  leader = &fproc[slot];

  /* Make the process a session leader with no controlling tty. */
  leader->fp_tty = 0;
  leader->fp_flags |= FP_SESLDR;
}
|
|
|
|
/*===========================================================================*
|
|
* do_svrctl *
|
|
*===========================================================================*/
|
|
int do_svrctl(void)
|
|
{
|
|
unsigned long svrctl;
|
|
vir_bytes ptr;
|
|
|
|
svrctl = job_m_in.m_lc_svrctl.request;
|
|
ptr = job_m_in.m_lc_svrctl.arg;
|
|
|
|
if (IOCGROUP(svrctl) != 'F') return(EINVAL);
|
|
|
|
switch (svrctl) {
|
|
case VFSSETPARAM:
|
|
case VFSGETPARAM:
|
|
{
|
|
struct sysgetenv sysgetenv;
|
|
char search_key[64];
|
|
char val[64];
|
|
int r, s;
|
|
|
|
/* Copy sysgetenv structure to VFS */
|
|
if (sys_datacopy_wrapper(who_e, ptr, SELF, (vir_bytes) &sysgetenv,
|
|
sizeof(sysgetenv)) != OK)
|
|
return(EFAULT);
|
|
|
|
/* Basic sanity checking */
|
|
if (svrctl == VFSSETPARAM) {
|
|
if (sysgetenv.keylen <= 0 ||
|
|
sysgetenv.keylen > (sizeof(search_key) - 1) ||
|
|
sysgetenv.vallen <= 0 ||
|
|
sysgetenv.vallen >= sizeof(val)) {
|
|
return(EINVAL);
|
|
}
|
|
}
|
|
|
|
/* Copy parameter "key" */
|
|
if ((s = sys_datacopy_wrapper(who_e, (vir_bytes) sysgetenv.key,
|
|
SELF, (vir_bytes) search_key,
|
|
sysgetenv.keylen)) != OK)
|
|
return(s);
|
|
search_key[sysgetenv.keylen] = '\0'; /* Limit string */
|
|
|
|
/* Is it a parameter we know? */
|
|
if (svrctl == VFSSETPARAM) {
|
|
if (!strcmp(search_key, "verbose")) {
|
|
int verbose_val;
|
|
if ((s = sys_datacopy_wrapper(who_e,
|
|
(vir_bytes) sysgetenv.val, SELF,
|
|
(vir_bytes) &val, sysgetenv.vallen)) != OK)
|
|
return(s);
|
|
val[sysgetenv.vallen] = '\0'; /* Limit string */
|
|
verbose_val = atoi(val);
|
|
if (verbose_val < 0 || verbose_val > 4) {
|
|
return(EINVAL);
|
|
}
|
|
verbose = verbose_val;
|
|
r = OK;
|
|
} else {
|
|
r = ESRCH;
|
|
}
|
|
} else { /* VFSGETPARAM */
|
|
char small_buf[60];
|
|
|
|
r = ESRCH;
|
|
if (!strcmp(search_key, "print_traces")) {
|
|
mthread_stacktraces();
|
|
sysgetenv.val = 0;
|
|
sysgetenv.vallen = 0;
|
|
r = OK;
|
|
} else if (!strcmp(search_key, "print_select")) {
|
|
select_dump();
|
|
sysgetenv.val = 0;
|
|
sysgetenv.vallen = 0;
|
|
r = OK;
|
|
} else if (!strcmp(search_key, "active_threads")) {
|
|
int active = NR_WTHREADS - worker_available();
|
|
snprintf(small_buf, sizeof(small_buf) - 1,
|
|
"%d", active);
|
|
sysgetenv.vallen = strlen(small_buf);
|
|
r = OK;
|
|
}
|
|
|
|
if (r == OK) {
|
|
if ((s = sys_datacopy_wrapper(SELF,
|
|
(vir_bytes) &sysgetenv, who_e, ptr,
|
|
sizeof(sysgetenv))) != OK)
|
|
return(s);
|
|
if (sysgetenv.val != 0) {
|
|
if ((s = sys_datacopy_wrapper(SELF,
|
|
(vir_bytes) small_buf, who_e,
|
|
(vir_bytes) sysgetenv.val,
|
|
sysgetenv.vallen)) != OK)
|
|
return(s);
|
|
}
|
|
}
|
|
}
|
|
|
|
return(r);
|
|
}
|
|
default:
|
|
return(EINVAL);
|
|
}
|
|
}
|
|
|
|
/*===========================================================================*
|
|
* pm_dumpcore *
|
|
*===========================================================================*/
|
|
int pm_dumpcore(int csig, vir_bytes exe_name)
{
/* Write a core dump of the current process to the file "core.<pid>" and then
 * perform exit processing for it. 'csig' is passed on to the core writer;
 * 'exe_name' is the address, within PM, of the process name. Returns OK, or
 * the error that prevented the dump from being written; exit processing is
 * performed in either case.
 */
  char core_path[PATH_MAX];
  char proc_name[PROC_NAME_LEN];
  struct filp *f;
  int core_fd, r;

  /* In effect, the coredump is generated through the use of calls as if made
   * by the process itself. As such, the process must not be doing anything
   * else. Therefore, if the process was blocked on anything, unblock it
   * first. This step is the reason we cannot use this function to generate a
   * core dump of a process while it is still running (i.e., without
   * terminating it), as it changes the state of the process.
   */
  if (fp_is_blocked(fp))
	unpause();

  /* open core file */
  snprintf(core_path, PATH_MAX, "%s.%d", CORE_NAME, fp->fp_pid);
  core_fd = common_open(core_path, O_WRONLY | O_CREAT | O_TRUNC,
	CORE_MODE, FALSE /*for_exec*/);
  r = core_fd;

  if (r >= 0) {
	/* get process name */
	r = sys_datacopy_wrapper(PM_PROC_NR, exe_name, VFS_PROC_NR,
		(vir_bytes) proc_name, PROC_NAME_LEN);

	if (r == OK) {
		proc_name[PROC_NAME_LEN - 1] = '\0';

		/* write the core dump */
		f = get_filp(core_fd, VNODE_WRITE);
		assert(f != NULL);
		write_elf_core_file(f, csig, proc_name);
		unlock_filp(f);
	}
  }

  /* The core file descriptor will be closed as part of the process exit. */
  free_proc(FP_EXITING);

  return(r);
}
|
|
|
|
/*===========================================================================*
|
|
* ds_event *
|
|
*===========================================================================*/
|
|
void
|
|
ds_event(void)
|
|
{
|
|
char key[DS_MAX_KEYLEN];
|
|
char *blkdrv_prefix = "drv.blk.";
|
|
char *chrdrv_prefix = "drv.chr.";
|
|
char *sckdrv_prefix = "drv.sck.";
|
|
u32_t value;
|
|
int type, ftype, r;
|
|
endpoint_t owner_endpoint;
|
|
|
|
/* Get the event and the owner from DS. */
|
|
while ((r = ds_check(key, &type, &owner_endpoint)) == OK) {
|
|
/* Only check for block, character, socket driver up events. */
|
|
if (!strncmp(key, blkdrv_prefix, strlen(blkdrv_prefix))) {
|
|
ftype = S_IFBLK;
|
|
} else if (!strncmp(key, chrdrv_prefix, strlen(chrdrv_prefix))) {
|
|
ftype = S_IFCHR;
|
|
} else if (!strncmp(key, sckdrv_prefix, strlen(sckdrv_prefix))) {
|
|
ftype = S_IFSOCK;
|
|
} else {
|
|
continue;
|
|
}
|
|
|
|
if ((r = ds_retrieve_u32(key, &value)) != OK) {
|
|
printf("VFS: ds_event: ds_retrieve_u32 failed\n");
|
|
break;
|
|
}
|
|
if (value != DS_DRIVER_UP) continue;
|
|
|
|
/* Perform up. */
|
|
if (ftype == S_IFBLK || ftype == S_IFCHR)
|
|
dmap_endpt_up(owner_endpoint, (ftype == S_IFBLK));
|
|
else
|
|
smap_endpt_up(owner_endpoint);
|
|
}
|
|
|
|
if (r != ENOENT) printf("VFS: ds_event: ds_check failed: %d\n", r);
|
|
}
|
|
|
|
/* A function to be called on panic(): print a header line followed by the
 * stack traces that mthread_stacktraces() produces.
 */
void panic_hook(void)
{
  printf("VFS mthread stacktraces:\n");
  mthread_stacktraces();
}
|
|
|
|
/*===========================================================================*
|
|
* do_getrusage *
|
|
*===========================================================================*/
|
|
int do_getrusage(void)
|
|
{
|
|
/* Obsolete vfs_getrusage(2) call from userland. The getrusage call is
|
|
* now fully handled by PM, and for any future fields that should be
|
|
* supplied by VFS, VFS should be queried by PM rather than by the user
|
|
* program directly. TODO: remove this call after the next release.
|
|
*/
|
|
return OK;
|
|
}
|