David van Moolenbroek 56dc79cea0 IPC server: major fixes, test set for semaphores
- rewrite the semop(2) implementation so that it now conforms to the
  specification, including atomicity, support for blocking more than
  once, range checks, but also basic fairness support;
- fix permissions checking;
- fix missing time adjustments;
- fix off-by-one errors and other bugs;
- do not allocate dynamic memory for GETALL/SETALL;
- add test88, which properly tests the semaphore functionality.

Change-Id: I85f0d3408c0d6bba41cfb4c91a34c8b46b2a5959
2016-01-16 14:04:11 +01:00

791 lines
21 KiB
C

#include "inc.h"
struct sem_struct;
/* IPC-server process table, currently used for semaphores only. */
struct iproc {
struct sem_struct *ip_sem; /* affected semaphore set, or NULL */
struct sembuf *ip_sops; /* pending operations (malloc'ed) */
unsigned int ip_nsops; /* number of pending operations */
struct sembuf *ip_blkop; /* pointer to operation that blocked */
endpoint_t ip_endpt; /* process endpoint */
pid_t ip_pid; /* process PID */
TAILQ_ENTRY(iproc) ip_next; /* next waiting process */
} iproc[NR_PROCS];
struct semaphore {
unsigned short semval; /* semaphore value */
unsigned short semzcnt; /* # waiting for zero */
unsigned short semncnt; /* # waiting for increase */
pid_t sempid; /* process that did last op */
};
/*
* For the list of waiting processes, we use a doubly linked tail queue. In
* order to maintain a basic degree of fairness, we keep the pending processes
* in FCFS (well, at least first tested) order, which means we need to be able
* to add new processes at the end of the list. In order to remove waiting
* processes O(1) instead of O(n) we need a doubly linked list; in the common
* case we do have the element's predecessor, but STAILQ_REMOVE is O(n) anyway
* and NetBSD has no STAILQ_REMOVE_AFTER yet.
*
* We use one list per semaphore set: semop(2) affects only one semaphore set,
* but it may involve operations on multiple semaphores within the set. While
* it is possible to recheck only semaphores that were affected by a particular
* operation, and associate waiting lists to individual semaphores, the number
* of expected waiting processes is currently not high enough to justify the
* extra complexity of such an implementation.
*/
struct sem_struct {
struct semid_ds semid_ds;
struct semaphore sems[SEMMSL];
TAILQ_HEAD(waiters, iproc) waiters;
};
static struct sem_struct sem_list[SEMMNI];
static unsigned int sem_list_nr = 0; /* highest in-use slot number plus one */
/*
* Find a semaphore set by key. The given key must not be IPC_PRIVATE. Return
* a pointer to the semaphore set if found, or NULL otherwise.
*/
static struct sem_struct *
sem_find_key(key_t key)
{
unsigned int i;
for (i = 0; i < sem_list_nr; i++) {
if (!(sem_list[i].semid_ds.sem_perm.mode & SEM_ALLOC))
continue;
if (sem_list[i].semid_ds.sem_perm._key == key)
return &sem_list[i];
}
return NULL;
}
/*
* Find a semaphore set by identifier. Return a pointer to the semaphore set
* if found, or NULL otherwise.
*/
static struct sem_struct *
sem_find_id(int id)
{
struct sem_struct *sem;
unsigned int i;
i = IPCID_TO_IX(id);
if (i >= sem_list_nr)
return NULL;
sem = &sem_list[i];
if (!(sem->semid_ds.sem_perm.mode & SEM_ALLOC))
return NULL;
if (sem->semid_ds.sem_perm._seq != IPCID_TO_SEQ(id))
return NULL;
return sem;
}
/*
* Implementation of the semget(2) system call.
*/
int
do_semget(message * m)
{
struct sem_struct *sem;
unsigned int i, seq;
key_t key;
int nsems, flag;
key = m->m_lc_ipc_semget.key;
nsems = m->m_lc_ipc_semget.nr;
flag = m->m_lc_ipc_semget.flag;
if (key != IPC_PRIVATE && (sem = sem_find_key(key)) != NULL) {
if ((flag & IPC_CREAT) && (flag & IPC_EXCL))
return EEXIST;
if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, flag))
return EACCES;
if (nsems > sem->semid_ds.sem_nsems)
return EINVAL;
i = sem - sem_list;
} else {
if (key != IPC_PRIVATE && !(flag & IPC_CREAT))
return ENOENT;
if (nsems <= 0 || nsems > SEMMSL)
return EINVAL;
/* Find a free entry. */
for (i = 0; i < __arraycount(sem_list); i++)
if (!(sem_list[i].semid_ds.sem_perm.mode & SEM_ALLOC))
break;
if (i == __arraycount(sem_list))
return ENOSPC;
/* Initialize the entry. */
sem = &sem_list[i];
seq = sem->semid_ds.sem_perm._seq;
memset(sem, 0, sizeof(*sem));
sem->semid_ds.sem_perm._key = key;
sem->semid_ds.sem_perm.cuid =
sem->semid_ds.sem_perm.uid = getnuid(m->m_source);
sem->semid_ds.sem_perm.cgid =
sem->semid_ds.sem_perm.gid = getngid(m->m_source);
sem->semid_ds.sem_perm.mode = SEM_ALLOC | (flag & ACCESSPERMS);
sem->semid_ds.sem_perm._seq = (seq + 1) & 0x7fff;
sem->semid_ds.sem_nsems = nsems;
sem->semid_ds.sem_otime = 0;
sem->semid_ds.sem_ctime = clock_time(NULL);
TAILQ_INIT(&sem->waiters);
assert(i <= sem_list_nr);
if (i == sem_list_nr)
sem_list_nr++;
}
m->m_lc_ipc_semget.retid = IXSEQ_TO_IPCID(i, sem->semid_ds.sem_perm);
return OK;
}
/*
* Increase the proper suspension count (semncnt or semzcnt) of the semaphore
* on which the given process is blocked.
*/
static void
inc_susp_count(struct iproc * ip)
{
struct sembuf *blkop;
struct semaphore *sp;
blkop = ip->ip_blkop;
sp = &ip->ip_sem->sems[blkop->sem_num];
if (blkop->sem_op != 0) {
assert(sp->semncnt < USHRT_MAX);
sp->semncnt++;
} else {
assert(sp->semncnt < USHRT_MAX);
sp->semzcnt++;
}
}
/*
* Decrease the proper suspension count (semncnt or semzcnt) of the semaphore
* on which the given process is blocked.
*/
static void
dec_susp_count(struct iproc * ip)
{
struct sembuf *blkop;
struct semaphore *sp;
blkop = ip->ip_blkop;
sp = &ip->ip_sem->sems[blkop->sem_num];
if (blkop->sem_op != 0) {
assert(sp->semncnt > 0);
sp->semncnt--;
} else {
assert(sp->semzcnt > 0);
sp->semzcnt--;
}
}
/*
* Send a reply for a semop(2) call suspended earlier, thus waking up the
* process.
*/
static void
send_reply(endpoint_t who, int ret)
{
message m;
memset(&m, 0, sizeof(m));
m.m_type = ret;
ipc_sendnb(who, &m);
}
/*
* Satisfy or cancel the semop(2) call on which the given process is blocked,
* and send the given reply code (OK or a negative error code) to wake it up,
* unless the given code is EDONTREPLY.
*/
static void
complete_semop(struct iproc * ip, int code)
{
struct sem_struct *sem;
sem = ip->ip_sem;
assert(sem != NULL);
TAILQ_REMOVE(&sem->waiters, ip, ip_next);
dec_susp_count(ip);
assert(ip->ip_sops != NULL);
free(ip->ip_sops);
ip->ip_sops = NULL;
ip->ip_blkop = NULL;
ip->ip_sem = NULL;
if (code != EDONTREPLY)
send_reply(ip->ip_endpt, code);
}
/*
* Free up the given semaphore set. This includes cancelling any blocking
* semop(2) calls on any of its semaphores.
*/
static void
remove_set(struct sem_struct * sem)
{
struct iproc *ip;
/*
* Cancel all semop(2) operations on this semaphore set, with an EIDRM
* reply code.
*/
while (!TAILQ_EMPTY(&sem->waiters)) {
ip = TAILQ_FIRST(&sem->waiters);
complete_semop(ip, EIDRM);
}
/* Mark the entry as free. */
sem->semid_ds.sem_perm.mode &= ~SEM_ALLOC;
/*
* This may have been the last in-use slot in the list. Ensure that
* sem_list_nr again equals the highest in-use slot number plus one.
*/
while (sem_list_nr > 0 &&
!(sem_list[sem_list_nr - 1].semid_ds.sem_perm.mode & SEM_ALLOC))
sem_list_nr--;
}
/*
* Try to perform a set of semaphore operations, as given by semop(2), on a
* semaphore set. The entire action must be atomic, i.e., either succeed in
* its entirety or fail without making any changes. Return OK on success, in
* which case the PIDs of all affected semaphores will be updated to the given
* 'pid' value, and the semaphore set's sem_otime will be updated as well.
* Return SUSPEND if the call should be suspended, in which case 'blkop' will
* be set to a pointer to the operation causing the call to block. Return an
* error code if the call failed altogether.
*/
static int
try_semop(struct sem_struct *sem, struct sembuf *sops, unsigned int nsops,
pid_t pid, struct sembuf ** blkop)
{
struct semaphore *sp;
struct sembuf *op;
unsigned int i;
int r;
/*
* The operation must be processed atomically. However, it must also
* be processed "in array order," which we assume to mean that while
* processing one operation, the changes of the previous operations
* must be taken into account. This is relevant for cases where the
* same semaphore is referenced by more than one operation, for example
* to perform an atomic increase-if-zero action on a single semaphore.
* As a result, we must optimistically modify semaphore values and roll
* back on suspension or failure afterwards.
*/
r = OK;
op = NULL;
for (i = 0; i < nsops; i++) {
sp = &sem->sems[sops[i].sem_num];
op = &sops[i];
if (op->sem_op > 0) {
if (SEMVMX - sp->semval < op->sem_op) {
r = ERANGE;
break;
}
sp->semval += op->sem_op;
} else if (op->sem_op < 0) {
/*
* No SEMVMX check; if the process wants to deadlock
* itself by supplying -SEMVMX it is free to do so..
*/
if ((int)sp->semval < -(int)op->sem_op) {
r = (op->sem_flg & IPC_NOWAIT) ? EAGAIN :
SUSPEND;
break;
}
sp->semval += op->sem_op;
} else /* (op->sem_op == 0) */ {
if (sp->semval != 0) {
r = (op->sem_flg & IPC_NOWAIT) ? EAGAIN :
SUSPEND;
break;
}
}
}
/*
* If we did not go through all the operations, then either an error
* occurred or the user process is to be suspended. In that case we
* must roll back any progress we have made so far, and return the
* operation that caused the call to block.
*/
if (i < nsops) {
assert(op != NULL);
*blkop = op;
/* Roll back all changes made so far. */
while (i-- > 0)
sem->sems[sops[i].sem_num].semval -= sops[i].sem_op;
assert(r != OK);
return r;
}
/*
* The operation has completed successfully. Also update all affected
* semaphores' PID values, and the semaphore set's last-semop time.
* The caller must do everything else.
*/
for (i = 0; i < nsops; i++)
sem->sems[sops[i].sem_num].sempid = pid;
sem->semid_ds.sem_otime = clock_time(NULL);
return OK;
}
/*
* Check whether any blocked operations can now be satisfied on any of the
* semaphores in the given semaphore set. Do this repeatedly as necessary, as
* any unblocked operation may in turn allow other operations to be resumed.
*/
static void
check_set(struct sem_struct * sem)
{
struct iproc *ip, *nextip;
struct sembuf *blkop;
int r, woken_up;
/*
* Go through all the waiting processes in FIFO order, which is our
* best attempt at providing at least some fairness. Keep trying as
* long as we woke up at least one process, which means we made actual
* progress.
*/
do {
woken_up = FALSE;
TAILQ_FOREACH_SAFE(ip, &sem->waiters, ip_next, nextip) {
/* Retry the entire semop(2) operation, atomically. */
r = try_semop(ip->ip_sem, ip->ip_sops, ip->ip_nsops,
ip->ip_pid, &blkop);
if (r != SUSPEND) {
/* Success or failure. */
complete_semop(ip, r);
/* No changes are made on failure. */
if (r == OK)
woken_up = TRUE;
} else if (blkop != ip->ip_blkop) {
/*
* The process stays suspended, but it is now
* blocked on a different semaphore. As a
* result, we need to adjust the semaphores'
* suspension counts.
*/
dec_susp_count(ip);
ip->ip_blkop = blkop;
inc_susp_count(ip);
}
}
} while (woken_up);
}
/*
* Implementation of the semctl(2) system call.
*/
int
do_semctl(message * m)
{
static unsigned short valbuf[SEMMSL];
unsigned int i;
vir_bytes opt;
uid_t uid;
int r, id, num, cmd, val;
struct semid_ds tmp_ds;
struct sem_struct *sem;
struct seminfo sinfo;
id = m->m_lc_ipc_semctl.id;
num = m->m_lc_ipc_semctl.num;
cmd = m->m_lc_ipc_semctl.cmd;
opt = m->m_lc_ipc_semctl.opt;
/*
* Look up the target semaphore set. The IPC_INFO and SEM_INFO
* commands have no associated semaphore set. The SEM_STAT command
* takes an array index into the semaphore set table. For all other
* commands, look up the semaphore set by its given identifier.
* */
switch (cmd) {
case IPC_INFO:
case SEM_INFO:
sem = NULL;
break;
case SEM_STAT:
if (id < 0 || (unsigned int)id >= sem_list_nr)
return EINVAL;
sem = &sem_list[id];
if (!(sem->semid_ds.sem_perm.mode & SEM_ALLOC))
return EINVAL;
break;
default:
if ((sem = sem_find_id(id)) == NULL)
return EINVAL;
break;
}
/*
* Check if the caller has the appropriate permissions on the target
* semaphore set. SETVAL and SETALL require write permission. IPC_SET
* and IPC_RMID require ownership permission, and return EPERM instead
* of EACCES on failure. IPC_INFO and SEM_INFO are free for general
* use. All other calls require read permission.
*/
switch (cmd) {
case SETVAL:
case SETALL:
assert(sem != NULL);
if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, IPC_W))
return EACCES;
break;
case IPC_SET:
case IPC_RMID:
assert(sem != NULL);
uid = getnuid(m->m_source);
if (uid != sem->semid_ds.sem_perm.cuid &&
uid != sem->semid_ds.sem_perm.uid && uid != 0)
return EPERM;
break;
case IPC_INFO:
case SEM_INFO:
break;
default:
assert(sem != NULL);
if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, IPC_R))
return EACCES;
}
switch (cmd) {
case IPC_STAT:
case SEM_STAT:
if ((r = sys_datacopy(SELF, (vir_bytes)&sem->semid_ds,
m->m_source, opt, sizeof(sem->semid_ds))) != OK)
return r;
if (cmd == SEM_STAT)
m->m_lc_ipc_semctl.ret =
IXSEQ_TO_IPCID(id, sem->semid_ds.sem_perm);
break;
case IPC_SET:
if ((r = sys_datacopy(m->m_source, opt, SELF,
(vir_bytes)&tmp_ds, sizeof(tmp_ds))) != OK)
return r;
sem->semid_ds.sem_perm.uid = tmp_ds.sem_perm.uid;
sem->semid_ds.sem_perm.gid = tmp_ds.sem_perm.gid;
sem->semid_ds.sem_perm.mode &= ~ACCESSPERMS;
sem->semid_ds.sem_perm.mode |=
tmp_ds.sem_perm.mode & ACCESSPERMS;
sem->semid_ds.sem_ctime = clock_time(NULL);
break;
case IPC_RMID:
/*
* Awaken all processes blocked in semop(2) on any semaphore in
* this set, and remove the semaphore set itself.
*/
remove_set(sem);
break;
case IPC_INFO:
case SEM_INFO:
memset(&sinfo, 0, sizeof(sinfo));
sinfo.semmap = SEMMNI;
sinfo.semmni = SEMMNI;
sinfo.semmns = SEMMNI * SEMMSL;
sinfo.semmnu = 0; /* TODO: support for SEM_UNDO */
sinfo.semmsl = SEMMSL;
sinfo.semopm = SEMOPM;
sinfo.semume = 0; /* TODO: support for SEM_UNDO */
if (cmd == SEM_INFO) {
/*
* For SEM_INFO the semusz field is expected to contain
* the number of semaphore sets currently in use.
*/
sinfo.semusz = sem_list_nr;
} else
sinfo.semusz = 0; /* TODO: support for SEM_UNDO */
sinfo.semvmx = SEMVMX;
if (cmd == SEM_INFO) {
/*
* For SEM_INFO the semaem field is expected to contain
* the total number of allocated semaphores.
*/
for (i = 0; i < sem_list_nr; i++)
sinfo.semaem += sem_list[i].semid_ds.sem_nsems;
} else
sinfo.semaem = 0; /* TODO: support for SEM_UNDO */
if ((r = sys_datacopy(SELF, (vir_bytes)&sinfo, m->m_source,
opt, sizeof(sinfo))) != OK)
return r;
/* Return the highest in-use slot number if any, or zero. */
if (sem_list_nr > 0)
m->m_lc_ipc_semctl.ret = sem_list_nr - 1;
else
m->m_lc_ipc_semctl.ret = 0;
break;
case GETALL:
assert(sem->semid_ds.sem_nsems <= __arraycount(valbuf));
for (i = 0; i < sem->semid_ds.sem_nsems; i++)
valbuf[i] = sem->sems[i].semval;
r = sys_datacopy(SELF, (vir_bytes)valbuf, m->m_source,
opt, sizeof(unsigned short) * sem->semid_ds.sem_nsems);
if (r != OK)
return r;
break;
case GETNCNT:
if (num < 0 || num >= sem->semid_ds.sem_nsems)
return EINVAL;
m->m_lc_ipc_semctl.ret = sem->sems[num].semncnt;
break;
case GETPID:
if (num < 0 || num >= sem->semid_ds.sem_nsems)
return EINVAL;
m->m_lc_ipc_semctl.ret = sem->sems[num].sempid;
break;
case GETVAL:
if (num < 0 || num >= sem->semid_ds.sem_nsems)
return EINVAL;
m->m_lc_ipc_semctl.ret = sem->sems[num].semval;
break;
case GETZCNT:
if (num < 0 || num >= sem->semid_ds.sem_nsems)
return EINVAL;
m->m_lc_ipc_semctl.ret = sem->sems[num].semzcnt;
break;
case SETALL:
assert(sem->semid_ds.sem_nsems <= __arraycount(valbuf));
r = sys_datacopy(m->m_source, opt, SELF, (vir_bytes)valbuf,
sizeof(unsigned short) * sem->semid_ds.sem_nsems);
if (r != OK)
return r;
for (i = 0; i < sem->semid_ds.sem_nsems; i++)
if (valbuf[i] > SEMVMX)
return ERANGE;
#ifdef DEBUG_SEM
for (i = 0; i < sem->semid_ds.sem_nsems; i++)
printf("SEMCTL: SETALL val: [%d] %d\n", i, valbuf[i]);
#endif
for (i = 0; i < sem->semid_ds.sem_nsems; i++)
sem->sems[i].semval = valbuf[i];
sem->semid_ds.sem_ctime = clock_time(NULL);
/* Awaken any waiting parties if now possible. */
check_set(sem);
break;
case SETVAL:
val = (int)opt;
if (num < 0 || num >= sem->semid_ds.sem_nsems)
return EINVAL;
if (val < 0 || val > SEMVMX)
return ERANGE;
sem->sems[num].semval = val;
#ifdef DEBUG_SEM
printf("SEMCTL: SETVAL: %d %d\n", num, val);
#endif
sem->semid_ds.sem_ctime = clock_time(NULL);
/* Awaken any waiting parties if now possible. */
check_set(sem);
break;
default:
return EINVAL;
}
return OK;
}
/*
* Implementation of the semop(2) system call.
*/
int
do_semop(message * m)
{
unsigned int i, mask, slot;
int id, r;
struct sembuf *sops, *blkop;
unsigned int nsops;
struct sem_struct *sem;
struct iproc *ip;
pid_t pid;
id = m->m_lc_ipc_semop.id;
nsops = m->m_lc_ipc_semop.size;
if ((sem = sem_find_id(id)) == NULL)
return EINVAL;
if (nsops == 0)
return OK; /* nothing to do */
if (nsops > SEMOPM)
return E2BIG;
/* Get the array from the user process. */
sops = malloc(sizeof(sops[0]) * nsops);
if (sops == NULL)
return ENOMEM;
r = sys_datacopy(m->m_source, (vir_bytes)m->m_lc_ipc_semop.ops, SELF,
(vir_bytes)sops, sizeof(sops[0]) * nsops);
if (r != OK)
goto out_free;
#ifdef DEBUG_SEM
for (i = 0; i < nsops; i++)
printf("SEMOP: num:%d op:%d flg:%d\n",
sops[i].sem_num, sops[i].sem_op, sops[i].sem_flg);
#endif
/*
* Check for permissions. We do this only once, even though the call
* might suspend and the semaphore set's permissions might be changed
* before the call resumes. The specification is not clear on this.
* Either way, perform the permission check before checking on the
* validity of semaphore numbers, since obtaining the semaphore set
* size itself requires read permission (except through sysctl(2)..).
*/
mask = 0;
for (i = 0; i < nsops; i++) {
if (sops[i].sem_op != 0)
mask |= IPC_W; /* check for write permission */
else
mask |= IPC_R; /* check for read permission */
}
r = EACCES;
if (!check_perm(&sem->semid_ds.sem_perm, m->m_source, mask))
goto out_free;
/* Check that all given semaphore numbers are within range. */
r = EFBIG;
for (i = 0; i < nsops; i++)
if (sops[i].sem_num >= sem->semid_ds.sem_nsems)
goto out_free;
/*
* Do not check if the same semaphore is referenced more than once
* (there was such a check here originally), because that is actually
* a valid case. The result is however that it is possible to
* construct a semop(2) request that will never complete, and thus,
* care must be taken that such requests do not create potential
* deadlock situations etc.
*/
pid = getnpid(m->m_source);
/*
* We do not yet support SEM_UNDO at all, so we better not give the
* caller the impression that we do. For now, print a warning so that
* we know when an application actually fails for that reason.
*/
for (i = 0; i < nsops; i++) {
if (sops[i].sem_flg & SEM_UNDO) {
/* Print a warning only if this isn't the test set.. */
if (sops[i].sem_flg != SHRT_MAX)
printf("IPC: pid %d tried to use SEM_UNDO\n",
pid);
r = EINVAL;
goto out_free;
}
}
/* Try to perform the operation now. */
r = try_semop(sem, sops, nsops, pid, &blkop);
if (r == SUSPEND) {
/*
* The operation ended up blocking on a particular semaphore
* operation. Save all details in the slot for the user
* process, and add it to the list of processes waiting for
* this semaphore set.
*/
slot = _ENDPOINT_P(m->m_source);
assert(slot < __arraycount(iproc));
ip = &iproc[slot];
assert(ip->ip_sem == NULL); /* can't already be in use */
ip->ip_endpt = m->m_source;
ip->ip_pid = pid;
ip->ip_sem = sem;
ip->ip_sops = sops;
ip->ip_nsops = nsops;
ip->ip_blkop = blkop;
TAILQ_INSERT_TAIL(&sem->waiters, ip, ip_next);
inc_susp_count(ip);
return r;
}
out_free:
free(sops);
/* Awaken any other waiting parties if now possible. */
if (r == OK)
check_set(sem);
return r;
}
/*
* Return TRUE iff no semaphore sets are allocated.
*/
int
is_sem_nil(void)
{
return (sem_list_nr == 0);
}
/*
* Check if the given endpoint is blocked on a semop(2) call. If so, cancel
* the call, because either it is interrupted by a signal or the process was
* killed. In the former case, unblock the process by replying with EINTR.
*/
void
sem_process_event(endpoint_t endpt, int has_exited)
{
unsigned int slot;
struct iproc *ip;
slot = _ENDPOINT_P(endpt);
assert(slot < __arraycount(iproc));
ip = &iproc[slot];
/* Was the process blocked on a semop(2) call at all? */
if (ip->ip_sem == NULL)
return;
assert(ip->ip_endpt == endpt);
/*
* It was; cancel the semop(2) call. If the process is being removed
* because its call was interrupted by a signal, then we must wake it
* up with EINTR.
*/
complete_semop(ip, has_exited ? EDONTREPLY : EINTR);
}