/* MIB service - proc.c - functionality based on service process tables */
/* Eventually, the CTL_PROC subtree might end up here as well. */

#include "mib.h"

#include <sys/exec.h>
#include <minix/sysinfo.h>

#include <machine/archtypes.h>
#include "kernel/proc.h"
#include "servers/pm/mproc.h"
#include "servers/vfs/const.h"
#include "servers/vfs/fproc.h"

typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;

static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static struct fproc_light fproc_tab[NR_PROCS];

/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls.  We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation
 * request and its subsequent data retrieval request.  On the other hand, if
 * we do update the process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS	8

#define HASH_SLOTS	(NR_PROCS / 4)	/* expected nr. of processes in use */
#define NO_SLOT		(-1)
static int hash_tab[HASH_SLOTS];	/* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];		/* ..to PM process slot */

static clock_t tabs_updated = 0;	/* when the tables were last updated */
static int tabs_valid = TRUE;		/* FALSE if obtaining tables failed */

/*
 * Update the process tables by pulling in new copies from the kernel, PM, and
 * VFS, but only every so often and only if it has not failed before.  Return
 * TRUE iff the tables are now valid.
 */
static int
update_tables(void)
{
	clock_t now;
	pid_t pid;
	int r, kslot, mslot, hslot;

	/*
	 * If retrieving the tables failed at some point, do not keep trying
	 * all the time.  Such a failure is very unlikely to be transient.
	 */
	if (tabs_valid == FALSE)
		return FALSE;

	/*
	 * Update the tables once per clock tick at most.  The update
	 * operation is rather heavy, transferring several hundreds of
	 * kilobytes between servers.  Userland should be able to live with
	 * information that is outdated by at most one clock tick.
	 */
	now = getticks();

	if (tabs_updated != 0 && tabs_updated == now)
		return TRUE;

	/* Perform an actual update now. */
	tabs_valid = FALSE;

	/* Retrieve and check the kernel process table. */
	if ((r = sys_getproctab(proc_tab)) != OK) {
		printf("MIB: unable to obtain kernel process table (%d)\n",
		    r);

		return FALSE;
	}

	for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
		if (proc_tab[kslot].p_magic != PMAGIC) {
			printf("MIB: kernel process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve and check the PM process table. */
	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain PM process table (%d)\n", r);

		return FALSE;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
			printf("MIB: PM process table mismatch\n");

			return FALSE;
		}
	}

	/* Retrieve an extract of the VFS process table. */
	r = getsysinfo(VFS_PROC_NR, SI_PROCLIGHT_TAB, fproc_tab,
	    sizeof(fproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain VFS process table (%d)\n", r);

		return FALSE;
	}

	tabs_valid = TRUE;
	tabs_updated = now;

	/*
	 * Build a hash table mapping from process IDs to slot numbers, for
	 * fast access.  TODO: decide if this is better done on demand only.
	 */
	for (hslot = 0; hslot < HASH_SLOTS; hslot++)
		hash_tab[hslot] = NO_SLOT;

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_flags & IN_USE) {
			if ((pid = mproc_tab[mslot].mp_pid) <= 0)
				continue;

			hslot = pid % HASH_SLOTS;

			hnext_tab[mslot] = hash_tab[hslot];
			hash_tab[hslot] = mslot;
		}
	}

	return TRUE;
}
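/*
 * To illustrate the hash scheme with example values (not from the source):
 * with NR_PROCS = 256 and thus HASH_SLOTS = 64, PIDs 13 and 77 both map to
 * slot 13.  If both are in use, hash_tab[13] holds the PM slot of the process
 * inserted last, and its hnext_tab entry points to the other one, forming a
 * simple chain that get_mslot() below walks until the PIDs match.
 */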
/*
 * Return the PM slot number for the given PID, or NO_SLOT if the PID is not
 * in use by a process.
 */
static int
get_mslot(pid_t pid)
{
	int mslot;

	/* PID 0 identifies the kernel; checking this is up to the caller. */
	if (pid <= 0)
		return NO_SLOT;

	for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
	    mslot = hnext_tab[mslot])
		if (mproc_tab[mslot].mp_pid == pid)
			break;

	return mslot;
}

/*
 * Store the given number of clock ticks as a timeval structure.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
	clock_t hz;

	hz = sys_hz();

	tv->tv_sec = ticks / hz;
	tv->tv_usec = (long)((ticks % hz) * 1000000ULL / hz);
}

/*
 * Generate a wchan message text for the cases that the process is blocked on
 * IPC with another process, of which the endpoint is given as 'endpt' here.
 * The name of the other process is to be stored in 'wmesg', which is a buffer
 * of size 'wmsz'.  The result should be null terminated.  If 'ipc' is set,
 * the process is blocked on a direct IPC call, in which case the name of the
 * other process is enclosed in parentheses.  If 'ipc' is not set, the call is
 * made indirectly through VFS, and the name of the other process should not
 * be enclosed in parentheses.  If no name can be obtained, we use the
 * endpoint of the other process instead.
 */
static void
fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
{
	const char *name;
	int mslot;

	switch (endpt) {
	case ANY:
		name = "any";
		break;
	case SELF:
		name = "self";
		break;
	case NONE:
		name = "none";
		break;
	default:
		mslot = _ENDPOINT_P(endpt);
		if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
		    (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
			name = proc_tab[NR_TASKS + mslot].p_name;
		else
			name = NULL;
	}

	if (name != NULL)
		snprintf(wmesg, wmsz, "%s%s%s", ipc ? "(" : "", name,
		    ipc ? ")" : "");
	else
		snprintf(wmesg, wmsz, "%s%d%s", ipc ? "(" : "", endpt,
		    ipc ? ")" : "");
}
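/*
 * For example (a sketch; the actual names depend on the running system): a
 * process send-blocked directly on VFS would get the wmesg text "(vfs)" when
 * fill_wmesg() is called with VFS's endpoint and 'ipc' set, whereas a process
 * blocked inside VFS on a character driver would get the driver's bare name,
 * e.g. "tty", with 'ipc' not set.
 */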
/*
 * Return the LWP status of a process, along with additional information in
 * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
 * what the process is sleeping on, and possibly a flag field modification to
 * indicate that the sleep is interruptible.
 */
static int
get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
	int32_t * flag)
{
	struct mproc *mp;
	struct fproc_light *fp;
	struct proc *kp;
	const char *wmesg;
	uint64_t wchan;
	endpoint_t endpt;

	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];
	kp = &proc_tab[NR_TASKS + mslot];

	/*
	 * First cover all the cases that the process is not sleeping.  In
	 * those cases, we need not return additional sleep information
	 * either.
	 */
	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		return LSZOMB;
	if (mp->mp_flags & EXITING)
		return LSDEAD;
	if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
		return LSSTOP;
	if (proc_is_runnable(kp))
		return LSRUN;

	/*
	 * The process is sleeping.  In that case, we must also figure out
	 * why, and return an appropriate wchan value and human-readable
	 * wmesg text.
	 *
	 * The process can be blocked on either a known sleep state in PM or
	 * VFS, or otherwise on IPC communication with another process, or
	 * otherwise on a kernel RTS flag.  In each case, decide what to use
	 * as wchan value and wmesg text, and whether the sleep is
	 * interruptible.
	 *
	 * The wchan value should be unique for the sleep reason.  We use its
	 * lower eight bits to indicate a class:
	 *   0x00 = kernel task
	 *   0x01 = kernel RTS block
	 *   0x02 = PM call
	 *   0x03 = VFS call
	 *   0x04 = MIB call
	 *   0xff = blocked on process
	 * The upper bits are used for class-specific information.  The
	 * actual value does not really matter, as long as it is nonzero and
	 * there is no overlap between the different values.
	 */
	wchan = 0;
	wmesg = NULL;

	/*
	 * First see if the process is marked as blocked in the tables of PM
	 * or VFS.  Such a block reason is always an interruptible sleep.
	 * Note that we do not use the kernel table at all in this case: each
	 * of the three tables is consistent within itself, but not
	 * necessarily consistent with any of the other tables, so we avoid
	 * internal mismatches if we can.
	 */
	if (mp->mp_flags & WAITING) {
		wchan = 0x102;
		wmesg = "wait";
	} else if (mp->mp_flags & SIGSUSPENDED) {
		wchan = 0x202;
		wmesg = "pause";
	} else if (fp->fpl_blocked_on != FP_BLOCKED_ON_NONE) {
		wchan = (fp->fpl_blocked_on << 8) | 0x03;
		switch (fp->fpl_blocked_on) {
		case FP_BLOCKED_ON_PIPE:
			wmesg = "pipe";
			break;
		case FP_BLOCKED_ON_FLOCK:
			wmesg = "flock";
			break;
		case FP_BLOCKED_ON_POPEN:
			wmesg = "popen";
			break;
		case FP_BLOCKED_ON_SELECT:
			wmesg = "select";
			break;
		case FP_BLOCKED_ON_CDEV:
			/*
			 * Add the task (= character driver) endpoint to the
			 * wchan value, and use the driver's process name,
			 * without parentheses, as wmesg text.
			 */
			wchan |= (uint64_t)fp->fpl_task << 16;
			fill_wmesg(wmptr, wmsz, fp->fpl_task, FALSE /*ipc*/);
			break;
		default:
			/* A newly added flag we don't yet know about? */
			wmesg = "???";
			break;
		}
	}

	if (wchan != 0)
		*flag |= L_SINTR;

	/*
	 * See if the process is blocked on sending or receiving.  If not,
	 * then use one of the kernel RTS flags as reason.
	 */
	if (wchan == 0) {
		endpt = P_BLOCKEDON(kp);

		switch (endpt) {
		case MIB_PROC_NR:
			/* This is really just aesthetics. */
			wchan = 0x04;
			wmesg = "sysctl";
			break;
		case NONE:
			/*
			 * The process is not running, but also not blocked
			 * on IPC with another process.  This means it must
			 * be stopped on a kernel RTS flag.
			 */
			wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01;
			if (RTS_ISSET(kp, RTS_PROC_STOP))
				wmesg = "kstop";
			else if (RTS_ISSET(kp, RTS_SIGNALED) ||
			    RTS_ISSET(kp, RTS_SIG_PENDING))
				wmesg = "ksignal";
			else if (RTS_ISSET(kp, RTS_NO_PRIV))
				wmesg = "knopriv";
			else if (RTS_ISSET(kp, RTS_PAGEFAULT) ||
			    RTS_ISSET(kp, RTS_VMREQTARGET))
				wmesg = "fault";
			else if (RTS_ISSET(kp, RTS_NO_QUANTUM))
				wmesg = "sched";
			else
				wmesg = "kflag";
			break;
		case ANY:
			/*
			 * If the process is blocked receiving from ANY, mark
			 * it as being in an interruptible sleep.  This looks
			 * nicer, even though "interruptible" is not
			 * applicable to services at all.
			 */
			*flag |= L_SINTR;
			break;
		}
	}

	/*
	 * If at this point wchan is still zero, the process is blocked
	 * sending or receiving.  Use a wchan value based on the target
	 * endpoint, and use "(procname)" as wmesg text.
	 */
	if (wchan == 0) {
		*wcptr = ((uint64_t)endpt << 8) | 0xff;
		fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
	} else {
		*wcptr = wchan;
		if (wmesg != NULL)	/* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
	}

	return LSSLEEP;
}
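/*
 * As a concrete illustration of the encoding above: a process sleeping in
 * wait(2) reports wchan 0x102 (PM class 0x02) and wmesg "wait", while a
 * process send-blocked on another process with endpoint E reports wchan
 * (E << 8) | 0xff and a wmesg of the form "(name)".
 */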
/*
 * Fill the part of a LWP structure that is common between kernel tasks and
 * user processes.  Also return a CPU estimate in 'estcpu', because we
 * generate the value as a side effect here, and the LWP structure has no
 * estcpu field.
 */
static void
fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
{
	struct proc *kp;
	struct timeval tv;
	clock_t uptime;
	uint32_t hz;

	kp = &proc_tab[kslot];

	uptime = getticks();
	hz = sys_hz();

	/*
	 * We use the process endpoint as the LWP ID.  Not only does this
	 * allow users to obtain process endpoints with "ps -s" (thus
	 * replacing the MINIX3 ps(1)'s "ps -E"), but if we ever do implement
	 * kernel threads, this is probably still going to be accurate.
	 */
	l->l_lid = kp->p_endpoint;

	/*
	 * The time during which the process has not been swapped in or out
	 * is not applicable for us, and thus, we set it to the time the
	 * process has been running (in seconds).  This value is relevant
	 * mostly for ps(1)'s CPU usage correction for processes that have
	 * just started.
	 */
	if (kslot >= NR_TASKS)
		l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
	else
		l->l_swtime = uptime;
	l->l_swtime /= hz;

	/*
	 * Sleep (dequeue) times are not maintained for kernel tasks, so
	 * pretend they are never asleep (which is pretty accurate).
	 */
	if (kslot < NR_TASKS)
		l->l_slptime = 0;
	else
		l->l_slptime = (uptime - kp->p_dequeued) / hz;

	l->l_priority = kp->p_priority;
	l->l_usrpri = kp->p_priority;
	l->l_cpuid = kp->p_cpu;
	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	l->l_rtime_sec = tv.tv_sec;
	l->l_rtime_usec = tv.tv_usec;

	/*
	 * Obtain CPU usage percentages and estimates through library code
	 * shared between the kernel and this service; see its source for
	 * details.  We note that the produced estcpu value is rather
	 * different from the one produced by NetBSD, but this should not be
	 * a problem.
	 */
	l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
	    uptime, hz);
}

/*
 * Fill a LWP structure for a kernel task.  Each kernel task has its own LWP,
 * and all of them have negative PIDs.
 */
static void
fill_lwp_kern(struct kinfo_lwp * l, int kslot)
{
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
	l->l_stat = LSSLEEP;
	l->l_pid = kslot - NR_TASKS;

	/*
	 * When showing LWP entries, ps(1) uses the process name rather than
	 * the LWP name.  All kernel tasks are therefore shown as "[kernel]"
	 * anyway.  We use the wmesg field to show the actual kernel task
	 * name.
	 */
	l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
	strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
	strlcpy(l->l_name, "kernel", sizeof(l->l_name));

	fill_lwp_common(l, kslot, &estcpu);
}

/*
 * Fill a LWP structure for a user process.
 */
static void
fill_lwp_user(struct kinfo_lwp * l, int mslot)
{
	struct mproc *mp;
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	mp = &mproc_tab[mslot];

	l->l_flag = L_INMEM;
	l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
	    sizeof(l->l_wmesg), &l->l_flag);
	l->l_pid = mp->mp_pid;
	strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));

	fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
}
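/*
 * For reference, a hypothetical userland query of the KERN_LWP handler below
 * (the three name components after CTL_KERN/KERN_LWP are the pid, elsz, and
 * elmax values parsed there; pid -1 requests all LWPs):
 *
 *	struct kinfo_lwp lwp[16];
 *	size_t len = sizeof(lwp);
 *	int mib[5] = { CTL_KERN, KERN_LWP, -1, sizeof(lwp[0]), 16 };
 *
 *	if (sysctl(mib, 5, lwp, &len, NULL, 0) == 0)
 *		... len / sizeof(lwp[0]) entries are now filled ...
 */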
/*
 * Implementation of CTL_KERN KERN_LWP.
 */
ssize_t
mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_lwp lwp;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	pid_t pid;
	int r, elsz, elmax, kslot, mslot, last_mslot;

	if (call->call_namelen != 3)
		return EINVAL;

	pid = (pid_t)call->call_name[0];
	elsz = call->call_name[1];
	elmax = call->call_name[2];	/* redundant with the given oldlen.. */

	if (pid < -1 || elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(lwp));

	/*
	 * We model kernel tasks as LWP threads of the kernel (with PID 0).
	 * Modeling the kernel tasks as processes with negative PIDs, like
	 * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
	 * indicates that the caller wants a full listing of LWPs.
	 */
	if (pid <= 0) {
		for (kslot = 0; kslot < NR_TASKS; kslot++) {
			if (mib_inrange(oldp, off) && elmax > 0) {
				fill_lwp_kern(&lwp, kslot);
				if ((r = mib_copyout(oldp, off, &lwp,
				    copysz)) < 0)
					return r;
				elmax--;
			}
			off += elsz;
		}

		/* No need to add extra space here: NR_TASKS is static. */
		if (pid == 0)
			return off;
	}

	/*
	 * With PID 0 out of the way: the user requested the LWP for either a
	 * specific user process (pid > 0), or for all processes (pid < 0).
	 */
	if (pid > 0) {
		if ((mslot = get_mslot(pid)) == NO_SLOT ||
		    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
			return ESRCH;
		last_mslot = mslot;
	} else {
		mslot = 0;
		last_mslot = NR_PROCS - 1;
	}

	for (; mslot <= last_mslot; mslot++) {
		mp = &mproc_tab[mslot];

		if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
		    IN_USE)
			continue;

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_lwp_user(&lwp, mslot);
			if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	if (oldp == NULL && pid < 0)
		off += EXTRA_PROCS * elsz;

	return off;
}

/*
 * Fill the part of a process structure that is common between kernel tasks
 * and user processes.
 */
static void
fill_proc2_common(struct kinfo_proc2 * p, int kslot)
{
	struct vm_usage_info vui;
	struct timeval tv;
	struct proc *kp;
	struct kinfo_lwp l;

	kp = &proc_tab[kslot];

	/*
	 * Much of the information in the LWP structure also ends up in the
	 * process structure.  In order to avoid duplication of some
	 * important code, first generate LWP values and then copy them into
	 * the process structure.
	 */
	memset(&l, 0, sizeof(l));
	fill_lwp_common(&l, kslot, &p->p_estcpu);

	/* Obtain memory usage information from VM.  Ignore failures. */
	memset(&vui, 0, sizeof(vui));
	(void)vm_info_usage(kp->p_endpoint, &vui);

	p->p_rtime_sec = l.l_rtime_sec;
	p->p_rtime_usec = l.l_rtime_usec;
	p->p_cpticks = l.l_cpticks;
	p->p_pctcpu = l.l_pctcpu;
	p->p_swtime = l.l_swtime;
	p->p_slptime = l.l_slptime;
	p->p_uticks = kp->p_user_time;
	p->p_sticks = kp->p_sys_time;
	/* TODO: p->p_iticks */
	ticks_to_timeval(&tv, kp->p_user_time);
	p->p_uutime_sec = tv.tv_sec;
	p->p_uutime_usec = tv.tv_usec;
	ticks_to_timeval(&tv, kp->p_sys_time);
	p->p_ustime_sec = tv.tv_sec;
	p->p_ustime_usec = tv.tv_usec;
	p->p_priority = l.l_priority;
	p->p_usrpri = l.l_usrpri;
	p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE);
	p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE);
	p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE);
	p->p_uru_maxrss = vui.vui_maxrss;
	p->p_uru_minflt = vui.vui_minflt;
	p->p_uru_majflt = vui.vui_majflt;
	p->p_cpuid = l.l_cpuid;
}
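/*
 * A note on the page arithmetic above (example figures, not from the
 * source): howmany() rounds up, so with PAGE_SIZE = 4096 a process with
 * vui_total = 10000 bytes is reported as howmany(10000, 4096) = 3 resident
 * pages.
 */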
/*
 * Fill a process structure for the kernel pseudo-process (with PID 0).
 */
static void
fill_proc2_kern(struct kinfo_proc2 * p)
{

	memset(p, 0, sizeof(*p));

	p->p_flag = L_INMEM | L_SYSTEM | L_SINTR;
	p->p_pid = 0;
	p->p_stat = LSSLEEP;
	p->p_nice = NZERO;

	/* Use the KERNEL task wchan, for consistency between ps and top. */
	p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00;
	strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg));

	strlcpy(p->p_comm, "kernel", sizeof(p->p_comm));
	p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR;
	p->p_realstat = SACTIVE;
	p->p_nlwps = NR_TASKS;

	/*
	 * By using the KERNEL slot here, the kernel process will get a
	 * proper CPU usage average.
	 */
	fill_proc2_common(p, KERNEL + NR_TASKS);
}

/*
 * Fill a process structure for a user process.
 */
static void
fill_proc2_user(struct kinfo_proc2 * p, int mslot)
{
	struct mproc *mp;
	struct fproc_light *fp;
	time_t boottime;
	dev_t tty;
	struct timeval tv;
	int i, r, kslot, zombie;

	memset(p, 0, sizeof(*p));

	if ((r = getuptime(NULL, NULL, &boottime)) != OK)
		panic("getuptime failed: %d", r);

	kslot = NR_TASKS + mslot;
	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];

	zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
	tty = (!zombie) ? fp->fpl_tty : NO_DEV;

	p->p_eflag = 0;
	if (tty != NO_DEV)
		p->p_eflag |= EPROC_CTTY;
	if (mp->mp_pid == mp->mp_procgrp)	/* TODO: job control support */
		p->p_eflag |= EPROC_SLEADER;

	p->p_exitsig = SIGCHLD;	/* TODO */

	p->p_flag = P_INMEM;
	if (mp->mp_flags & TAINTED)
		p->p_flag |= P_SUGID;
	if (mp->mp_tracer != NO_TRACER)
		p->p_flag |= P_TRACED;
	if (tty != NO_DEV)
		p->p_flag |= P_CONTROLT;
	p->p_pid = mp->mp_pid;
	if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS)
		p->p_ppid = mproc_tab[mp->mp_parent].mp_pid;
	p->p_sid = mp->mp_procgrp;	/* TODO: job control support */
	p->p__pgid = mp->mp_procgrp;
	p->p_tpgid = (tty != NO_DEV) ? mp->mp_procgrp : 0;
	p->p_uid = mp->mp_effuid;
	p->p_ruid = mp->mp_realuid;
	p->p_gid = mp->mp_effgid;
	p->p_rgid = mp->mp_realgid;
	p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS);
	for (i = 0; i < p->p_ngroups; i++)
		p->p_groups[i] = mp->mp_sgroups[i];
	p->p_tdev = tty;
	memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist));
	memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask));
	memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch));
	memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore));
	p->p_nice = mp->mp_nice + NZERO;
	strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm));
	p->p_uvalid = 1;
	ticks_to_timeval(&tv, mp->mp_started);
	p->p_ustart_sec = boottime + tv.tv_sec;
	p->p_ustart_usec = tv.tv_usec;
	/* TODO: other rusage fields */
	ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime);
	p->p_uctime_sec = tv.tv_sec;
	p->p_uctime_usec = tv.tv_usec;
	p->p_realflag = p->p_flag;
	p->p_nlwps = (zombie) ? 0 : 1;
	p->p_svuid = mp->mp_svuid;
	p->p_svgid = mp->mp_svgid;

	p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg,
	    sizeof(p->p_wmesg), &p->p_flag);

	switch (p->p_stat) {
	case LSRUN:
		p->p_realstat = SACTIVE;
		p->p_nrlwps = 1;
		break;
	case LSSLEEP:
		p->p_realstat = SACTIVE;
		if (p->p_flag & L_SINTR)
			p->p_realflag |= P_SINTR;
		break;
	case LSSTOP:
		p->p_realstat = SSTOP;
		break;
	case LSZOMB:
		p->p_realstat = SZOMB;
		break;
	case LSDEAD:
		p->p_stat = LSZOMB;	/* ps(1) STAT does not know LSDEAD */
		p->p_realstat = SDEAD;
		break;
	default:
		assert(0);
	}

	if (!zombie)
		fill_proc2_common(p, kslot);
}
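/*
 * For reference, a hypothetical userland query of the KERN_PROC2 handler
 * below, requesting all processes (the last two name components are the
 * element size and maximum element count):
 *
 *	struct kinfo_proc2 procs[64];
 *	size_t len = sizeof(procs);
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_ALL, 0,
 *	    sizeof(procs[0]), 64 };
 *
 *	if (sysctl(mib, 6, procs, &len, NULL, 0) == 0)
 *		... len / sizeof(procs[0]) entries are now filled ...
 */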
/*
 * Implementation of CTL_KERN KERN_PROC2.
 */
ssize_t
mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_proc2 proc2;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	dev_t tty;
	int r, req, arg, elsz, elmax, kmatch, zombie, mslot;

	if (call->call_namelen != 4)
		return EINVAL;

	req = call->call_name[0];
	arg = call->call_name[1];
	elsz = call->call_name[2];
	elmax = call->call_name[3];	/* redundant with the given oldlen.. */

	/*
	 * The kernel is special, in that it does not have a slot in the PM
	 * or VFS tables.  As such, it is dealt with separately.  While
	 * checking arguments, we might as well check whether the kernel is
	 * matched.
	 */
	switch (req) {
	case KERN_PROC_ALL:
		kmatch = TRUE;
		break;
	case KERN_PROC_PID:
	case KERN_PROC_SESSION:
	case KERN_PROC_PGRP:
	case KERN_PROC_UID:
	case KERN_PROC_RUID:
	case KERN_PROC_GID:
	case KERN_PROC_RGID:
		kmatch = (arg == 0);
		break;
	case KERN_PROC_TTY:
		kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV);
		break;
	default:
		return EINVAL;
	}

	if (elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(proc2));

	if (kmatch) {
		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_kern(&proc2);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		mp = &mproc_tab[mslot];

		if (!(mp->mp_flags & IN_USE))
			continue;

		switch (req) {
		case KERN_PROC_PID:
			if ((pid_t)arg != mp->mp_pid)
				continue;
			break;
		case KERN_PROC_SESSION:	/* TODO: job control support */
		case KERN_PROC_PGRP:
			if ((pid_t)arg != mp->mp_procgrp)
				continue;
			break;
		case KERN_PROC_TTY:
			if ((dev_t)arg == KERN_PROC_TTY_REVOKE)
				continue;	/* TODO: revoke(2) support */
			/* Do not access the fproc_tab slot of zombies. */
			zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
			tty = (!zombie) ? fproc_tab[mslot].fpl_tty : NO_DEV;
			if ((dev_t)arg == KERN_PROC_TTY_NODEV) {
				if (tty != NO_DEV)
					continue;
			} else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty)
				continue;
			break;
		case KERN_PROC_UID:
			if ((uid_t)arg != mp->mp_effuid)
				continue;
			break;
		case KERN_PROC_RUID:
			if ((uid_t)arg != mp->mp_realuid)
				continue;
			break;
		case KERN_PROC_GID:
			if ((gid_t)arg != mp->mp_effgid)
				continue;
			break;
		case KERN_PROC_RGID:
			if ((gid_t)arg != mp->mp_realgid)
				continue;
			break;
		}

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_user(&proc2, mslot);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	if (oldp == NULL && req != KERN_PROC_PID)
		off += EXTRA_PROCS * elsz;

	return off;
}
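/*
 * For reference, a hypothetical userland query of the KERN_PROC_ARGS handler
 * below, retrieving the argument vector of process 'pid' as a sequence of
 * null-terminated strings:
 *
 *	char buf[4096];
 *	size_t len = sizeof(buf);
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *
 *	if (sysctl(mib, 4, buf, &len, NULL, 0) == 0)
 *		... buf now holds len bytes of concatenated strings ...
 */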
/*
 * Implementation of CTL_KERN KERN_PROC_ARGS.
 */
ssize_t
mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE];
	struct ps_strings pss;
	struct mproc *mp;
	char *buf, *p, *q, *pptr;
	vir_bytes vaddr, vpage, spage, paddr, ppage;
	size_t max, off, olen, oleft, oldlen, bytes, pleft;
	unsigned int copybudget;
	pid_t pid;
	int req, mslot, count, aborted, ended;
	ssize_t r;

	if (call->call_namelen != 2)
		return EINVAL;

	pid = call->call_name[0];
	req = call->call_name[1];

	switch (req) {
	case KERN_PROC_ARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NARGV:
	case KERN_PROC_NENV:
		break;
	default:
		return EOPNOTSUPP;
	}

	if (!update_tables())
		return EINVAL;

	if ((mslot = get_mslot(pid)) == NO_SLOT)
		return ESRCH;
	mp = &mproc_tab[mslot];

	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		return ESRCH;

	/* We can return the count field size without copying in any data. */
	if (oldp == NULL &&
	    (req == KERN_PROC_NARGV || req == KERN_PROC_NENV))
		return sizeof(count);

	if (sys_datacopy(mp->mp_endpoint,
	    mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss),
	    SELF, (vir_bytes)&pss, sizeof(pss)) != OK)
		return EINVAL;

	/*
	 * Determine the upper size limit of the requested data.  Not only
	 * may the size never exceed ARG_MAX, it may also not exceed the
	 * frame length as given in its original exec call.  In fact, the
	 * frame length should be substantially larger: all strings for both
	 * the arguments and the environment are in there, along with other
	 * stuff, and there must be no overlap between strings.  It is
	 * possible that the application called setproctitle(3), in which
	 * case the ps_strings pointers refer to data outside the frame
	 * altogether.  However, this data should not exceed 2048 bytes, and
	 * we cover this by rounding up the frame length to a multiple of the
	 * page size.  Anyhow, NetBSD blindly returns ARG_MAX when asked for
	 * a size estimate, so with this maximum we are already quite a bit
	 * more accurate.
	 */
	max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE);

	switch (req) {
	case KERN_PROC_NARGV:
		count = pss.ps_nargvstr;
		return mib_copyout(oldp, 0, &count, sizeof(count));
	case KERN_PROC_NENV:
		count = pss.ps_nenvstr;
		return mib_copyout(oldp, 0, &count, sizeof(count));
	case KERN_PROC_ARGV:
		if (oldp == NULL)
			return max;
		vaddr = (vir_bytes)pss.ps_argvstr;
		count = pss.ps_nargvstr;
		break;
	case KERN_PROC_ENV:
		if (oldp == NULL)
			return max;
		vaddr = (vir_bytes)pss.ps_envstr;
		count = pss.ps_nenvstr;
		break;
	}

	/*
	 * Go through the strings.  Copy in entire, machine-aligned pages at
	 * once, in the hope that all data is stored consecutively, which it
	 * should be: we expect that the vector is followed by the strings,
	 * and that the strings are stored in order of vector reference.  We
	 * keep up to two pages with copied-in data: one for the vector, and
	 * optionally one for string data.  In addition, we keep one page
	 * with data to be copied out, so that we do not cause a lot of copy
	 * overhead for short strings.
	 *
	 * We stop whenever any of the following conditions are met:
	 * - copying in data from the target process fails for any reason;
	 * - we have processed the last index ('count') into the vector;
	 * - the current vector element is a NULL pointer;
	 * - the requested number of output bytes ('oldlen') has been
	 *   reached;
	 * - the maximum number of output bytes ('max') has been reached;
	 * - the number of page copy-ins exceeds an estimated threshold;
	 * - copying out data fails for any reason (we then return the
	 *   error).
	 *
	 * We limit the number of page copy-ins because otherwise a rogue
	 * process could create an argument vector consisting of only
	 * two-byte strings that all span two pages, causing us to copy up to
	 * 1GB of data with the current ARG_MAX value of 256K.  No reasonable
	 * vector should cause more than (ARG_MAX / PAGE_SIZE) page copies
	 * for strings; we are nice enough to allow twice that.  Vector
	 * copies do not count, as they are linear anyway.
	 *
	 * Unlike every other sysctl(2) call, we are supposed to truncate the
	 * resulting size (the returned 'oldlen') to the requested size (the
	 * given 'oldlen') *and* return the resulting size, rather than
	 * ENOMEM and the real size.  Unfortunately, libkvm actually relies
	 * on this.
	 *
	 * Generally speaking, upon failure we just return a truncated
	 * result.  In case of truncation, the data we copy out need not be
	 * null terminated.  It is up to userland to process the data
	 * correctly.
	 */
	if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0)
		return 0;

	off = 0;
	olen = 0;
	aborted = FALSE;

	oldlen = mib_getoldlen(oldp);
	if (oldlen > max)
		oldlen = max;

	copybudget = (ARG_MAX / PAGE_SIZE) * 2;

	vpage = 0;
	spage = 0;
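	/*
	 * To make the budget concrete (example figures, assuming 4K pages
	 * and the ARG_MAX of 256K mentioned above): (ARG_MAX / PAGE_SIZE) *
	 * 2 = (262144 / 4096) * 2 = 128 string page copy-ins, i.e. at most
	 * 512KB of copied-in string data, as opposed to the gigabyte-range
	 * worst case described above.
	 */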
	while (count > 0 && off + olen < oldlen && !aborted) {
		/*
		 * Start by fetching the page containing the current vector
		 * element, if needed.  We could limit the fetch to the
		 * vector size, but our hope is that for the simple cases,
		 * the strings are on the remainder of the same page, so we
		 * save a copy call.  TODO: since the strings should follow
		 * the vector, we could start the copy at the base of the
		 * vector.
		 */
		if (trunc_page(vaddr) != vpage) {
			vpage = trunc_page(vaddr);
			if (sys_datacopy(mp->mp_endpoint, vpage, SELF,
			    (vir_bytes)vbuf, PAGE_SIZE) != OK)
				break;
		}

		/* Get the current vector element, pointing to a string. */
		memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr));
		paddr = (vir_bytes)pptr;
		ppage = trunc_page(paddr);
		if (ppage == 0)
			break;

		/* Fetch the string itself, one page at a time at most. */
		do {
			/*
			 * See if the string pointer falls inside either the
			 * vector page or the previously fetched string page
			 * (if any).  If not, fetch a string page.
			 */
			if (ppage == vpage) {
				buf = vbuf;
			} else if (ppage == spage) {
				buf = sbuf;
			} else {
				if (--copybudget == 0) {
					aborted = TRUE;
					break;
				}
				spage = ppage;
				if (sys_datacopy(mp->mp_endpoint, spage,
				    SELF, (vir_bytes)sbuf, PAGE_SIZE) != OK) {
					aborted = TRUE;
					break;
				}
				buf = sbuf;
			}

			/*
			 * We now have a string fragment in a buffer.  See if
			 * the string is null terminated.  If not, all the
			 * data up to the buffer end is part of the string,
			 * and the string continues on the next page.
			 */
			p = &buf[paddr - ppage];
			pleft = PAGE_SIZE - (paddr - ppage);
			assert(pleft > 0);

			if ((q = memchr(p, '\0', pleft)) != NULL) {
				bytes = (size_t)(q - p + 1);
				assert(bytes <= pleft);
				ended = TRUE;
			} else {
				bytes = pleft;
				ended = FALSE;
			}

			/* Limit the result to the requested length. */
			if (off + olen + bytes > oldlen)
				bytes = oldlen - off - olen;

			/*
			 * Add 'bytes' bytes from string pointer 'p' to the
			 * output buffer, copying out its contents to
			 * userland if it has filled up.
			 */
			if (olen + bytes > sizeof(obuf)) {
				oleft = sizeof(obuf) - olen;
				memcpy(&obuf[olen], p, oleft);

				if ((r = mib_copyout(oldp, off, obuf,
				    sizeof(obuf))) < 0)
					return r;

				off += sizeof(obuf);
				olen = 0;

				p += oleft;
				bytes -= oleft;
			}
			if (bytes > 0) {
				memcpy(&obuf[olen], p, bytes);
				olen += bytes;
			}

			/*
			 * Continue as long as we have not yet found the
			 * string end, and we have not yet filled the output
			 * buffer.
			 */
			paddr += pleft;
			assert(trunc_page(paddr) == paddr);
			ppage = paddr;
		} while (!ended && off + olen < oldlen);

		vaddr += sizeof(char *);
		count--;
	}

	/* Copy out any remainder of the output buffer. */
	if (olen > 0) {
		if ((r = mib_copyout(oldp, off, obuf, olen)) < 0)
			return r;

		off += olen;
	}

	assert(off <= oldlen);
	return off;
}

/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_LIST.
 */
ssize_t
mib_minix_proc_list(struct mib_call * call __unused,
	struct mib_node * node __unused, struct mib_oldp * oldp,
	struct mib_newp * newp __unused)
{
	struct minix_proc_list mpl[NR_PROCS];
	struct minix_proc_list *mplp;
	struct mproc *mp;
	unsigned int mslot;

	if (oldp == NULL)
		return sizeof(mpl);

	if (!update_tables())
		return EINVAL;

	memset(&mpl, 0, sizeof(mpl));

	mplp = mpl;
	mp = mproc_tab;

	for (mslot = 0; mslot < NR_PROCS; mslot++, mplp++, mp++) {
		if (!(mp->mp_flags & IN_USE) || mp->mp_pid <= 0)
			continue;

		mplp->mpl_flags = MPLF_IN_USE;
		if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
			mplp->mpl_flags |= MPLF_ZOMBIE;
		mplp->mpl_pid = mp->mp_pid;
		mplp->mpl_uid = mp->mp_effuid;
		mplp->mpl_gid = mp->mp_effgid;
	}

	return mib_copyout(oldp, 0, &mpl, sizeof(mpl));
}
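/*
 * For reference, a hypothetical userland query of the PROC_LIST node above,
 * which always returns the full NR_PROCS-sized table:
 *
 *	struct minix_proc_list mpl[NR_PROCS];
 *	size_t len = sizeof(mpl);
 *	int mib[3] = { CTL_MINIX, MINIX_PROC, PROC_LIST };
 *
 *	if (sysctl(mib, 3, mpl, &len, NULL, 0) == 0)
 *		... slots with MPLF_IN_USE set describe live processes ...
 */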
/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_DATA.
 */
ssize_t
mib_minix_proc_data(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct minix_proc_data mpd;
	struct proc *kp;
	int kslot, mslot = 0;
	unsigned int mflags;
	pid_t pid;

	/*
	 * It is currently only possible to retrieve the process data for a
	 * particular PID, which must be given as the last name component.
	 */
	if (call->call_namelen != 1)
		return EINVAL;

	pid = (pid_t)call->call_name[0];

	if (!update_tables())
		return EINVAL;

	/*
	 * Unlike the CTL_KERN nodes, we use the ProcFS semantics here: if
	 * the given PID is negative, it is a kernel task; otherwise, it
	 * identifies a user process.  A request for PID 0 will result in
	 * ESRCH.
	 */
	if (pid < 0) {
		if (pid < -NR_TASKS)
			return ESRCH;

		kslot = pid + NR_TASKS;
		assert(kslot < NR_TASKS);
	} else {
		if ((mslot = get_mslot(pid)) == NO_SLOT)
			return ESRCH;

		kslot = NR_TASKS + mslot;
	}

	if (oldp == NULL)
		return sizeof(mpd);

	kp = &proc_tab[kslot];

	mflags = (pid > 0) ? mproc_tab[mslot].mp_flags : 0;

	memset(&mpd, 0, sizeof(mpd));
	mpd.mpd_endpoint = kp->p_endpoint;
	if (mflags & PRIV_PROC)
		mpd.mpd_flags |= MPDF_SYSTEM;
	if (mflags & (TRACE_ZOMBIE | ZOMBIE))
		mpd.mpd_flags |= MPDF_ZOMBIE;
	else if ((mflags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
		mpd.mpd_flags |= MPDF_STOPPED;
	else if (proc_is_runnable(kp))
		mpd.mpd_flags |= MPDF_RUNNABLE;
	mpd.mpd_blocked_on = P_BLOCKEDON(kp);
	mpd.mpd_priority = kp->p_priority;
	mpd.mpd_user_time = kp->p_user_time;
	mpd.mpd_sys_time = kp->p_sys_time;
	mpd.mpd_cycles = kp->p_cycles;
	mpd.mpd_kipc_cycles = kp->p_kipc_cycles;
	mpd.mpd_kcall_cycles = kp->p_kcall_cycles;
	if (kslot >= NR_TASKS) {
		mpd.mpd_nice = mproc_tab[mslot].mp_nice;
		strlcpy(mpd.mpd_name, mproc_tab[mslot].mp_name,
		    sizeof(mpd.mpd_name));
	} else
		strlcpy(mpd.mpd_name, kp->p_name, sizeof(mpd.mpd_name));

	return mib_copyout(oldp, 0, &mpd, sizeof(mpd));
}
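/*
 * For reference, a hypothetical userland query of the PROC_DATA node above,
 * using the ProcFS PID semantics it implements (negative PID = kernel task):
 *
 *	struct minix_proc_data mpd;
 *	size_t len = sizeof(mpd);
 *	int mib[4] = { CTL_MINIX, MINIX_PROC, PROC_DATA, pid };
 *
 *	if (sysctl(mib, 4, &mpd, &len, NULL, 0) == 0)
 *		... mpd now describes the process or kernel task ...
 */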