Kernel: per-state CPU accounting

This functionality is required for BSD top(1), as exposed through
the CTL_KERN KERN_CP_TIME sysctl(2) call.  The idea is that the
overall time spent in the system is divided into five categories.
While NetBSD uses separate categories for the kernel ("system") and
interrupts, we redefine "system" to mean userspace system services
and "interrupts" to mean time spent in the kernel, thereby providing
the same categories as MINIX3's own top(1), while adding the "nice"
category which, like on NetBSD, is used for time spent by processes
with a priority lowered by the system administrator.

Change-Id: I2114148d1e07d9635055ceca7b163f337c53c43a
This commit is contained in:
David van Moolenbroek 2015-11-07 14:42:40 +00:00 committed by Lionel Sambuc
parent e4e21ee1b2
commit 366d18b2b8
15 changed files with 132 additions and 26 deletions

View File

@ -334,6 +334,7 @@
# define GET_IDLETSC 21 /* get cumulative idle time stamp counter */
# define GET_CPUINFO 23 /* get information about cpus */
# define GET_REGS 24 /* get general process registers */
# define GET_CPUTICKS 25 /* get per-state ticks for a cpu */
/* Subfunctions for SYS_PRIVCTL */
#define SYS_PRIV_ALLOW 1 /* Allow process to run */

View File

@ -167,4 +167,12 @@
#define MKF_I386_INTEL_SYSENTER (1L << 0) /* SYSENTER available and supported */
#define MKF_I386_AMD_SYSCALL (1L << 1) /* SYSCALL available and supported */
/*
* Number of per-CPU states for which time will be accounted. This *must* be
* the same value as NetBSD's CPUSTATES, which is defined in a rather
* unfortunate location (sys/sched.h). If the NetBSD value changes, our kernel
* must be adapted accordingly.
*/
#define MINIX_CPUSTATES 5
#endif /* _MINIX_CONST_H */

View File

@ -987,8 +987,9 @@ typedef struct {
int quantum;
int priority;
int cpu;
int niced;
uint8_t padding[40];
uint8_t padding[36];
} mess_lsys_krn_schedule;
_ASSERT_MSG_SIZE(mess_lsys_krn_schedule);

View File

@ -37,8 +37,8 @@ int sys_clear(endpoint_t proc_ep);
int sys_exit(void);
int sys_trace(int req, endpoint_t proc_ep, long addr, long *data_p);
int sys_schedule(endpoint_t proc_ep, int priority, int quantum, int
cpu);
int sys_schedule(endpoint_t proc_ep, int priority, int quantum, int cpu,
int niced);
int sys_schedctl(unsigned flags, endpoint_t proc_ep, int priority, int
quantum, int cpu);
@ -190,6 +190,7 @@ int sys_diagctl(int ctl, char *arg1, int arg2);
#define sys_getpriv(dst, nr) sys_getinfo(GET_PRIV, dst, 0,0, nr)
#define sys_getidletsc(dst) sys_getinfo(GET_IDLETSC, dst, 0,0,0)
#define sys_getregs(dst,nr) sys_getinfo(GET_REGS, dst, 0,0, nr)
#define sys_getcputicks(dst,nr) sys_getinfo(GET_CPUTICKS, dst, 0,0, nr)
int sys_getinfo(int request, void *val_ptr, int val_len, void *val_ptr2,
int val_len2);
int sys_whoami(endpoint_t *ep, char *name, int namelen, int

View File

@ -12,6 +12,11 @@
#include <assert.h>
#include <sys/sched.h> /* for CP_*, CPUSTATES */
#if CPUSTATES != MINIX_CPUSTATES
/* If this breaks, the code in this file may have to be adapted accordingly. */
#error "MINIX_CPUSTATES value is out of sync with NetBSD's!"
#endif
#include "kernel/spinlock.h"
@ -23,6 +28,7 @@
#include "bsp_intr.h"
static unsigned tsc_per_ms[CONFIG_MAX_CPUS];
static uint64_t tsc_per_state[CONFIG_MAX_CPUS][CPUSTATES];
int init_local_timer(unsigned freq)
{
@ -61,6 +67,7 @@ void context_stop(struct proc * p)
{
u64_t tsc;
u32_t tsc_delta;
unsigned int counter;
u64_t * __tsc_ctr_switch = get_cpulocal_var_ptr(tsc_ctr_switch);
read_tsc_64(&tsc);
@ -81,9 +88,17 @@ void context_stop(struct proc * p)
/*
* deduct the just consumed cpu cycles from the cpu time left for this
* process during its current quantum. Skip IDLE and other pseudo kernel
* tasks
* tasks, except for accounting purposes.
*/
if (p->p_endpoint >= 0) {
/* On MINIX3, the "system" counter covers system processes. */
if (p->p_priv != priv_addr(USER_PRIV_ID))
counter = CP_SYS;
else if (p->p_misc_flags & MF_NICED)
counter = CP_NICE;
else
counter = CP_USER;
#if DEBUG_RACE
p->p_cpu_time_left = 0;
#else
@ -91,6 +106,13 @@ void context_stop(struct proc * p)
p->p_cpu_time_left -= tsc_delta;
} else p->p_cpu_time_left = 0;
#endif
} else {
/* On MINIX3, the "interrupts" counter covers the kernel. */
if (p->p_endpoint == IDLE)
counter = CP_IDLE;
else
counter = CP_INTR;
}
*__tsc_ctr_switch = tsc;
@ -139,3 +161,19 @@ short cpu_load(void)
{
return 0;
}
/*
 * Return the number of clock ticks spent in each of a predefined number of
 * CPU states.  Only the counters of CPU 0 are consulted; the 'cpu' argument
 * is not used by this implementation.  The accumulated per-state counter
 * values are converted to clock ticks by dividing them by the number of
 * counter units per clock tick, derived from tsc_per_ms[0] and system_hz.
 */
void
get_cpu_ticks(unsigned int cpu, uint64_t ticks[CPUSTATES])
{
	uint64_t tsc_per_tick;
	int i;

	/* Multiply in 64 bits so that a large per-ms count cannot wrap. */
	tsc_per_tick = (uint64_t)tsc_per_ms[0] * 1000 / system_hz;

	for (i = 0; i < CPUSTATES; i++) {
		/* Report zero ticks rather than dividing by zero. */
		ticks[i] = (tsc_per_tick != 0) ?
		    tsc_per_state[0][i] / tsc_per_tick : 0;
	}
}

View File

@ -12,6 +12,11 @@
#include "glo.h"
#include "kernel/profile.h"
#include <sys/sched.h> /* for CP_*, CPUSTATES */
#if CPUSTATES != MINIX_CPUSTATES
/* If this breaks, the code in this file may have to be adapted accordingly. */
#error "MINIX_CPUSTATES value is out of sync with NetBSD's!"
#endif
#ifdef USE_APIC
#include "apic.h"
@ -40,6 +45,8 @@ static u64_t tsc0, tsc1;
#define PROBE_TICKS (system_hz / 10)
static unsigned tsc_per_ms[CONFIG_MAX_CPUS];
static unsigned tsc_per_tick[CONFIG_MAX_CPUS];
static uint64_t tsc_per_state[CONFIG_MAX_CPUS][CPUSTATES];
/*===========================================================================*
* init_8235A_timer *
@ -133,7 +140,8 @@ int init_local_timer(unsigned freq)
/* if we know the address, lapic is enabled and we should use it */
if (lapic_addr) {
unsigned cpu = cpuid;
tsc_per_ms[cpu] = (unsigned long)(cpu_get_freq(cpu) / 1000);
tsc_per_ms[cpu] = (unsigned)(cpu_get_freq(cpu) / 1000);
tsc_per_tick[cpu] = (unsigned)(cpu_get_freq(cpu) / system_hz);
lapic_set_timer_one_shot(1000000 / system_hz);
} else {
DEBUGBASIC(("Initiating legacy i8253 timer\n"));
@ -144,6 +152,7 @@ int init_local_timer(unsigned freq)
estimate_cpu_freq();
/* always only 1 cpu in the system */
tsc_per_ms[0] = (unsigned long)(cpu_get_freq(0) / 1000);
tsc_per_tick[0] = (unsigned)(cpu_get_freq(0) / system_hz);
}
return 0;
@ -206,10 +215,12 @@ void context_stop(struct proc * p)
{
u64_t tsc, tsc_delta;
u64_t * __tsc_ctr_switch = get_cpulocal_var_ptr(tsc_ctr_switch);
unsigned int cpu, counter;
#ifdef CONFIG_SMP
unsigned cpu = cpuid;
int must_bkl_unlock = 0;
cpu = cpuid;
/*
* This function is called only if we switch from kernel to user or idle
* or back. Therefore this is a perfect location to place the big kernel
@ -261,6 +272,7 @@ void context_stop(struct proc * p)
#else
read_tsc_64(&tsc);
p->p_cycles = p->p_cycles + tsc - *__tsc_ctr_switch;
cpu = 0;
#endif
tsc_delta = tsc - *__tsc_ctr_switch;
@ -280,9 +292,17 @@ void context_stop(struct proc * p)
/*
* deduct the just consumed cpu cycles from the cpu time left for this
* process during its current quantum. Skip IDLE and other pseudo kernel
* tasks
* tasks, except for global accounting purposes.
*/
if (p->p_endpoint >= 0) {
/* On MINIX3, the "system" counter covers system processes. */
if (p->p_priv != priv_addr(USER_PRIV_ID))
counter = CP_SYS;
else if (p->p_misc_flags & MF_NICED)
counter = CP_NICE;
else
counter = CP_USER;
#if DEBUG_RACE
p->p_cpu_time_left = 0;
#else
@ -295,8 +315,16 @@ void context_stop(struct proc * p)
p->p_cpu_time_left = 0;
}
#endif
} else {
/* On MINIX3, the "interrupts" counter covers the kernel. */
if (p->p_endpoint == IDLE)
counter = CP_IDLE;
else
counter = CP_INTR;
}
tsc_per_state[cpu][counter] += tsc_delta;
*__tsc_ctr_switch = tsc;
#ifdef CONFIG_SMP
@ -383,3 +411,16 @@ void busy_delay_ms(int ms)
return;
}
/*
 * Return the number of clock ticks spent in each of a predefined number of
 * CPU states, for the given CPU.  The per-state cycle counters of that CPU
 * are converted to clock ticks using its cycles-per-tick value.  If that
 * value is zero (presumably because the CPU's local timer has not been
 * initialized), all tick counts are reported as zero instead of dividing by
 * zero.  The caller must ensure that 'cpu' is below CONFIG_MAX_CPUS, as it
 * is used directly as an array index.
 */
void
get_cpu_ticks(unsigned int cpu, uint64_t ticks[CPUSTATES])
{
	unsigned int divisor;
	int i;

	divisor = tsc_per_tick[cpu];

	/* TODO: make this inter-CPU safe! */
	for (i = 0; i < CPUSTATES; i++)
		ticks[i] = (divisor != 0) ? tsc_per_state[cpu][i] / divisor : 0;
}

View File

@ -68,8 +68,11 @@ void bsp_finish_booting(void)
RTS_UNSET(proc_addr(i), RTS_PROC_STOP);
}
/*
* enable timer interrupts and clock task on the boot CPU
* Enable timer interrupts and clock task on the boot CPU. First reset the
* CPU accounting values, as the timer initialization (indirectly) uses them.
*/
cycles_accounting_init();
if (boot_cpu_init_timer(system_hz)) {
panic("FATAL : failed to initialize timer interrupts, "
"cannot continue without any clock source!");
@ -91,10 +94,6 @@ void bsp_finish_booting(void)
FIXME("PROC check enabled");
#endif
DEBUGEXTRA(("cycles_accounting_init()... "));
cycles_accounting_init();
DEBUGEXTRA(("done\n"));
#ifdef CONFIG_SMP
cpu_set_flag(bsp_cpu_id, CPU_IS_READY);
machine.processors_count = ncpus;

View File

@ -254,6 +254,7 @@ struct proc {
space*/
#define MF_STEP 0x40000 /* Single-step process */
#define MF_MSGFAILED 0x80000
#define MF_NICED 0x100000 /* user has lowered max process priority */
/* Magic process table addresses. */
#define BEG_PROC_ADDR (&proc[0])

View File

@ -38,6 +38,7 @@ void cycles_accounting_init(void);
void context_stop(struct proc * p);
/* this is a wrapper to make calling it from assembly easier */
void context_stop_idle(void);
void get_cpu_ticks(unsigned int cpu, uint64_t ticks[MINIX_CPUSTATES]);
int restore_fpu(struct proc *);
void save_fpu(struct proc *);
void save_local_fpu(struct proc *, int retain);
@ -105,7 +106,7 @@ void system_init(void);
void clear_endpoint(struct proc *rc);
void clear_ipc_refs(struct proc *rc, int caller_ret);
void kernel_call_resume(struct proc *p);
int sched_proc(struct proc *rp, int priority, int quantum, int cpu);
int sched_proc(struct proc *rp, int priority, int quantum, int cpu, int niced);
int add_ipc_filter(struct proc *rp, int type,
vir_bytes address, size_t length);
void clear_ipc_filters(struct proc *rp);

View File

@ -639,10 +639,7 @@ void kernel_call_resume(struct proc *caller)
/*===========================================================================*
* sched_proc *
*===========================================================================*/
int sched_proc(struct proc *p,
int priority,
int quantum,
int cpu)
int sched_proc(struct proc *p, int priority, int quantum, int cpu, int niced)
{
/* Make sure the values given are within the allowed range.*/
if ((priority < TASK_Q && priority != -1) || priority > NR_SCHED_QUEUES)
@ -691,6 +688,11 @@ int sched_proc(struct proc *p,
p->p_cpu = cpu;
#endif
if (niced)
p->p_misc_flags |= MF_NICED;
else
p->p_misc_flags &= ~MF_NICED;
/* Clear the scheduling bit and enqueue the process */
RTS_UNSET(p, RTS_NO_QUANTUM);

View File

@ -189,6 +189,17 @@ int do_getinfo(struct proc * caller, message * m_ptr)
src_vir = (vir_bytes) &idl->p_cycles;
break;
}
case GET_CPUTICKS: {
uint64_t ticks[MINIX_CPUSTATES];
unsigned int cpu;
cpu = (unsigned int)m_ptr->m_lsys_krn_sys_getinfo.val_len2_e;
if (cpu >= CONFIG_MAX_CPUS)
return EINVAL;
get_cpu_ticks(cpu, ticks);
length = sizeof(ticks);
src_vir = (vir_bytes)ticks;
break;
}
default:
printf("do_getinfo: invalid request %d\n",
m_ptr->m_lsys_krn_sys_getinfo.request);

View File

@ -34,7 +34,7 @@ int do_schedctl(struct proc * caller, message * m_ptr)
cpu = m_ptr->m_lsys_krn_schedctl.cpu;
/* Try to schedule the process. */
if((r = sched_proc(p, priority, quantum, cpu) != OK))
/* Parenthesize the assignment so that 'r' holds the error code itself
 * rather than the 0/1 result of the comparison against OK.
 */
if ((r = sched_proc(p, priority, quantum, cpu, FALSE)) != OK)
	return r;
p->p_scheduler = NULL;
} else {

View File

@ -9,7 +9,7 @@ int do_schedule(struct proc * caller, message * m_ptr)
{
struct proc *p;
int proc_nr;
int priority, quantum, cpu;
int priority, quantum, cpu, niced;
if (!isokendpt(m_ptr->m_lsys_krn_schedule.endpoint, &proc_nr))
return EINVAL;
@ -24,6 +24,7 @@ int do_schedule(struct proc * caller, message * m_ptr)
priority = m_ptr->m_lsys_krn_schedule.priority;
quantum = m_ptr->m_lsys_krn_schedule.quantum;
cpu = m_ptr->m_lsys_krn_schedule.cpu;
niced = !!(m_ptr->m_lsys_krn_schedule.niced);
return sched_proc(p, priority, quantum, cpu);
return sched_proc(p, priority, quantum, cpu, niced);
}

View File

@ -1,9 +1,7 @@
#include "syslib.h"
int sys_schedule(endpoint_t proc_ep,
int priority,
int quantum,
int cpu)
int
sys_schedule(endpoint_t proc_ep, int priority, int quantum, int cpu, int niced)
{
message m;
@ -11,5 +9,6 @@ int sys_schedule(endpoint_t proc_ep,
m.m_lsys_krn_schedule.priority = priority;
m.m_lsys_krn_schedule.quantum = quantum;
m.m_lsys_krn_schedule.cpu = cpu;
m.m_lsys_krn_schedule.niced = niced;
return(_kernel_call(SYS_SCHEDULE, &m));
}

View File

@ -297,7 +297,7 @@ int do_nice(message *m_ptr)
static int schedule_process(struct schedproc * rmp, unsigned flags)
{
int err;
int new_prio, new_quantum, new_cpu;
int new_prio, new_quantum, new_cpu, niced;
pick_cpu(rmp);
@ -316,8 +316,10 @@ static int schedule_process(struct schedproc * rmp, unsigned flags)
else
new_cpu = -1;
niced = (rmp->max_priority > USER_Q);
if ((err = sys_schedule(rmp->endpoint, new_prio,
new_quantum, new_cpu)) != OK) {
new_quantum, new_cpu, niced)) != OK) {
printf("PM: An error occurred when trying to schedule %d: %d\n",
rmp->endpoint, err);
}