diff --git a/Makefile b/Makefile index b9856cb58..c16526afe 100644 --- a/Makefile +++ b/Makefile @@ -30,12 +30,6 @@ usage: # 'make install' target. # # etcfiles has to be done first. -.if ${COMPILER_TYPE} == "ack" -world: mkfiles includes depend libraries install etcforce -.elif ${COMPILER_TYPE} == "gnu" -world: mkfiles includes depend gnu-libraries install etcforce -.endif - mkfiles: make -C share/mk install diff --git a/include/arch/i386/archtypes.h b/include/arch/i386/archtypes.h index 31f3ba9f3..48626018c 100644 --- a/include/arch/i386/archtypes.h +++ b/include/arch/i386/archtypes.h @@ -41,5 +41,7 @@ struct fpu_state_s { #define INMEMORY(p) (!p->p_seg.p_cr3 || get_cpulocal_var(ptproc) == p) +typedef u32_t atomic_t; /* access to an aligned 32bit value is atomic on i386 */ + #endif /* #ifndef _I386_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 0f8d295e3..780c414e0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,9 +9,15 @@ SRCS+= start.c table.c main.c proc.c \ system.c clock.c utility.c debug.c profile.c interrupt.c \ watchdog.c cpulocals.c +.ifdef CONFIG_SMP +SRCS += smp.c +.endif + DPADD+= ${LIBTIMERS} ${LIBSYS} LDADD+= -ltimers -lsys +CFLAGS += -D__kernel__ + .if ${COMPILER_TYPE} == "ack" LDFLAGS+= -.o .elif ${COMPILER_TYPE} == "gnu" diff --git a/kernel/arch/i386/Makefile.inc b/kernel/arch/i386/Makefile.inc index c2b7d279b..5e6972e3e 100644 --- a/kernel/arch/i386/Makefile.inc +++ b/kernel/arch/i386/Makefile.inc @@ -33,10 +33,18 @@ SRCS+= arch_do_vmctl.c \ pre_init.c \ acpi.c +.ifdef CONFIG_SMP +SRCS += arch_smp.c trampoline.S +.endif + + .if ${COMPILER_TYPE} == "ack" I86CPPFLAGS = -mi86 I86LDFLAGS = -mi86 CPPFLAGS.klib16.S = ${I86CPPFLAGS} LDFLAGS.klib16.S = ${I86LDFLAGS} + +CPPFLAGS.trampoline.S = ${I86CPPFLAGS} +LDFLAGS.trampoline.S = ${I86LDFLAGS} .endif diff --git a/kernel/arch/i386/acpi.c b/kernel/arch/i386/acpi.c index 159f15b98..c0d319f87 100644 --- a/kernel/arch/i386/acpi.c +++ b/kernel/arch/i386/acpi.c @@ -243,3 +243,34 @@ 
PUBLIC struct acpi_madt_ioapic * acpi_get_ioapic_next(void) return ret; } + +PUBLIC struct acpi_madt_lapic * acpi_get_lapic_next(void) +{ + static unsigned idx = 0; + static struct acpi_madt_hdr * madt_hdr; + + struct acpi_madt_lapic * ret; + + if (idx == 0) { + madt_hdr = (struct acpi_madt_hdr *) + phys2vir(acpi_get_table_base("APIC")); + if (madt_hdr == NULL) + return NULL; + } + + for (;;) { + ret = (struct acpi_madt_lapic *) + acpi_madt_get_typed_item(madt_hdr, + ACPI_MADT_TYPE_LAPIC, idx); + if (!ret) + break; + + idx++; + + /* report only usable CPUs */ + if (ret->flags & 1) + break; + } + + return ret; +} diff --git a/kernel/arch/i386/acpi.h b/kernel/arch/i386/acpi.h index e2cf35217..e1678f4c0 100644 --- a/kernel/arch/i386/acpi.h +++ b/kernel/arch/i386/acpi.h @@ -90,5 +90,7 @@ _PROTOTYPE(void acpi_init, (void)); * this function thus no memory needs to be freed */ _PROTOTYPE(struct acpi_madt_ioapic * acpi_get_ioapic_next, (void)); +/* same as above for local APICs */ +_PROTOTYPE(struct acpi_madt_lapic * acpi_get_lapic_next, (void)); #endif /* __ACPI_H__ */ diff --git a/kernel/arch/i386/apic.c b/kernel/arch/i386/apic.c index 3fd7e68bf..29ad02cee 100644 --- a/kernel/arch/i386/apic.c +++ b/kernel/arch/i386/apic.c @@ -106,7 +106,6 @@ #define SPL0 0x0 #define SPLHI 0xF -#define cpu_is_bsp(x) 1 PUBLIC struct io_apic io_apic[MAX_NR_IOAPICS]; PUBLIC unsigned nioapics; @@ -124,6 +123,22 @@ struct irq { PRIVATE struct irq io_apic_irq[NR_IRQ_VECTORS]; +/* + * to make APIC work if SMP is not configured, we need to set the maximal number + * of CPUS to 1, cpuid to return 0 and the current cpu is always BSP + */ +#ifndef CONFIG_SMP +/* this is always true on an uniprocessor */ +#define cpu_is_bsp(x) 1 + +#else + +#include "kernel/smp.h" + +#endif + +#include "kernel/spinlock.h" + #define lapic_write_icr1(val) lapic_write(LAPIC_ICR1, val) #define lapic_write_icr2(val) lapic_write(LAPIC_ICR2, val) @@ -131,12 +146,15 @@ PRIVATE struct irq io_apic_irq[NR_IRQ_VECTORS]; #define 
lapic_read_icr1(x) lapic_read(LAPIC_ICR1) #define lapic_read_icr2(x) lapic_read(LAPIC_ICR2) +#define is_boot_apic(apicid) ((apicid) == bsp_lapic_id) + #define VERBOSE_APIC(x) x PUBLIC int ioapic_enabled; PUBLIC u32_t lapic_addr_vaddr; PUBLIC vir_bytes lapic_addr; PUBLIC vir_bytes lapic_eoi_addr; +PUBLIC int bsp_lapic_id; PRIVATE volatile unsigned probe_ticks; PRIVATE u64_t tsc0, tsc1; @@ -171,8 +189,8 @@ PRIVATE void ioapic_write(u32_t ioa_base, u8_t reg, u32_t val) *((u32_t *)(ioa_base + IOAPIC_IOWIN)) = val; } -FORWARD _PROTOTYPE(void lapic_microsec_sleep, (unsigned count)); -FORWARD _PROTOTYPE(void apic_idt_init, (const int reset)); +_PROTOTYPE(void lapic_microsec_sleep, (unsigned count)); +_PROTOTYPE(void apic_idt_init, (const int reset)); PRIVATE void ioapic_enable_pin(vir_bytes ioapic_addr, int pin) { @@ -375,6 +393,16 @@ PUBLIC void ioapic_mask_irq(unsigned irq) irq_8259_mask(irq); } +PUBLIC unsigned int apicid(void) +{ + return lapic_read(LAPIC_ID); +} + +PUBLIC void ioapic_set_id(u32_t addr, unsigned int id) +{ + ioapic_write(addr, IOAPIC_ID, id << 24); +} + PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook)) { u32_t tcrt; @@ -398,7 +426,7 @@ PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook)) return 1; } -PRIVATE void apic_calibrate_clocks(void) +PRIVATE void apic_calibrate_clocks(unsigned cpu) { u32_t lvtt, val, lapic_delta; u64_t tsc_delta; @@ -462,7 +490,7 @@ PRIVATE void apic_calibrate_clocks(void) BOOT_VERBOSE(cpu_print_freq(cpuid)); } -PRIVATE void lapic_set_timer_one_shot(const u32_t value) +PUBLIC void lapic_set_timer_one_shot(const u32_t value) { /* sleep in micro seconds */ u32_t lvtt; @@ -508,10 +536,11 @@ PUBLIC void lapic_stop_timer(void) lapic_write(LAPIC_LVTTR, lvtt | APIC_LVTT_MASK); } -PRIVATE void lapic_microsec_sleep(unsigned count) +PUBLIC void lapic_microsec_sleep(unsigned count) { lapic_set_timer_one_shot(count); - while (lapic_read(LAPIC_TIMER_CCR)); + while (lapic_read(LAPIC_TIMER_CCR)) + arch_pause(); } PRIVATE u32_t 
lapic_errstatus(void) @@ -541,7 +570,10 @@ PUBLIC void lapic_disable(void) if (!lapic_addr) return; - if (!apic_imcrp) { +#ifdef CONFIG_SMP + if (cpu_is_bsp(cpuid) && !apic_imcrp) +#endif + { /* leave it enabled if imcr is not set */ val = lapic_read(LAPIC_LINT0); val &= ~(APIC_ICR_DM_MASK|APIC_ICR_INT_MASK); @@ -591,14 +623,9 @@ PRIVATE int lapic_enable_in_msr(void) return 1; } -PUBLIC int lapic_enable(void) +PUBLIC int lapic_enable(unsigned cpu) { u32_t val, nlvt; -#if 0 - u32_t timeout = 0xFFFF; - u32_t errstatus = 0; -#endif - unsigned cpu = cpuid; if (!lapic_addr) return 0; @@ -629,8 +656,6 @@ PUBLIC int lapic_enable(void) apic_eoi(); - cpu = cpuid; - /* Program Logical Destination Register. */ val = lapic_read(LAPIC_LDR) & ~0xFF000000; val |= (cpu & 0xFF) << 24; @@ -663,7 +688,7 @@ PUBLIC int lapic_enable(void) (void) lapic_read (LAPIC_SIVR); apic_eoi(); - apic_calibrate_clocks(); + apic_calibrate_clocks(cpu); BOOT_VERBOSE(printf("APIC timer calibrated\n")); return 1; @@ -785,13 +810,14 @@ PRIVATE void lapic_set_dummy_handlers(void) #endif /* Build descriptors for interrupt gates in IDT. */ -PRIVATE void apic_idt_init(const int reset) +PUBLIC void apic_idt_init(const int reset) { u32_t val; /* Set up idt tables for smp mode. 
*/ vir_bytes local_timer_intr_handler; + int is_bsp = is_boot_apic(apicid()); if (reset) { idt_copy_vectors(gate_table_pic); @@ -825,7 +851,7 @@ PRIVATE void apic_idt_init(const int reset) (void) lapic_read(LAPIC_LVTER); /* configure the timer interupt handler */ - if (cpu_is_bsp(cpuid)) { + if (is_bsp) { local_timer_intr_handler = (vir_bytes) lapic_bsp_timer_int_handler; BOOT_VERBOSE(printf("Initiating BSP timer handler\n")); } else { @@ -865,7 +891,7 @@ PRIVATE int acpi_get_ioapics(struct io_apic * ioa, unsigned * nioa, unsigned max return n; } -PRIVATE int detect_ioapics(void) +PUBLIC int detect_ioapics(void) { int status; @@ -874,11 +900,130 @@ PRIVATE int detect_ioapics(void) if (!status) { /* try something different like MPS */ } - - printf("nioapics %d\n", nioapics); return status; } +#ifdef CONFIG_SMP + +PUBLIC int apic_send_startup_ipi(unsigned cpu, phys_bytes trampoline) +{ + int timeout; + u32_t errstatus = 0; + int i; + + /* INIT-SIPI-SIPI sequence */ + + for (i = 0; i < 2; i++) { + u32_t val; + lapic_errstatus(); + + /* set target pe */ + val = lapic_read(LAPIC_ICR2) & 0xFFFFFF; + val |= cpuid2apicid[cpu] << 24; + lapic_write(LAPIC_ICR2, val); + + /* send SIPI */ + val = lapic_read(LAPIC_ICR1) & 0xFFF32000; + val |= APIC_ICR_LEVEL_ASSERT |APIC_ICR_DM_STARTUP; + val |= (((u32_t)trampoline >> 12)&0xff); + lapic_write(LAPIC_ICR1, val); + + timeout = 1000; + + /* wait for 200 micro-seconds*/ + lapic_microsec_sleep (200); + errstatus = 0; + + while ((lapic_read(LAPIC_ICR1) & APIC_ICR_DELIVERY_PENDING) && !errstatus) + { + errstatus = lapic_errstatus(); + timeout--; + if (!timeout) break; + } + + /* skip this one and continue with another cpu */ + if (errstatus) + return -1; + } + + return 0; +} + +PUBLIC int apic_send_init_ipi(unsigned cpu, phys_bytes trampoline) +{ + u32_t ptr, errstatus = 0; + int timeout; + + /* set the warm reset vector */ + ptr = (u32_t)(trampoline & 0xF); + phys_copy(0x467, vir2phys(&ptr), sizeof(u16_t )); + ptr = (u32_t)(trampoline 
>> 4); + phys_copy(0x469, vir2phys(&ptr), sizeof(u16_t )); + + /* set shutdown code */ + outb (RTC_INDEX, 0xF); + outb (RTC_IO, 0xA); + + /* clear error state register. */ + (void) lapic_errstatus(); + + /* assert INIT IPI , No Shorthand, destination mode : physical */ + lapic_write(LAPIC_ICR2, (lapic_read (LAPIC_ICR2) & 0xFFFFFF) | + (cpuid2apicid[cpu] << 24)); + lapic_write(LAPIC_ICR1, (lapic_read (LAPIC_ICR1) & 0xFFF32000) | + APIC_ICR_DM_INIT | APIC_ICR_TM_LEVEL | APIC_ICR_LEVEL_ASSERT); + + timeout = 1000; + + /* sleep for 200 micro-seconds */ + lapic_microsec_sleep(200); + + errstatus = 0; + + while ((lapic_read(LAPIC_ICR1) & APIC_ICR_DELIVERY_PENDING) && !errstatus) { + errstatus = lapic_errstatus(); + timeout--; + if (!timeout) break; + } + + if (errstatus) + return -1; /* to continue with a new processor */ + + /* clear error state register. */ + lapic_errstatus(); + + /* deassert INIT IPI , No Shorthand, destination mode : physical */ + lapic_write(LAPIC_ICR2, (lapic_read (LAPIC_ICR2) & 0xFFFFFF) | + (cpuid2apicid[cpu] << 24)); + lapic_write(LAPIC_ICR1, (lapic_read (LAPIC_ICR1) & 0xFFF32000) | + APIC_ICR_DEST_ALL | APIC_ICR_TM_LEVEL | APIC_ICR_DM_INIT); + + timeout = 1000; + errstatus = 0; + + /* sleep for 200 micro-seconds */ + lapic_microsec_sleep(200); + + while ((lapic_read(LAPIC_ICR1)&APIC_ICR_DELIVERY_PENDING) && !errstatus) { + errstatus = lapic_errstatus(); + timeout--; + if(!timeout) break; + } + + if (errstatus) + return -1; /* with the new processor */ + + /* clear error state register. 
*/ + (void) lapic_errstatus(); + + /* wait 10ms */ + lapic_microsec_sleep (10000); + + return 0; +} +#endif + +#ifndef CONFIG_SMP PUBLIC int apic_single_cpu_init(void) { if (!cpu_feature_apic_on_chip()) @@ -887,7 +1032,7 @@ PUBLIC int apic_single_cpu_init(void) lapic_addr = phys2vir(LOCAL_APIC_DEF_ADDR); ioapic_enabled = 0; - if (!lapic_enable()) { + if (!lapic_enable(0)) { lapic_addr = 0x0; return 0; } @@ -909,6 +1054,7 @@ PUBLIC int apic_single_cpu_init(void) idt_reload(); return 1; } +#endif PRIVATE eoi_method_t set_eoi_method(unsigned irq) { diff --git a/kernel/arch/i386/apic.h b/kernel/arch/i386/apic.h index 49adaa8b9..ca7f6a9c3 100644 --- a/kernel/arch/i386/apic.h +++ b/kernel/arch/i386/apic.h @@ -97,6 +97,8 @@ EXTERN vir_bytes lapic_addr; EXTERN vir_bytes lapic_eoi_addr; +EXTERN int ioapic_enabled; +EXTERN int bsp_lapic_id; #define MAX_NR_IOAPICS 32 #define MAX_IOAPIC_IRQS 64 @@ -118,9 +120,35 @@ EXTERN unsigned nioapics; EXTERN u32_t lapic_addr_vaddr; /* we remember the virtual address here until we switch to paging */ -_PROTOTYPE(int apic_single_cpu_init, (void)); +_PROTOTYPE (int lapic_enable, (unsigned cpu)); -_PROTOTYPE(void lapic_set_timer_periodic, (unsigned freq)); +EXTERN int ioapic_enabled; +EXTERN unsigned nioapics; + +_PROTOTYPE (void lapic_microsec_sleep, (unsigned count)); +_PROTOTYPE (void ioapic_disable_irqs, (u32_t irqs)); +_PROTOTYPE (void ioapic_enable_irqs, (u32_t irqs)); + +_PROTOTYPE (int lapic_enable, (unsigned cpu)); +_PROTOTYPE (void lapic_disable, (void)); + +_PROTOTYPE (void ioapic_disable_all, (void)); +_PROTOTYPE (int ioapic_enable_all, (void)); + +_PROTOTYPE(int detect_ioapics, (void)); +_PROTOTYPE(void apic_idt_init, (int reset)); + +#ifdef CONFIG_SMP +_PROTOTYPE(int apic_send_startup_ipi, (unsigned cpu, phys_bytes trampoline)); +_PROTOTYPE(int apic_send_init_ipi, (unsigned cpu, phys_bytes trampoline)); +_PROTOTYPE(unsigned int apicid, (void)); +_PROTOTYPE(void ioapic_set_id, (u32_t addr, unsigned int id)); +#else 
+_PROTOTYPE(int apic_single_cpu_init, (void)); +#endif + +_PROTOTYPE(void lapic_set_timer_periodic, (const unsigned freq)); +_PROTOTYPE(void lapic_set_timer_one_shot, (const u32_t value)); _PROTOTYPE(void lapic_stop_timer, (void)); _PROTOTYPE(void ioapic_set_irq, (unsigned irq)); @@ -141,7 +169,7 @@ _PROTOTYPE(void dump_apic_irq_state, (void)); #define lapic_read(what) (*((volatile u32_t *)((what)))) #define lapic_write(what, data) do { \ - (*((volatile u32_t *)((what)))) = data; \ + (*((volatile u32_t *)((what)))) = data; \ } while(0) #endif /* __ASSEMBLY__ */ diff --git a/kernel/arch/i386/apic_asm.S b/kernel/arch/i386/apic_asm.S index 8a9f7b87c..8faa33963 100644 --- a/kernel/arch/i386/apic_asm.S +++ b/kernel/arch/i386/apic_asm.S @@ -70,6 +70,30 @@ ENTRY(lapic_bsp_timer_int_handler) ENTRY(lapic_ap_timer_int_handler) lapic_intr(_C_LABEL(ap_timer_int_handler)) +#ifdef CONFIG_SMP +#include "arch_smp.h" + +/* FIXME dummy stubs */ +ENTRY(smp_ipi_sched) +1: jmp 1b + +ENTRY(smp_ipi_dequeue) +1: jmp 1b + +ENTRY(smp_ipi_stop) +1: jmp 1b + +ENTRY(smp_ipi_reboot) +1: jmp 1b + +ENTRY(smp_ipi_err_int) +1: jmp 1b + +ENTRY(smp_ipi_spv_int) +1: jmp 1b + +#endif /* CONFIG_SMP */ + #ifdef CONFIG_APIC_DEBUG .data @@ -86,7 +110,7 @@ lapic_intr_dummy_handler_msg: #define LAPIC_INTR_DUMMY_HANDLER(vect) \ .balign LAPIC_INTR_DUMMY_HANDLER_SIZE; \ - lapic_intr_dummy_handler_##vect: lapic_intr_dummy_handler(vect) + _lapic_intr_dummy_handler_##vect: lapic_intr_dummy_handler(vect) apic_hwint(0) apic_hwint(1) @@ -414,4 +438,3 @@ LABEL(lapic_intr_dummy_handles_end) #endif /* CONFIG_APIC_DEBUG */ - diff --git a/kernel/arch/i386/arch_clock.c b/kernel/arch/i386/arch_clock.c index 8646a32cc..e66f4c30a 100644 --- a/kernel/arch/i386/arch_clock.c +++ b/kernel/arch/i386/arch_clock.c @@ -9,6 +9,7 @@ #include "kernel/clock.h" #include "kernel/proc.h" #include +#include "glo.h" #ifdef CONFIG_APIC diff --git a/kernel/arch/i386/arch_smp.c b/kernel/arch/i386/arch_smp.c new file mode 100644 index 
000000000..13daca166 --- /dev/null +++ b/kernel/arch/i386/arch_smp.c @@ -0,0 +1,239 @@ +/* This file contains essentially the MP handling code of the Minix kernel. + * + * Changes: + * Apr 1, 2008 Added SMP support. + */ + +#define _SMP + +#include "kernel/kernel.h" +#include "kernel/proc.h" +#include "arch_proto.h" +#include "kernel/glo.h" +#include +#include +#include +#include + +#include "kernel/spinlock.h" +#include "kernel/smp.h" +#include "apic.h" +#include "acpi.h" + +#include "glo.h" + +_PROTOTYPE(void trampoline, (void)); + +/* + * arguments for trampoline. We need to pass the logical cpu id, gdt and idt. + * They have to be in location which is reachable using absolute addressing in + * 16-bit mode + */ +extern volatile u32_t __ap_id; +extern volatile struct segdesc_s __ap_gdt, __ap_idt; + +extern u32_t busclock[CONFIG_MAX_CPUS]; +extern int panicking; + +static int ap_cpu_ready; + +/* there can be at most 255 local APIC ids, each fits in 8 bits */ +PRIVATE unsigned char apicid2cpuid[255]; +PUBLIC unsigned char cpuid2apicid[CONFIG_MAX_CPUS]; + +SPINLOCK_DEFINE(smp_cpu_lock) +SPINLOCK_DEFINE(dispq_lock) + +FORWARD _PROTOTYPE(void smp_init_vars, (void)); +FORWARD _PROTOTYPE(void smp_reinit_vars, (void)); + +PRIVATE void smp_start_aps(void) +{ + /* + * Find an address and align it to a 4k boundary. + */ + unsigned cpu; + u32_t biosresetvector; + phys_bytes trampoline_base = vir2phys(trampoline); + + /* TODO hack around the alignment problem */ + + phys_copy (0x467, vir2phys(&biosresetvector), sizeof(u32_t)); + + /* set the bios shutdown code to 0xA */ + outb(RTC_INDEX, 0xF); + outb(RTC_IO, 0xA); + + /* setup the warm reset vector */ + phys_copy(vir2phys(&trampoline_base), 0x467, sizeof(u32_t)); + + /* prepare gdt and idt for the new cpus */ + __ap_gdt = gdt[GDT_INDEX]; + __ap_idt = gdt[IDT_INDEX]; + + /* okay, we're ready to go. boot all of the ap's now. we loop through + * using the processor's apic id values. 
+ */ + for (cpu = 0; cpu < ncpus; cpu++) { + printf("Booting cpu %d\n", cpu); + ap_cpu_ready = -1; + /* Don't send INIT/SIPI to boot cpu. */ + if((apicid() == cpuid2apicid[cpu]) && + (apicid() == bsp_lapic_id)) { + cpu_set_flag(cpu, CPU_IS_READY); + printf("Skiping bsp\n"); + continue; + } + + __ap_id = cpu; + if (apic_send_init_ipi(cpu, trampoline_base) || + apic_send_startup_ipi(cpu, trampoline_base)) { + printf("WARNING cannot boot cpu %d\n", cpu); + continue; + } + + /* wait for 5 secs for the processors to boot */ + lapic_set_timer_one_shot(5000000); + + while (lapic_read(LAPIC_TIMER_CCR)) { + if (ap_cpu_ready == cpu) { + printf("CPU %d is up\n", cpu); + cpu_set_flag(cpu, CPU_IS_READY); + break; + } + } + if (ap_cpu_ready == -1) { + printf("WARNING : CPU %d didn't boot\n", cpu); + } + } + + phys_copy(vir2phys(&biosresetvector),(phys_bytes)0x467,sizeof(u32_t)); + + outb(RTC_INDEX, 0xF); + outb(RTC_IO, 0); + + bsp_finish_booting(); + NOT_REACHABLE; +} + +PUBLIC void smp_halt_cpu (void) +{ + NOT_IMPLEMENTED; +} + +PUBLIC void smp_shutdown_aps (void) +{ + NOT_IMPLEMENTED; +} + +PRIVATE void ap_finish_booting(void) +{ + unsigned cpu = cpuid; + + printf("CPU %d says hello world!\n", cpu); + /* inform the world of our presence. */ + ap_cpu_ready = cpu; + + while(!i386_paging_enabled) + arch_pause(); + for(;;); + + /* finish processor initialisation. 
*/ + lapic_enable(cpu); + + switch_to_user(); + NOT_REACHABLE; +} + +PUBLIC void smp_ap_boot(void) +{ + switch_k_stack((char *)get_k_stack_top(__ap_id) - + X86_STACK_TOP_RESERVED, ap_finish_booting); +} + +PRIVATE void smp_reinit_vars(void) +{ + int i; + lapic_addr = lapic_eoi_addr = 0; + ioapic_enabled = 0; + + ncpus = 1; +} + +PRIVATE void tss_init_all(void) +{ + unsigned cpu; + + for(cpu = 0; cpu < ncpus ; cpu++) + tss_init(cpu, get_k_stack_top(cpu)); +} + +PRIVATE int discover_cpus(void) +{ + struct acpi_madt_lapic * cpu; + + while (ncpus < CONFIG_MAX_CPUS && (cpu = acpi_get_lapic_next())) { + apicid2cpuid[cpu->apic_id] = ncpus; + cpuid2apicid[ncpus] = cpu->apic_id; + printf("CPU %3d local APIC id %3d\n", ncpus, cpu->apic_id); + ncpus++; + } + + return ncpus; +} + +PUBLIC void smp_init (void) +{ + /* read the MP configuration */ + if (!discover_cpus()) { + ncpus = 1; + goto uniproc_fallback; + } + + lapic_addr = phys2vir(LOCAL_APIC_DEF_ADDR); + ioapic_enabled = 0; + + tss_init_all(); + + /* + * we still run on the boot stack and we cannot use cpuid as its value + * wasn't set yet. apicid2cpuid initialized in mps_init() + */ + bsp_cpu_id = apicid2cpuid[apicid()]; + + if (!lapic_enable(bsp_cpu_id)) { + printf("ERROR : failed to initialize BSP Local APIC\n"); + goto uniproc_fallback; + } + + acpi_init(); + + if (!detect_ioapics()) { + lapic_disable(); + lapic_addr = 0x0; + goto uniproc_fallback; + } + + ioapic_enable_all(); + + if (ioapic_enabled) + machine.apic_enabled = 1; + + /* set smp idt entries. */ + apic_idt_init(0); /* Not a reset ! */ + idt_reload(); + + BOOT_VERBOSE(printf("SMP initialized\n")); + + switch_k_stack((char *)get_k_stack_top(bsp_cpu_id) - + X86_STACK_TOP_RESERVED, smp_start_aps); + + return; + +uniproc_fallback: + apic_idt_init(1); /* Reset to PIC idt ! */ + idt_reload(); + smp_reinit_vars (); /* revert to a single proc system. 
*/ + intr_init (INTS_MINIX, 0); /* no auto eoi */ + printf("WARNING : SMP initialization failed\n"); +} diff --git a/kernel/arch/i386/arch_system.c b/kernel/arch/i386/arch_system.c index bb9daa654..bf0f39934 100644 --- a/kernel/arch/i386/arch_system.c +++ b/kernel/arch/i386/arch_system.c @@ -24,6 +24,8 @@ #include "kernel/debug.h" #include "multiboot.h" +#include "glo.h" + #ifdef CONFIG_APIC #include "apic.h" #endif @@ -43,6 +45,8 @@ extern void poweroff16_end(); /* set OSXMMEXCPT[bit 10] if we provide #XM handler. */ #define CR4_OSXMMEXCPT (1L<<10) +PUBLIC void * k_stacks; + FORWARD _PROTOTYPE( void ser_debug, (int c)); PUBLIC __dead void arch_monitor(void) @@ -198,23 +202,6 @@ PUBLIC void arch_get_aout_headers(const int i, struct exec *h) phys_copy(aout + i * A_MINHDR, vir2phys(h), (phys_bytes) A_MINHDR); } -PRIVATE void tss_init(struct tss_s * tss, void * kernel_stack, - const unsigned cpu) -{ - /* - * make space for process pointer and cpu id and point to the first - * usable word - */ - tss->sp0 = ((unsigned) kernel_stack) - 2 * sizeof(void *); - tss->ss0 = DS_SELECTOR; - - /* - * set the cpu id at the top of the stack so we know on which cpu is - * this stak in use when we trap to kernel - */ - *((reg_t *)(tss->sp0 + 1 * sizeof(reg_t))) = cpu; -} - PRIVATE void fpu_init(void) { unsigned short cw, sw; @@ -313,7 +300,20 @@ PUBLIC void arch_init(void) idt_init(); - tss_init(&tss, &k_boot_stktop, 0); + /* FIXME stupid a.out + * align the stacks in the stack are to the K_STACK_SIZE which is a + * power of 2 + */ + k_stacks = (void*) (((vir_bytes)&k_stacks_start + K_STACK_SIZE - 1) & + ~(K_STACK_SIZE - 1)); + +#ifndef CONFIG_SMP + /* + * use stack 0 and cpu id 0 on a single processor machine, SMP + * configuration does this in smp_init() for all cpus at once + */ + tss_init(0, get_k_stack_top(0)); +#endif acpi_init(); @@ -547,7 +547,11 @@ PUBLIC struct proc * arch_finish_switch_to_user(void) char * stk; struct proc * p; - stk = (char *)tss.sp0; +#ifdef CONFIG_SMP + 
stk = (char *)tss[cpuid].sp0; +#else + stk = (char *)tss[0].sp0; +#endif /* set pointer to the process to run on the stack */ p = get_cpulocal_var(proc_ptr); *((reg_t *)stk) = (reg_t) p; diff --git a/kernel/arch/i386/arch_watchdog.c b/kernel/arch/i386/arch_watchdog.c index 6becd4f69..8fce5c2dc 100644 --- a/kernel/arch/i386/arch_watchdog.c +++ b/kernel/arch/i386/arch_watchdog.c @@ -1,6 +1,7 @@ #include "kernel/kernel.h" #include "kernel/watchdog.h" #include "arch_proto.h" +#include "glo.h" #include #include diff --git a/kernel/arch/i386/glo.h b/kernel/arch/i386/glo.h index d056d5c20..aaa910899 100644 --- a/kernel/arch/i386/glo.h +++ b/kernel/arch/i386/glo.h @@ -1,7 +1,14 @@ #ifndef __GLO_X86_H__ #define __GLO_X86_H__ +#include "kernel/kernel.h" +#include "proto.h" + EXTERN int cpu_has_tsc; /* signal whether this cpu has time stamp register. This feature was introduced by Pentium */ +EXTERN struct tss_s tss[CONFIG_MAX_CPUS]; + +EXTERN int i386_paging_enabled; + #endif /* __GLO_X86_H__ */ diff --git a/kernel/arch/i386/include/arch_proto.h b/kernel/arch/i386/include/arch_proto.h index a669981d2..54658754c 100644 --- a/kernel/arch/i386/include/arch_proto.h +++ b/kernel/arch/i386/include/arch_proto.h @@ -2,6 +2,12 @@ #ifndef _I386_PROTO_H #define _I386_PROTO_H +#include + +#define K_STACK_SIZE I386_PAGE_SIZE + +#ifndef __ASSEMBLY__ + /* Hardware interrupt handlers. 
*/ _PROTOTYPE( void hwint00, (void) ); _PROTOTYPE( void hwint01, (void) ); @@ -95,6 +101,8 @@ _PROTOTYPE( void frstor, (void *)); _PROTOTYPE( unsigned short fnstsw, (void)); _PROTOTYPE( void fnstcw, (unsigned short* cw)); +_PROTOTYPE( void switch_k_stack, (void * esp, void (* continuation)(void))); + _PROTOTYPE(void __switch_address_space, (struct proc * p, struct proc ** __ptproc)); #define switch_address_space(proc) \ @@ -132,8 +140,7 @@ struct tss_s { /* u8_t iomap[0]; */ }; -EXTERN struct tss_s tss; - +_PROTOTYPE( void prot_init, (void) ); _PROTOTYPE( void idt_init, (void) ); _PROTOTYPE( void init_dataseg, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size, int privilege) ); @@ -151,13 +158,32 @@ struct gate_table_s { unsigned char privilege; }; -EXTERN struct gate_table_s gate_table_pic[]; +extern struct gate_table_s gate_table_pic[]; /* copies an array of vectors to the IDT. The last vector must be zero filled */ _PROTOTYPE(void idt_copy_vectors, (struct gate_table_s * first)); _PROTOTYPE(void idt_reload,(void)); EXTERN void * k_boot_stktop; +EXTERN void * k_stacks_start; +extern void * k_stacks; + +#define get_k_stack_top(cpu) ((void *)(((char*)(k_stacks)) \ + + 2 * ((cpu) + 1) * K_STACK_SIZE)) + +#ifndef __GNUC__ +/* call a function to read the stack fram pointer (%ebp) */ +_PROTOTYPE(reg_t read_ebp, (void)); +#define get_stack_frame(__X) ((reg_t)read_ebp()) +#else +/* read %ebp directly */ +#define get_stack_frame(__X) ((reg_t)__builtin_frame_address(0)) +#endif + +/* + * sets up TSS for a cpu and assigns kernel stack and cpu id + */ +_PROTOTYPE(void tss_init, (unsigned cpu, void * kernel_stack)); _PROTOTYPE( void int_gate, (unsigned vec_nr, vir_bytes offset, unsigned dpl_type) ); @@ -193,4 +219,6 @@ _PROTOTYPE(int platform_tbl_ptr, (phys_bytes start, /* functions defined in architecture-independent kernel source. 
*/ #include "kernel/proto.h" +#endif /* __ASSEMBLY__ */ + #endif diff --git a/kernel/arch/i386/include/arch_smp.h b/kernel/arch/i386/include/arch_smp.h new file mode 100644 index 000000000..90e70223d --- /dev/null +++ b/kernel/arch/i386/include/arch_smp.h @@ -0,0 +1,42 @@ +#ifndef __SMP_X86_H__ +#define __SMP_X86_H__ + +#include "arch_proto.h" /* K_STACK_SIZE */ + +#define MAX_NR_INTERRUPT_ENTRIES 128 + +#define SMP_SCHED_PROC 0xF0 +#define SMP_DEQUEUE_PROC 0xF1 +#define SMP_CPU_REBOOT 0xF2 +#define SMP_CPU_HALT 0xF3 +#define SMP_ERROR_INT 0xF4 + +/* currently only 2 interrupt priority levels are used */ +#define SPL0 0x0 +#define SPLHI 0xF + +#define SMP_IPI_DEST 0 +#define SMP_IPI_SELF 1 +#define SMP_IPI_TO_ALL 2 +#define SMP_IPI_TO_ALL_BUT_SELF 3 + +#ifndef __ASSEMBLY__ + +/* returns the current cpu id */ +#define cpuid (((u32_t *)(((u32_t)get_stack_frame() + (K_STACK_SIZE - 1)) \ + & ~(K_STACK_SIZE - 1)))[-1]) +/* + * in case apic or smp is disabled in boot monitor, we need to finish single cpu + * boot using the legacy PIC + */ +#define smp_single_cpu_fallback() do { \ + tss_init(0, get_k_stack_top(0)); \ + bsp_finish_booting(); \ +} while(0) + +extern unsigned char cpuid2apicid[CONFIG_MAX_CPUS]; + +#endif + +#endif /* __SMP_X86_H__ */ + diff --git a/kernel/arch/i386/include/archconst.h b/kernel/arch/i386/include/archconst.h index ad38d37b3..d66f19275 100644 --- a/kernel/arch/i386/include/archconst.h +++ b/kernel/arch/i386/include/archconst.h @@ -22,8 +22,10 @@ #define SS_INDEX 5 /* kernel SS (386: monitor SS at startup) */ #define CS_INDEX 6 /* kernel CS */ #define MON_CS_INDEX 7 /* temp for BIOS (386: monitor CS at startup) */ -#define TSS_INDEX 8 /* kernel TSS */ -#define FIRST_LDT_INDEX 9 /* rest of descriptors are LDT's */ +#define TSS_INDEX_FIRST 8 /* first kernel TSS */ +#define TSS_INDEX_BOOT TSS_INDEX_FIRST +#define TSS_INDEX(cpu) (TSS_INDEX_FIRST + (cpu)) /* per cpu kernel tss */ +#define FIRST_LDT_INDEX TSS_INDEX(CONFIG_MAX_CPUS) /* rest of 
descriptors are LDT's */ /* Descriptor structure offsets. */ #define DESC_BASE 2 /* to base_low */ @@ -44,7 +46,8 @@ #define SS_SELECTOR SS_INDEX * DESC_SIZE #define CS_SELECTOR CS_INDEX * DESC_SIZE #define MON_CS_SELECTOR MON_CS_INDEX * DESC_SIZE -#define TSS_SELECTOR TSS_INDEX * DESC_SIZE +#define TSS_SELECTOR(cpu) (TSS_INDEX(cpu) * DESC_SIZE) +#define TSS_SELECTOR_BOOT (TSS_INDEX_BOOT * DESC_SIZE) /* Privileges. */ #define INTR_PRIVILEGE 0 /* kernel and interrupt handlers */ @@ -156,4 +159,11 @@ /* Poweroff 16-bit code address */ #define BIOS_POWEROFF_ENTRY 0x1000 + +/* + * defines how many bytes are reserved at the top of the kernel stack for global + * information like currently scheduled process or current cpu id + */ +#define X86_STACK_TOP_RESERVED (2 * sizeof(reg_t)) + #endif /* _I386_ACONST_H */ diff --git a/kernel/arch/i386/klib.S b/kernel/arch/i386/klib.S index e6d9cd911..bf1091bbe 100644 --- a/kernel/arch/i386/klib.S +++ b/kernel/arch/i386/klib.S @@ -115,37 +115,6 @@ csinit: xchgl _C_LABEL(mon_sp), %esp /* unswitch stacks */ lidt _C_LABEL(gdt)+IDT_SELECTOR /* reload interrupt descriptor table */ -#ifdef CONFIG_APIC - cmpl $0x0, lapic_addr - jne 3f - mov $0, %ebx - jmp 4f - -3: - mov $FLAT_DS_SELECTOR, %ebx - mov %bx, %fs - movl lapic_addr, %eax - add $0x20, %eax - .byte 0x64; mov (%eax), %ebx - and $0xFF000000, %ebx - shr $24, %ebx - movzb %bl, %ebx - -4: - add $apicid2cpuid, %ebx - movzb (%ebx), %eax - shl $3, %eax - mov %eax, %ebx - add $TSS_SELECTOR, %eax - addl _C_LABEL(gdt)+DESC_ACCESS, %eax - and $~0x02, %eax - ltr %bx /* set TSS register */ - - mov $DS_SELECTOR, %eax - mov %ax, %fs - -#endif /* CONFIG_APIC */ - pop %eax outb $INT_CTLMASK /* restore interrupt masks */ movb %ah, %al @@ -908,3 +877,171 @@ ENTRY(eoi_8259_slave) idt_ptr: .short 0x3ff .long 0x0 +#ifdef CONFIG_SMP + +/*===========================================================================*/ +/* smp_get_htt */ 
+/*===========================================================================*/ +/* PUBLIC int smp_get_htt(void); */ +/* return true if the processor is hyper-threaded. */ +ENTRY(smp_get_htt) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + mov $0x1, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + mov %edx, %eax + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* smp_get_num_htt */ +/*===========================================================================*/ +/* PUBLIC int smp_get_num_htt(void); */ +/* Get the number of hyper-threaded processor cores */ +ENTRY(smp_get_num_htt) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + mov $0x1, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + mov %ebx, %eax + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* smp_get_cores */ +/*===========================================================================*/ +/* PUBLIC int smp_get_cores(void); */ +/* Get the number of cores. */ +ENTRY(smp_get_cores) + push %ebp + mov %esp, %ebp + pushf + pop %eax + mov %eax, %ebx + and $0x200000, %eax + je 0f + push %ecx + xor %ecx, %ecx + mov $0x4, %eax +/* FIXME don't use the byte code */ +.byte 0x0f, 0xa2 /* opcode for cpuid */ + pop %ebp + ret +0: + xor %eax, %eax + pop %ebp + ret + +/*===========================================================================*/ +/* arch_spinlock_lock */ +/*===========================================================================*/ +/* void arch_spinlock_lock (u32_t *lock_data) + * { + * while (test_and_set(lock_data) == 1) + * while (*lock_data == 1) + * ; + * } + * eax register is clobbered. 
+ */ +ENTRY(arch_spinlock_lock) + push %ebp + mov %esp, %ebp + push %ebx + mov 8(%ebp), %eax + mov $1, %ebx +/* FIXME use exponential backoff */ +2: + xchg %ebx, (%eax) + test %ebx, %ebx + je 0f +1: +/* FIXME don't use the byte code */ +.byte 0xf3, 0x90 /* pause */ + cmp $0, (%eax) + jne 1b + jmp 2b +0: + mfence + pop %ebx + pop %ebp + ret + +/*===========================================================================*/ +/* arch_spinlock_unlock */ +/*===========================================================================*/ +/* * void arch_spinlock_unlock (unsigned int *lockp) */ +/* spin lock release routine. */ +ENTRY(arch_spinlock_unlock) + mov 4(%esp), %eax + push %ebx + mov $0, %ebx + xchg %ebx, (%eax) + mfence + pop %ebx + ret + +/*===========================================================================*/ +/* mfence */ +/*===========================================================================*/ +/* PUBLIC void mfence (void); */ +/* architecture specific memory barrier routine. */ +ENTRY(mfence) + mfence + ret + +#endif /* CONFIG_SMP */ + +/*===========================================================================*/ +/* arch_pause */ +/*===========================================================================*/ +/* PUBLIC void arch_pause (void); */ +/* architecture specific pause routine. 
*/ +ENTRY(arch_pause) + pause + ret + +/*===========================================================================*/ +/* read_ebp */ +/*===========================================================================*/ +/* PUBLIC u16_t cpuid(void) */ +ENTRY(read_ebp) + mov %ebp, %eax + ret + + +/* + * void switch_k_stack(void * esp, void (* continuation)(void)); + * + * sets the current stack pointer to the given value and continues execution at + * the given address + */ +ENTRY(switch_k_stack) + /* get the arguments from the stack */ + mov 8(%esp), %eax + mov 4(%esp), %ecx + mov $0, %ebp /* reset %ebp for stack trace */ + mov %ecx, %esp /* set the new stack */ + jmp *%eax /* and jump to the continuation */ + + /* NOT_REACHABLE */ +0: jmp 0b diff --git a/kernel/arch/i386/memory.c b/kernel/arch/i386/memory.c index 24ca0db62..10c432126 100644 --- a/kernel/arch/i386/memory.c +++ b/kernel/arch/i386/memory.c @@ -27,6 +27,8 @@ #endif #endif +PUBLIC int i386_paging_enabled = 0; + PRIVATE int psok = 0; #define MAX_FREEPDES (3 * CONFIG_MAX_CPUS) @@ -935,8 +937,10 @@ void i386_freepde(const int pde) PRIVATE int oxpcie_mapping_index = -1; -PUBLIC int arch_phys_map(const int index, phys_bytes *addr, - phys_bytes *len, int *flags) +PUBLIC int arch_phys_map(const int index, + phys_bytes *addr, + phys_bytes *len, + int *flags) { static int first = 1; int freeidx = 0; @@ -1079,7 +1083,12 @@ PUBLIC int arch_enable_paging(struct proc * caller, const message * m_ptr) io_apic[i].addr = io_apic[i].vaddr; } } + + /* TODO APs are still waiting, release them */ #endif + + i386_paging_enabled = 1; + #ifdef CONFIG_WATCHDOG /* * We make sure that we don't enable the watchdog until paging is turned diff --git a/kernel/arch/i386/mpx.S b/kernel/arch/i386/mpx.S index 42d25cd73..65d8855b6 100644 --- a/kernel/arch/i386/mpx.S +++ b/kernel/arch/i386/mpx.S @@ -43,7 +43,7 @@ begdata: begbss: #endif - +#include "../../kernel.h" #include #include #include @@ -55,6 +55,12 @@ begbss: #include "sconst.h" 
#include "multiboot.h" +#include "arch_proto.h" /* K_STACK_SIZE */ + +#ifdef CONFIG_SMP +#include "kernel/smp.h" +#endif + /* Selected 386 tss offsets. */ #define TSS3_S_SP0 4 @@ -72,7 +78,7 @@ IMPORT(switch_to_user) /*===========================================================================*/ /* MINIX */ /*===========================================================================*/ -.globl MINIX +.global MINIX MINIX: /* this is the entry point for the MINIX kernel */ jmp over_flags /* skip over the next few bytes */ @@ -164,7 +170,7 @@ copygdt: mov %ax, %fs mov %ax, %gs mov %ax, %ss - mov $_C_LABEL(k_boot_stktop), %esp /* set sp to point to the top of kernel stack */ + mov $_C_LABEL(k_boot_stktop) - 4, %esp /* set sp to point to the top of kernel stack */ /* Save boot parameters into these global variables for i386 code */ movl %edx, _C_LABEL(params_size) @@ -194,7 +200,7 @@ csinit: mov %ax, %fs mov %ax, %gs mov %ax, %ss - movw $TSS_SELECTOR, %ax /* no other TSS is used */ + movw $TSS_SELECTOR_BOOT, %ax /* no other TSS is used */ ltr %ax push $0 /* set flags to known good state */ popf /* esp, clear nested task and int enable */ @@ -615,6 +621,36 @@ ENTRY(reload_cr3) pop %ebp ret +#ifdef CONFIG_SMP +ENTRY(startup_ap_32) + /* + * we are in protected mode now, %cs is correct and we need to set the + * data descriptors before we can touch anything + */ + movw $DS_SELECTOR, %ax + mov %ax, %ds + mov %ax, %ss + mov %ax, %es + movw $0, %ax + mov %ax, %fs + mov %ax, %gs + + /* load TSS for this cpu which was prepared by BSP */ + movl _C_LABEL(__ap_id), %ecx + shl $3, %cx + mov $TSS_SELECTOR(0), %eax + add %cx, %ax + ltr %ax + + /* + * use the boot stack for now. 
The running CPUs are already using their + * own stack, the rest is still waiting to be booted + */ + mov $_C_LABEL(k_boot_stktop) - 4, %esp + jmp _C_LABEL(smp_ap_boot) + hlt +#endif + /*===========================================================================*/ /* data */ /*===========================================================================*/ @@ -630,5 +666,15 @@ ENTRY(reload_cr3) * the kernel stack */ k_boot_stack: -.space 4096 /* kernel stack */ /* FIXME use macro here */ +.space K_STACK_SIZE /* kernel stack */ /* FIXME use macro here */ LABEL(k_boot_stktop) /* top of kernel stack */ + +.balign K_STACK_SIZE +LABEL(k_stacks_start) + +/* two pages for each stack, one for data, other as a sandbox */ +.space 2 * (K_STACK_SIZE * (CONFIG_MAX_CPUS + 1)) + +LABEL(k_stacks_end) + +/* top of kernel stack */ diff --git a/kernel/arch/i386/protect.c b/kernel/arch/i386/protect.c index c752e6a3b..1dd4018b5 100644 --- a/kernel/arch/i386/protect.c +++ b/kernel/arch/i386/protect.c @@ -36,7 +36,7 @@ PUBLIC struct segdesc_s gdt[GDT_SIZE]= /* used in klib.s and mpx.s */ {0xffff,0,0,0x9a,0x0f,0}, /* temp for BIOS (386: monitor CS at startup) */ }; PRIVATE struct gatedesc_s idt[IDT_SIZE]; /* zero-init so none present */ -PUBLIC struct tss_s tss; /* zero init */ +PUBLIC struct tss_s tss[CONFIG_MAX_CPUS]; /* zero init */ FORWARD _PROTOTYPE( void sdesc, (struct segdesc_s *segdp, phys_bytes base, vir_bytes size) ); @@ -130,6 +130,31 @@ PUBLIC struct gate_table_s gate_table_pic[] = { { NULL, 0, 0} }; +PUBLIC void tss_init(unsigned cpu, void * kernel_stack) +{ + struct tss_s * t = &tss[cpu]; + + t->ss0 = DS_SELECTOR; + init_dataseg(&gdt[TSS_INDEX(cpu)], vir2phys(t), + sizeof(struct tss_s), INTR_PRIVILEGE); + gdt[TSS_INDEX(cpu)].access = PRESENT | + (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; + + /* Complete building of main TSS. 
*/ + t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */ + + /* + * make space for process pointer and cpu id and point to the first + * usable word + */ + t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED; + /* + * set the cpu id at the top of the stack so we know on which cpu is + * this stack in use when we trap to kernel + */ + *((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu; +} + /*===========================================================================* * prot_init * *===========================================================================*/ @@ -175,13 +200,8 @@ PUBLIC void prot_init(void) rp->p_seg.p_ldt_sel = ldt_index * DESC_SIZE; } - /* Build main TSS */ - tss.ss0 = DS_SELECTOR; - init_dataseg(&gdt[TSS_INDEX], vir2phys(&tss), sizeof(tss), INTR_PRIVILEGE); - gdt[TSS_INDEX].access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE; - - /* Complete building of main TSS. */ - tss.iobase = sizeof tss; /* empty i/o permissions map */ + /* Build boot TSS */ + tss_init(0, &k_boot_stktop); } PUBLIC void idt_copy_vectors(struct gate_table_s * first) diff --git a/kernel/arch/i386/sconst.h b/kernel/arch/i386/sconst.h index 50e92f97b..5f2bd6e09 100644 --- a/kernel/arch/i386/sconst.h +++ b/kernel/arch/i386/sconst.h @@ -128,7 +128,7 @@ push %ebp ;\ ;\ movl (CURR_PROC_PTR + 4 + displ)(%esp), %ebp ;\ - ;\ + \ /* save the segment registers */ \ SAVE_SEGS(%ebp) ;\ \ diff --git a/kernel/arch/i386/trampoline.S b/kernel/arch/i386/trampoline.S new file mode 100644 index 000000000..1ec8919b6 --- /dev/null +++ b/kernel/arch/i386/trampoline.S @@ -0,0 +1,31 @@ +#include +#include "archconst.h" + +.balign 4096 +.text +.code16 +ENTRY(trampoline) + cli + + /* %cs has some value and we must use the same for data */ + mov %cs, %ax + mov %ax, %ds + + /* load gdt and idt prepared by bsp */ + lgdtl _C_LABEL(__ap_gdt) - _C_LABEL(trampoline) + lidtl _C_LABEL(__ap_idt) - _C_LABEL(trampoline) + + /* switch to protected mode */ + mov %cr0, %eax + orb $1, %al + mov
%eax, %cr0 + + ljmp $CS_SELECTOR, $_C_LABEL(startup_ap_32) + +.balign 4 +LABEL(__ap_id) +.space 4 +LABEL(__ap_gdt) +.space 8 +LABEL(__ap_idt) +.space 8 diff --git a/kernel/config.h b/kernel/config.h index 31dbd3792..ff7fda94c 100644 --- a/kernel/config.h +++ b/kernel/config.h @@ -64,8 +64,7 @@ #endif #define VDEVIO_BUF_SIZE 64 /* max elements per VDEVIO request */ -/* How many bytes for the kernel stack. Space allocated in mpx.s. */ -#define K_STACK_BYTES 1024 +#define K_PARAM_SIZE 512 #endif /* CONFIG_H */ diff --git a/kernel/glo.h b/kernel/glo.h index cae5332c9..3ac06075a 100644 --- a/kernel/glo.h +++ b/kernel/glo.h @@ -62,6 +62,10 @@ EXTERN u64_t cpu_hz[CONFIG_MAX_CPUS]; #define cpu_set_freq(cpu, freq) do {cpu_hz[cpu] = freq;} while (0) #define cpu_get_freq(cpu) cpu_hz[cpu] +#ifdef CONFIG_SMP +EXTERN int config_no_smp; /* optionally turn off SMP */ +#endif + /* VM */ EXTERN int vm_running; EXTERN int catch_pagefaults; diff --git a/kernel/kernel.h b/kernel/kernel.h index a8d571cea..6247f2c29 100644 --- a/kernel/kernel.h +++ b/kernel/kernel.h @@ -12,9 +12,10 @@ * (non-zero) is set in monitor */ #define CONFIG_WATCHDOG -/* We only support 1 cpu now */ + +#ifndef CONFIG_MAX_CPUS #define CONFIG_MAX_CPUS 1 -#define cpuid 0 +#endif /* OXPCIe952 PCIe with 2 UARTs in-kernel support */ #define CONFIG_OXPCIE 0 @@ -56,6 +57,17 @@ #include "debug.h" /* debugging, MUST be last kernel header */ #include "cpulocals.h" +#ifndef CONFIG_SMP +/* We only support 1 cpu now */ +#define CONFIG_MAX_CPUS 1 +#define cpuid 0 + +#else + +#include "smp.h" + +#endif + #endif /* __ASSEMBLY__ */ #endif /* KERNEL_H */ diff --git a/kernel/main.c b/kernel/main.c index 02194d166..da5532c39 100644 --- a/kernel/main.c +++ b/kernel/main.c @@ -21,9 +21,62 @@ #include "clock.h" #include "hw_intr.h" +#ifdef CONFIG_SMP +#include "smp.h" +#endif + /* Prototype declarations for PRIVATE functions.
*/ FORWARD _PROTOTYPE( void announce, (void)); +PUBLIC void bsp_finish_booting(void) +{ +#if SPROFILE + sprofiling = 0; /* we're not profiling until instructed to */ +#endif /* SPROFILE */ + cprof_procs_no = 0; /* init nr of hash table slots used */ + + vm_running = 0; + krandom.random_sources = RANDOM_SOURCES; + krandom.random_elements = RANDOM_ELEMENTS; + + /* MINIX is now ready. All boot image processes are on the ready queue. + * Return to the assembly code to start running the current process. + */ + get_cpulocal_var(bill_ptr) = proc_addr(IDLE); /* it has to point somewhere */ + announce(); /* print MINIX startup banner */ + + /* + * enable timer interrupts and clock task on the boot CPU + */ + if (boot_cpu_init_timer(system_hz)) { + panic("FATAL : failed to initialize timer interrupts, " + "cannot continue without any clock source!"); + } + +/* Warnings for sanity checks that take time. These warnings are printed + * so it's a clear warning no full release should be done with them + * enabled. + */ +#if DEBUG_SCHED_CHECK + FIXME("DEBUG_SCHED_CHECK enabled"); +#endif +#if DEBUG_VMASSERT + FIXME("DEBUG_VMASSERT enabled"); +#endif +#if DEBUG_PROC_CHECK + FIXME("PROC check enabled"); +#endif + + DEBUGEXTRA(("cycles_accounting_init()... ")); + cycles_accounting_init(); + DEBUGEXTRA(("done\n")); + + assert(runqueues_ok()); + + switch_to_user(); + NOT_REACHABLE; +} + /*===========================================================================* * main * *===========================================================================*/ @@ -197,52 +250,6 @@ PUBLIC int main(void) DEBUGEXTRA(("done\n")); } - /* Architecture-dependent initialization. */ - DEBUGEXTRA(("arch_init()... ")); - arch_init(); - DEBUGEXTRA(("done\n")); - - /* System and processes initialization */ - DEBUGEXTRA(("system_init()... 
")); - system_init(); - DEBUGEXTRA(("done\n")); - -#if SPROFILE - sprofiling = 0; /* we're not profiling until instructed to */ -#endif /* SPROFILE */ - cprof_procs_no = 0; /* init nr of hash table slots used */ - - vm_running = 0; - krandom.random_sources = RANDOM_SOURCES; - krandom.random_elements = RANDOM_ELEMENTS; - - /* MINIX is now ready. All boot image processes are on the ready queue. - * Return to the assembly code to start running the current process. - */ - get_cpulocal_var(bill_ptr) = proc_addr(IDLE); /* it has to point somewhere */ - announce(); /* print MINIX startup banner */ - - /* - * enable timer interrupts and clock task on the boot CPU - */ - - if (boot_cpu_init_timer(system_hz)) { - panic( "FATAL : failed to initialize timer interrupts; " - "cannot continue without any clock source!"); - } - -/* Warnings for sanity checks that take time. These warnings are printed - * so it's a clear warning no full release should be done with them - * enabled. - */ -#if DEBUG_PROC_CHECK - FIXME("PROC check enabled"); -#endif - - DEBUGEXTRA(("cycles_accounting_init()... ")); - cycles_accounting_init(); - DEBUGEXTRA(("done\n")); - #define IPCNAME(n) { \ assert((n) >= 0 && (n) <= IPCNO_HIGHEST); \ assert(!ipc_call_names[n]); \ @@ -256,9 +263,34 @@ PUBLIC int main(void) IPCNAME(SENDNB); IPCNAME(SENDA); - assert(runqueues_ok()); + /* Architecture-dependent initialization. */ + DEBUGEXTRA(("arch_init()... ")); + arch_init(); + DEBUGEXTRA(("done\n")); + + /* System and processes initialization */ + DEBUGEXTRA(("system_init()... 
")); + system_init(); + DEBUGEXTRA(("done\n")); + +#ifdef CONFIG_SMP + if (config_no_apic) { + BOOT_VERBOSE(printf("APIC disabled, disables SMP, using legacy PIC\n")); + smp_single_cpu_fallback(); + } else if (config_no_smp) { + BOOT_VERBOSE(printf("SMP disabled, using legacy PIC\n")); + smp_single_cpu_fallback(); + } else + smp_init(); +#else + /* + * if configured for a single CPU, we are already on the kernel stack which we + * are going to use everytime we execute kernel code. We finish booting and we + * never return here + */ + bsp_finish_booting(); +#endif - switch_to_user(); NOT_REACHABLE; return 1; } @@ -304,6 +336,17 @@ PUBLIC void minix_shutdown(timer_t *tp) * down MINIX. How to shutdown is in the argument: RBT_HALT (return to the * monitor), RBT_MONITOR (execute given code), RBT_RESET (hard reset). */ +#ifdef CONFIG_SMP + /* + * FIXME + * + * we will need to stop timers on all cpus if SMP is enabled and put them in + * such a state that we can perform the whole boot process once restarted from + * monitor again + */ + if (ncpus > 1) + NOT_IMPLEMENTED; +#endif arch_stop_local_timer(); hw_intr_disable_all(); intr_init(INTS_ORIG, 0); diff --git a/kernel/proc.c b/kernel/proc.c index c562c9f11..70938a9d9 100644 --- a/kernel/proc.c +++ b/kernel/proc.c @@ -42,6 +42,7 @@ #include "proc.h" #include "vm.h" #include "clock.h" +#include "spinlock.h" #include "arch_proto.h" diff --git a/kernel/proto.h b/kernel/proto.h index bdc7a8a18..f0923ee5e 100644 --- a/kernel/proto.h +++ b/kernel/proto.h @@ -1,5 +1,8 @@ /* Function prototypes. 
*/ +/* FIXME this is a hack how to avoid inclusion conflicts */ +#ifdef __kernel__ + #ifndef PROTO_H #define PROTO_H @@ -37,6 +40,7 @@ _PROTOTYPE( void fpu_sigcontext, (struct proc *, struct sigframe *fr, struct sig _PROTOTYPE( int main, (void) ); _PROTOTYPE( void prepare_shutdown, (int how) ); _PROTOTYPE( __dead void minix_shutdown, (struct timer *tp) ); +_PROTOTYPE( void bsp_finish_booting, (void) ); /* proc.c */ @@ -207,4 +211,6 @@ _PROTOTYPE(void release_fpu, (void)); /* utility.c */ _PROTOTYPE( void cpu_print_freq, (unsigned cpu)); +#endif /* __kernel__ */ + #endif /* PROTO_H */ diff --git a/kernel/smp.c b/kernel/smp.c new file mode 100644 index 000000000..61745cdc1 --- /dev/null +++ b/kernel/smp.c @@ -0,0 +1,7 @@ +#include "smp.h" + +unsigned ncpus; +unsigned ht_per_core; +unsigned bsp_cpu_id; + +struct cpu cpus[CONFIG_MAX_CPUS]; diff --git a/kernel/smp.h b/kernel/smp.h new file mode 100644 index 000000000..d68c7ee05 --- /dev/null +++ b/kernel/smp.h @@ -0,0 +1,55 @@ +#ifndef __SMP_H__ +#define __SMP_H__ + +#ifdef CONFIG_SMP + +#ifndef __ASSEMBLY__ + +#include "kernel.h" +#include "arch_smp.h" + +/* number of CPUs (execution strands) in the system */ +EXTERN unsigned ncpus; +/* Number of virtual strands per physical core */ +EXTERN unsigned ht_per_core; +/* which cpu is bootstrapping */ +EXTERN unsigned bsp_cpu_id; + +#define cpu_is_bsp(cpu) (bsp_cpu_id == (cpu)) + +/* + * SMP initialization is largely architecture dependent and each architecture + * must provide a method how to do it. If initiating SMP fails the function does + * not report it. However it must put the system in such a state that it falls + * back to a uniprocessor system.
Although the uniprocessor configuration may be + * suboptimal, the system must be able to run on the bootstrap processor as if + * it was the only processor in the system + */ +_PROTOTYPE(void smp_init, (void)); + +_PROTOTYPE(void smp_ipi_err_int, (void)); +_PROTOTYPE(void smp_ipi_spv_int, (void)); +_PROTOTYPE(void smp_ipi_sched, (void)); +_PROTOTYPE(void smp_ipi_dequeue, (void)); +_PROTOTYPE(void smp_ipi_stop, (void)); +_PROTOTYPE(void smp_ipi_reboot, (void)); + +#define CPU_IS_BSP 1 +#define CPU_IS_READY 2 + +struct cpu { + u32_t flags; +}; + +EXTERN struct cpu cpus[CONFIG_MAX_CPUS]; + +#define cpu_set_flag(cpu, flag) do { cpus[cpu].flags |= (flag); } while(0) +#define cpu_clear_flag(cpu, flag) do { cpus[cpu].flags &= ~(flag); } while(0) +#define cpu_test_flag(cpu, flag) (cpus[cpu].flags & (flag)) +#define cpu_is_ready(cpu) cpu_test_flag(cpu, CPU_IS_READY) + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_SMP */ + +#endif /* __SMP_H__ */ diff --git a/kernel/spinlock.h b/kernel/spinlock.h new file mode 100644 index 000000000..0a2bdffa5 --- /dev/null +++ b/kernel/spinlock.h @@ -0,0 +1,37 @@ +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ + +#include "kernel.h" + +typedef struct spinlock { + atomic_t val; +} spinlock_t; + +#ifndef CONFIG_SMP + +#define SPINLOCK_DEFINE(name) +#define PRIVATE_SPINLOCK_DEFINE(name) +#define SPINLOCK_DECLARE(name) +#define spinlock_init(sl) +#define spinlock_lock(sl) +#define spinlock_unlock(sl) + +#else + +/* SMP */ +#define SPINLOCK_DEFINE(name) spinlock_t name; +#define PRIVATE_SPINLOCK_DEFINE(name) PRIVATE SPINLOCK_DEFINE(name) +#define SPINLOCK_DECLARE(name) extern SPINLOCK_DEFINE(name) +#define spinlock_init(sl) do { (sl)->val = 0; } while (0) +#if CONFIG_MAX_CPUS == 1 +#define spinlock_lock(sl) +#define spinlock_unlock(sl) +#else +#define spinlock_lock(sl) arch_spinlock_lock((atomic_t*) sl) +#define spinlock_unlock(sl) arch_spinlock_unlock((atomic_t*) sl) +#endif + + +#endif + +#endif /* __SPINLOCK_H__ */ diff --git 
a/kernel/start.c b/kernel/start.c index 3b4786309..1c178914b 100644 --- a/kernel/start.c +++ b/kernel/start.c @@ -101,6 +101,16 @@ PUBLIC void cstart( watchdog_enabled = atoi(value); #endif +#ifdef CONFIG_SMP + value = env_get("no_smp"); + if(value) + config_no_smp = atoi(value); + else + config_no_smp = 0; + if (config_no_apic) /* SMP cannot work without the APIC */ + config_no_smp = 1; /* so no_apic always overrides no_smp */ +#endif + /* Return to assembler code to switch to protected mode (if 286), * reload selectors and call main(). */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 489532e36..b4c19c8b6 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -5,6 +5,7 @@ */ #include "watchdog.h" +#include "arch/i386/glo.h" unsigned watchdog_local_timer_ticks = 0U; struct arch_watchdog *watchdog; diff --git a/share/mk/bsd.own.mk b/share/mk/bsd.own.mk index 60f8ad8b3..f3c64e1bc 100644 --- a/share/mk/bsd.own.mk +++ b/share/mk/bsd.own.mk @@ -3,6 +3,16 @@ .if !defined(_MINIX_OWN_MK_) _MINIX_OWN_MK_=1 +.ifdef CONFIG_SMP +SMP_FLAGS += -DCONFIG_SMP +.ifdef CONFIG_MAX_CPUS +SMP_FLAGS += -DCONFIG_MAX_CPUS=${CONFIG_MAX_CPUS} +.endif +.endif + +CFLAGS += ${SMP_FLAGS} +AFLAGS += ${SMP_FLAGS} + MAKECONF?= /etc/make.conf .-include "${MAKECONF}"