SMP - Big kernel lock (BKL)
- to isolate execution inside the kernel we use a big kernel lock implemented as a spinlock - the lock is acquired as soon as possible after entering kernel mode and released as late as possible. Only one CPU at a time can execute the core kernel code - measurements on real hardware show that the overhead of this lock is close to 0% of kernel time for the current system - the overhead of this lock may be as high as 45% of kernel time in virtual machines, depending on the ratio between the physical CPUs available and the emulated CPUs. In that case the performance degradation is significant
This commit is contained in:
		
							parent
							
								
									a42ab504a0
								
							
						
					
					
						commit
						6aa26565e6
					
				@ -423,6 +423,7 @@ PRIVATE int calib_clk_handler(irq_hook_t * UNUSED(hook))
 | 
			
		||||
		stop_8253A_timer();
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	BKL_UNLOCK();
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -467,6 +468,14 @@ PRIVATE void apic_calibrate_clocks(unsigned cpu)
 | 
			
		||||
 | 
			
		||||
	/* set the PIC timer to get some time */
 | 
			
		||||
	init_8253A_timer(system_hz);
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * We must unlock BKL here as the in-kernel interrupt will lock it
 | 
			
		||||
	 * again. The handler will unlock it after it is done. This is
 | 
			
		||||
	 * absolutely safe as only the BSP is running. It is just a workaround for a
 | 
			
		||||
	 * corner case for APIC timer calibration
 | 
			
		||||
	 */
 | 
			
		||||
	BKL_UNLOCK();
 | 
			
		||||
	intr_enable();
 | 
			
		||||
 | 
			
		||||
	/* loop for some time to get a sample */
 | 
			
		||||
 | 
			
		||||
@ -15,6 +15,7 @@
 | 
			
		||||
#ifdef CONFIG_APIC
 | 
			
		||||
#include "apic.h"
 | 
			
		||||
#endif
 | 
			
		||||
#include "spinlock.h"
 | 
			
		||||
 | 
			
		||||
#define CLOCK_ACK_BIT   0x80    /* PS/2 clock interrupt acknowledge bit */
 | 
			
		||||
 | 
			
		||||
@ -79,6 +80,8 @@ PRIVATE int calib_cpu_handler(irq_hook_t * UNUSED(hook))
 | 
			
		||||
		tsc1 = tsc;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	/* just in case we are in an SMP single cpu fallback mode */
 | 
			
		||||
	BKL_UNLOCK();
 | 
			
		||||
	return 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -92,6 +95,8 @@ PRIVATE void estimate_cpu_freq(void)
 | 
			
		||||
	/* set the probe, we use the legacy timer, IRQ 0 */
 | 
			
		||||
	put_irq_handler(&calib_cpu, CLOCK_IRQ, calib_cpu_handler);
 | 
			
		||||
 | 
			
		||||
	/* just in case we are in an SMP single cpu fallback mode */
 | 
			
		||||
	BKL_UNLOCK();
 | 
			
		||||
	/* set the PIC timer to get some time */
 | 
			
		||||
	intr_enable();
 | 
			
		||||
 | 
			
		||||
@ -101,6 +106,8 @@ PRIVATE void estimate_cpu_freq(void)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	intr_disable();
 | 
			
		||||
	/* just in case we are in an SMP single cpu fallback mode */
 | 
			
		||||
	BKL_LOCK();
 | 
			
		||||
 | 
			
		||||
	/* remove the probe */
 | 
			
		||||
	rm_irq_handler(&calib_cpu);
 | 
			
		||||
@ -199,6 +206,19 @@ PUBLIC void context_stop(struct proc * p)
 | 
			
		||||
		}
 | 
			
		||||
#endif
 | 
			
		||||
	}
 | 
			
		||||
	
 | 
			
		||||
	/*
 | 
			
		||||
	 * This function is called only if we switch from kernel to user or idle
 | 
			
		||||
	 * or back. Therefore this is a perfect location to place the big kernel
 | 
			
		||||
	 * lock which will hopefully disappear soon.
 | 
			
		||||
	 *
 | 
			
		||||
	 * If we stop accounting for KERNEL we must unlock the BKL. If we account
 | 
			
		||||
	 * for IDLE we must not hold the lock
 | 
			
		||||
	 */
 | 
			
		||||
	if (p == proc_addr(KERNEL))
 | 
			
		||||
		BKL_UNLOCK();
 | 
			
		||||
	else
 | 
			
		||||
		BKL_LOCK();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
PUBLIC void context_stop_idle(void)
 | 
			
		||||
 | 
			
		||||
@ -189,6 +189,10 @@ PRIVATE void ap_finish_booting(void)
 | 
			
		||||
 | 
			
		||||
	while(!i386_paging_enabled)
 | 
			
		||||
		arch_pause();
 | 
			
		||||
 | 
			
		||||
	BKL_LOCK();
 | 
			
		||||
	printf("CPU %d is running\n", cpu);
 | 
			
		||||
	BKL_UNLOCK();
 | 
			
		||||
	for(;;);
 | 
			
		||||
 | 
			
		||||
	/* finish processor initialisation. */
 | 
			
		||||
 | 
			
		||||
@ -964,26 +964,27 @@ ENTRY(smp_get_cores)
 | 
			
		||||
 * eax register is clobbered.
 | 
			
		||||
 */
 | 
			
		||||
ENTRY(arch_spinlock_lock)
 | 
			
		||||
	push	%ebp
 | 
			
		||||
	mov	%esp, %ebp
 | 
			
		||||
	push	%ebx
 | 
			
		||||
	mov	8(%ebp), %eax
 | 
			
		||||
	mov	$1, %ebx
 | 
			
		||||
/* FIXME use exponential backoff */
 | 
			
		||||
	mov	4(%esp), %eax
 | 
			
		||||
	mov	$1, %edx
 | 
			
		||||
2:
 | 
			
		||||
	xchg	%ebx, (%eax)
 | 
			
		||||
	test	%ebx, %ebx
 | 
			
		||||
	mov	$1, %ecx
 | 
			
		||||
	xchg	%ecx, (%eax)
 | 
			
		||||
	test	%ecx, %ecx
 | 
			
		||||
	je	0f
 | 
			
		||||
 | 
			
		||||
	cmp	$(1<< 16), %edx
 | 
			
		||||
	je	1f
 | 
			
		||||
	shl	%edx
 | 
			
		||||
1:
 | 
			
		||||
/* FIXME don't use the byte code */
 | 
			
		||||
.byte	0xf3, 0x90	/*  pause */
 | 
			
		||||
	cmp	$0, (%eax)
 | 
			
		||||
	jne	1b
 | 
			
		||||
	jmp	2b
 | 
			
		||||
	mov	%edx, %ecx
 | 
			
		||||
3:
 | 
			
		||||
	pause
 | 
			
		||||
	sub	$1, %ecx
 | 
			
		||||
	test	%ecx, %ecx
 | 
			
		||||
	jz	2b
 | 
			
		||||
	jmp	3b
 | 
			
		||||
0:
 | 
			
		||||
	mfence
 | 
			
		||||
	pop	%ebx
 | 
			
		||||
	pop	%ebp
 | 
			
		||||
	ret
 | 
			
		||||
 | 
			
		||||
/*===========================================================================*/
 | 
			
		||||
@ -993,11 +994,9 @@ ENTRY(arch_spinlock_lock)
 | 
			
		||||
/*  spin lock release routine. */
 | 
			
		||||
ENTRY(arch_spinlock_unlock)
 | 
			
		||||
	mov	4(%esp), %eax
 | 
			
		||||
	push	%ebx
 | 
			
		||||
	mov	$0, %ebx
 | 
			
		||||
	xchg	%ebx, (%eax)
 | 
			
		||||
	mov	$0, %ecx
 | 
			
		||||
	xchg	%ecx, (%eax)
 | 
			
		||||
	mfence
 | 
			
		||||
	pop	%ebx
 | 
			
		||||
	ret
 | 
			
		||||
 | 
			
		||||
/*===========================================================================*/
 | 
			
		||||
 | 
			
		||||
@ -24,6 +24,10 @@
 | 
			
		||||
#ifdef CONFIG_SMP
 | 
			
		||||
#include "smp.h"
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef CONFIG_WATCHDOG
 | 
			
		||||
#include "watchdog.h"
 | 
			
		||||
#endif
 | 
			
		||||
#include "spinlock.h"
 | 
			
		||||
 | 
			
		||||
/* Prototype declarations for PRIVATE functions. */
 | 
			
		||||
FORWARD _PROTOTYPE( void announce, (void));	
 | 
			
		||||
@ -93,6 +97,7 @@ PUBLIC int main(void)
 | 
			
		||||
  struct exec e_hdr;		/* for a copy of an a.out header */
 | 
			
		||||
  size_t argsz;			/* size of arguments passed to crtso on stack */
 | 
			
		||||
 | 
			
		||||
  BKL_LOCK();
 | 
			
		||||
   /* Global value to test segment sanity. */
 | 
			
		||||
   magictest = MAGICTEST;
 | 
			
		||||
 
 | 
			
		||||
 | 
			
		||||
@ -5,3 +5,5 @@ unsigned ht_per_core;
 | 
			
		||||
unsigned bsp_cpu_id;
 | 
			
		||||
 | 
			
		||||
struct cpu cpus[CONFIG_MAX_CPUS];
 | 
			
		||||
 | 
			
		||||
SPINLOCK_DEFINE(big_kernel_lock)
 | 
			
		||||
 | 
			
		||||
@ -7,6 +7,7 @@
 | 
			
		||||
 | 
			
		||||
#include "kernel.h"
 | 
			
		||||
#include "arch_smp.h"
 | 
			
		||||
#include "spinlock.h"
 | 
			
		||||
 | 
			
		||||
/* number of CPUs (execution strands) in the system */
 | 
			
		||||
EXTERN unsigned ncpus;
 | 
			
		||||
@ -48,6 +49,8 @@ EXTERN struct cpu cpus[CONFIG_MAX_CPUS];
 | 
			
		||||
#define cpu_test_flag(cpu, flag) (cpus[cpu].flags & (flag))
 | 
			
		||||
#define cpu_is_ready(cpu) cpu_test_flag(cpu, CPU_IS_READY)
 | 
			
		||||
 | 
			
		||||
SPINLOCK_DECLARE(big_kernel_lock)
 | 
			
		||||
 | 
			
		||||
#endif /* __ASSEMBLY__ */
 | 
			
		||||
 | 
			
		||||
#endif /* CONFIG_SMP */
 | 
			
		||||
 | 
			
		||||
@ -23,6 +23,7 @@ typedef struct spinlock {
 | 
			
		||||
#define PRIVATE_SPINLOCK_DEFINE(name)	PRIVATE SPINLOCK_DEFINE(name)
 | 
			
		||||
#define SPINLOCK_DECLARE(name)	extern SPINLOCK_DEFINE(name)
 | 
			
		||||
#define spinlock_init(sl) do { (sl)->val = 0; } while (0)
 | 
			
		||||
 | 
			
		||||
#if CONFIG_MAX_CPUS == 1
 | 
			
		||||
#define spinlock_lock(sl)
 | 
			
		||||
#define spinlock_unlock(sl)
 | 
			
		||||
@ -32,6 +33,9 @@ typedef struct spinlock {
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
#endif /* CONFIG_SMP */
 | 
			
		||||
 | 
			
		||||
/* Acquire the big kernel lock: takes the big_kernel_lock spinlock via spinlock_lock(). */
#define BKL_LOCK()	spinlock_lock(&big_kernel_lock)
 | 
			
		||||
/* Release the big kernel lock: drops the big_kernel_lock spinlock via spinlock_unlock(). */
#define BKL_UNLOCK()	spinlock_unlock(&big_kernel_lock)
 | 
			
		||||
 | 
			
		||||
#endif /* __SPINLOCK_H__ */
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user