Crash recovery and live update support for VM.

This commit is contained in:
Cristiano Giuffrida 2010-07-20 23:03:52 +00:00
parent 0b364d00bc
commit 91a83fe265
7 changed files with 286 additions and 53 deletions

View File

@ -1058,7 +1058,8 @@
#define VM_RS_MEMCTL (VM_RQ_BASE+42) #define VM_RS_MEMCTL (VM_RQ_BASE+42)
# define VM_RS_CTL_ENDPT m1_i1 # define VM_RS_CTL_ENDPT m1_i1
# define VM_RS_CTL_REQ m1_i2 # define VM_RS_CTL_REQ m1_i2
# define VM_RS_MEM_PIN 0 /* pin memory */ # define VM_RS_MEM_PIN 0 /* pin memory */
# define VM_RS_MEM_MAKE_VM 1 /* make VM instance */
/* Total. */ /* Total. */
#define NR_VM_CALLS 43 #define NR_VM_CALLS 43

View File

@ -552,6 +552,19 @@ struct rproc *rp;
*/ */
setuid(0); setuid(0);
/* If this is a VM instance, let VM know now. */
if(rp->r_priv.s_flags & VM_SYS_PROC) {
if(rs_verbose)
printf("RS: informing VM of instance %s\n", srv_to_string(rp));
s = vm_memctl(rpub->endpoint, VM_RS_MEM_MAKE_VM);
if(s != OK) {
printf("vm_memctl failed: %d\n", s);
cleanup_service(rp);
return s;
}
}
/* Tell VM about allowed calls. */ /* Tell VM about allowed calls. */
if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) { if ((s = vm_set_priv(rpub->endpoint, &rpub->vm_call_mask[0])) != OK) {
printf("RS: vm_set_priv failed: %d\n", s); printf("RS: vm_set_priv failed: %d\n", s);

View File

@ -526,6 +526,119 @@ PRIVATE char *ptestr(u32_t pte)
return str; return str;
} }
/*===========================================================================*
* pt_map_in_range *
*===========================================================================*/
PUBLIC int pt_map_in_range(struct vmproc *src_vmp, struct vmproc *dst_vmp,
	vir_bytes start, vir_bytes end)
{
/* Transfer all the mappings from the pt of the source process to the pt of
 * the destination process in the range specified.
 *
 * src_vmp: process whose page table entries are read.
 * dst_vmp: process whose page table entries are overwritten.
 * start:   first virtual address of the range; must be page-aligned and lie
 *          at or above the first non-reserved page directory entry
 *          (proc_pde).
 * end:     last virtual address of the range (inclusive, page-aligned);
 *          0 selects VM_DATATOP.
 *
 * Only page table entries that are present in the source are copied; all
 * other destination entries are left untouched. Always returns OK.
 *
 * NOTE(review): the destination page table for a copied pde is written
 * without checking that it exists; callers presumably preallocate the
 * destination page tables for the whole range - confirm.
 */
	int pde, pte;
	vir_bytes viraddr;
	pt_t *pt, *dst_pt;

	pt = &src_vmp->vm_pt;
	dst_pt = &dst_vmp->vm_pt;

	end = end ? end : VM_DATATOP;
	assert(start % I386_PAGE_SIZE == 0);
	assert(end % I386_PAGE_SIZE == 0);
	assert(I386_VM_PDE(start) >= proc_pde && start <= end);
	assert(I386_VM_PDE(end) < I386_VM_DIR_ENTRIES);

#if LU_DEBUG
	printf("VM: pt_map_in_range: src = %d, dst = %d\n",
		src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
	printf("VM: pt_map_in_range: transferring from 0x%08x (pde %d pte %d) to 0x%08x (pde %d pte %d)\n",
		start, I386_VM_PDE(start), I386_VM_PTE(start),
		end, I386_VM_PDE(end), I386_VM_PTE(end));
#endif

	/* Scan all page-table entries in the range. */
	for(viraddr = start; viraddr <= end; viraddr += I386_PAGE_SIZE) {
		pde = I386_VM_PDE(viraddr);
		if(pt->pt_dir[pde] & I386_VM_PRESENT) {
			pte = I386_VM_PTE(viraddr);
			if(pt->pt_pt[pde][pte] & I386_VM_PRESENT) {
				/* Transfer the mapping. */
				dst_pt->pt_pt[pde][pte] = pt->pt_pt[pde][pte];
			}
		}

		/* Leave the loop explicitly once the last page has been handled.
		 * Relying on "viraddr <= end" alone is not enough: when the last
		 * page sits at the very top of the address space, the increment
		 * wraps viraddr around to 0 and the condition stays true.
		 */
		if(viraddr == end || viraddr == VM_DATATOP) break;
	}

	return OK;
}
/*===========================================================================*
* pt_ptmap *
*===========================================================================*/
PUBLIC int pt_ptmap(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
/* Transfer mappings to page dir and page tables from source process and
* destination process. Make sure all the mappings are above the stack, not
* to corrupt valid mappings in the data segment of the destination process.
*/
int pde, r;
phys_bytes physaddr;
vir_bytes viraddr;
pt_t *pt;
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
pt = &src_vmp->vm_pt;
#if LU_DEBUG
printf("VM: pt_ptmap: src = %d, dst = %d\n",
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
/* Transfer mapping to the page directory. */
assert((vir_bytes) pt->pt_dir >= src_vmp->vm_stacktop);
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_dir);
physaddr = pt->pt_dir_phys & I386_VM_ADDR_MASK;
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
WMF_OVERWRITE)) != OK) {
return r;
}
#if LU_DEBUG
printf("VM: pt_ptmap: transferred mapping to page dir: 0x%08x (0x%08x)\n",
viraddr, physaddr);
#endif
/* Scan all non-reserved page-directory entries. */
for(pde=proc_pde; pde < I386_VM_DIR_ENTRIES; pde++) {
if(!(pt->pt_dir[pde] & I386_VM_PRESENT)) {
continue;
}
/* Transfer mapping to the page table. */
assert((vir_bytes) pt->pt_pt[pde] >= src_vmp->vm_stacktop);
viraddr = arch_vir2map(src_vmp, (vir_bytes) pt->pt_pt[pde]);
physaddr = pt->pt_dir[pde] & I386_VM_ADDR_MASK;
if((r=pt_writemap(&dst_vmp->vm_pt, viraddr, physaddr, I386_PAGE_SIZE,
I386_VM_PRESENT | I386_VM_USER | I386_VM_WRITE,
WMF_OVERWRITE)) != OK) {
return r;
}
}
#if LU_DEBUG
printf("VM: pt_ptmap: transferred mappings to page tables, pde range %d - %d\n",
proc_pde, I386_VM_DIR_ENTRIES - 1);
#endif
return OK;
}
/*===========================================================================* /*===========================================================================*
* pt_writemap * * pt_writemap *
*===========================================================================*/ *===========================================================================*/
@ -920,7 +1033,12 @@ PUBLIC void pt_init(phys_bytes usedlimit)
/* Back to reality - this is where the stack actually is. */ /* Back to reality - this is where the stack actually is. */
vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks; vmprocess->vm_arch.vm_seg[S].mem_len -= extra_clicks;
/* Pretend VM stack top is the same as any regular process, not to
* have discrepancies with new VM instances later on.
*/
vmprocess->vm_stacktop = VM_STACKTOP;
/* All OK. */ /* All OK. */
return; return;
} }

View File

@ -44,7 +44,8 @@ _PROTOTYPE( void reserve_proc_mem, (struct memory *mem_chunks,
_PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) ); _PROTOTYPE( int vm_isokendpt, (endpoint_t ep, int *proc) );
_PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) ); _PROTOTYPE( int get_stack_ptr, (int proc_nr, vir_bytes *sp) );
_PROTOTYPE( int do_info, (message *) ); _PROTOTYPE( int do_info, (message *) );
_PROTOTYPE( int swap_proc, (endpoint_t src_e, endpoint_t dst_e) ); _PROTOTYPE( int swap_proc_slot, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
_PROTOTYPE( int swap_proc_dyn_data, (struct vmproc *src_vmp, struct vmproc *dst_vmp));
/* exit.c */ /* exit.c */
_PROTOTYPE( void clear_proc, (struct vmproc *vmp) ); _PROTOTYPE( void clear_proc, (struct vmproc *vmp) );
@ -101,6 +102,9 @@ _PROTOTYPE( void pt_init_mem, (void) );
_PROTOTYPE( void pt_check, (struct vmproc *vmp) ); _PROTOTYPE( void pt_check, (struct vmproc *vmp) );
_PROTOTYPE( int pt_new, (pt_t *pt) ); _PROTOTYPE( int pt_new, (pt_t *pt) );
_PROTOTYPE( void pt_free, (pt_t *pt) ); _PROTOTYPE( void pt_free, (pt_t *pt) );
_PROTOTYPE( int pt_map_in_range, (struct vmproc *src_vmp, struct vmproc *dst_vmp,
vir_bytes start, vir_bytes end) );
_PROTOTYPE( int pt_ptmap, (struct vmproc *src_vmp, struct vmproc *dst_vmp) );
_PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end, _PROTOTYPE( int pt_ptalloc_in_range, (pt_t *pt, vir_bytes start, vir_bytes end,
u32_t flags, int verify)); u32_t flags, int verify));
_PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr, _PROTOTYPE( int pt_writemap, (pt_t *pt, vir_bytes v, phys_bytes physaddr,

View File

@ -21,6 +21,7 @@
#include <env.h> #include <env.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <memory.h>
#include "glo.h" #include "glo.h"
#include "proto.h" #include "proto.h"
@ -61,11 +62,26 @@ PUBLIC int do_rs_set_priv(message *m)
PUBLIC int do_rs_update(message *m_ptr) PUBLIC int do_rs_update(message *m_ptr)
{ {
endpoint_t src_e, dst_e, reply_e; endpoint_t src_e, dst_e, reply_e;
int src_p, dst_p;
struct vmproc *src_vmp, *dst_vmp;
struct vir_region *vr;
int r; int r;
src_e = m_ptr->VM_RS_SRC_ENDPT; src_e = m_ptr->VM_RS_SRC_ENDPT;
dst_e = m_ptr->VM_RS_DST_ENDPT; dst_e = m_ptr->VM_RS_DST_ENDPT;
/* Lookup slots for source and destination process. */
if(vm_isokendpt(src_e, &src_p) != OK) {
printf("do_rs_update: bad src endpoint %d\n", src_e);
return EINVAL;
}
src_vmp = &vmproc[src_p];
if(vm_isokendpt(dst_e, &dst_p) != OK) {
printf("do_rs_update: bad dst endpoint %d\n", dst_e);
return EINVAL;
}
dst_vmp = &vmproc[dst_p];
/* Let the kernel do the update first. */ /* Let the kernel do the update first. */
r = sys_update(src_e, dst_e); r = sys_update(src_e, dst_e);
if(r != OK) { if(r != OK) {
@ -73,15 +89,21 @@ PUBLIC int do_rs_update(message *m_ptr)
} }
/* Do the update in VM now. */ /* Do the update in VM now. */
r = swap_proc(src_e, dst_e); r = swap_proc_slot(src_vmp, dst_vmp);
if(r != OK) { if(r != OK) {
return r; return r;
} }
r = swap_proc_dyn_data(src_vmp, dst_vmp);
if(r != OK) {
return r;
}
pt_bind(&src_vmp->vm_pt, src_vmp);
pt_bind(&dst_vmp->vm_pt, dst_vmp);
/* Reply, update-aware. */ /* Reply, update-aware. */
reply_e = m_ptr->m_source; reply_e = m_ptr->m_source;
if(reply_e == src_e) reply_e = dst_e; if(reply_e == src_e) reply_e = dst_e;
if(reply_e == dst_e) reply_e = src_e; else if(reply_e == dst_e) reply_e = src_e;
m_ptr->m_type = OK; m_ptr->m_type = OK;
r = send(reply_e, m_ptr); r = send(reply_e, m_ptr);
if(r != OK) { if(r != OK) {
@ -91,6 +113,55 @@ PUBLIC int do_rs_update(message *m_ptr)
return SUSPEND; return SUSPEND;
} }
/*===========================================================================*
* rs_memctl_make_vm_instance *
*===========================================================================*/
PRIVATE int rs_memctl_make_vm_instance(struct vmproc *new_vm_vmp)
{
/* Prepare the process in new_vm_vmp to act as a new VM instance:
 *  - copy the stack top and data top settings of the running VM,
 *  - pin the memory of the new instance,
 *  - preallocate page tables for the entire address space of both the
 *    running VM and the new instance,
 *  - map the page tables of both into the new instance.
 *
 * Returns OK on success, or the error code of the first step that fails.
 * A failure returns immediately; the steps already performed are not
 * undone here.
 */
	int r;
	u32_t flags;
	int verify;
	struct vmproc *this_vm_vmp;

	this_vm_vmp = &vmproc[VM_PROC_NR];

	/* Copy settings from current VM. */
	new_vm_vmp->vm_stacktop = this_vm_vmp->vm_stacktop;
	new_vm_vmp->vm_arch.vm_data_top = this_vm_vmp->vm_arch.vm_data_top;

	/* Pin memory for the new VM instance. */
	r = map_pin_memory(new_vm_vmp);
	if(r != OK) {
		return r;
	}

	/* Preallocate page tables for the entire address space for both
	 * VM and the new VM instance.
	 */
	flags = 0;
	verify = FALSE;
	r = pt_ptalloc_in_range(&this_vm_vmp->vm_pt, 0, 0, flags, verify);
	if(r != OK) {
		return r;
	}
	r = pt_ptalloc_in_range(&new_vm_vmp->vm_pt, 0, 0, flags, verify);
	if(r != OK) {
		return r;
	}

	/* Let the new VM instance map VM's page tables and its own. */
	r = pt_ptmap(this_vm_vmp, new_vm_vmp);
	if(r != OK) {
		return r;
	}
	r = pt_ptmap(new_vm_vmp, new_vm_vmp);
	if(r != OK) {
		return r;
	}

	return OK;
}
/*===========================================================================* /*===========================================================================*
* do_rs_memctl * * do_rs_memctl *
*===========================================================================*/ *===========================================================================*/
@ -116,7 +187,9 @@ PUBLIC int do_rs_memctl(message *m_ptr)
case VM_RS_MEM_PIN: case VM_RS_MEM_PIN:
r = map_pin_memory(vmp); r = map_pin_memory(vmp);
return r; return r;
case VM_RS_MEM_MAKE_VM:
r = rs_memctl_make_vm_instance(vmp);
return r;
default: default:
printf("do_rs_memctl: bad request %d\n", req); printf("do_rs_memctl: bad request %d\n", req);
return EINVAL; return EINVAL;

View File

@ -37,8 +37,6 @@
#include "kernel/type.h" #include "kernel/type.h"
#include "kernel/proc.h" #include "kernel/proc.h"
#define SWAP_PROC_DEBUG 0
/*===========================================================================* /*===========================================================================*
* get_mem_map * * get_mem_map *
*===========================================================================*/ *===========================================================================*/
@ -232,36 +230,16 @@ PUBLIC int do_info(message *m)
} }
/*===========================================================================* /*===========================================================================*
* swap_proc * * swap_proc_slot *
*===========================================================================*/ *===========================================================================*/
PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e) PUBLIC int swap_proc_slot(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{ {
struct vmproc *src_vmp, *dst_vmp;
struct vmproc orig_src_vmproc, orig_dst_vmproc; struct vmproc orig_src_vmproc, orig_dst_vmproc;
int src_p, dst_p, r;
struct vir_region *vr;
/* Lookup slots for source and destination process. */ #if LU_DEBUG
if(vm_isokendpt(src_e, &src_p) != OK) { printf("VM: swap_proc: swapping %d (%d) and %d (%d)\n",
printf("swap_proc: bad src endpoint %d\n", src_e); src_vmp->vm_endpoint, src_vmp->vm_slot,
return EINVAL; dst_vmp->vm_endpoint, dst_vmp->vm_slot);
}
src_vmp = &vmproc[src_p];
if(vm_isokendpt(dst_e, &dst_p) != OK) {
printf("swap_proc: bad dst endpoint %d\n", dst_e);
return EINVAL;
}
dst_vmp = &vmproc[dst_p];
#if SWAP_PROC_DEBUG
printf("swap_proc: swapping %d (%d, %d) and %d (%d, %d)\n",
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
printf("swap_proc: map_printmap for source before swapping:\n");
map_printmap(src_vmp);
printf("swap_proc: map_printmap for destination before swapping:\n");
map_printmap(dst_vmp);
#endif #endif
/* Save existing data. */ /* Save existing data. */
@ -278,7 +256,52 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint; dst_vmp->vm_endpoint = orig_dst_vmproc.vm_endpoint;
dst_vmp->vm_slot = orig_dst_vmproc.vm_slot; dst_vmp->vm_slot = orig_dst_vmproc.vm_slot;
/* Preserve vir_region's parents. */ /* Preserve yielded blocks. */
src_vmp->vm_yielded_blocks = orig_src_vmproc.vm_yielded_blocks;
dst_vmp->vm_yielded_blocks = orig_dst_vmproc.vm_yielded_blocks;
#if LU_DEBUG
printf("VM: swap_proc: swapped %d (%d) and %d (%d)\n",
src_vmp->vm_endpoint, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
return OK;
}
/*===========================================================================*
* swap_proc_dyn_data *
*===========================================================================*/
PUBLIC int swap_proc_dyn_data(struct vmproc *src_vmp, struct vmproc *dst_vmp)
{
struct vir_region *vr;
int is_vm;
int r;
is_vm = (dst_vmp->vm_endpoint == VM_PROC_NR);
/* For VM, transfer memory regions above the stack first. */
if(is_vm) {
#if LU_DEBUG
printf("VM: swap_proc_dyn_data: tranferring regions above the stack from old VM (%d) to new VM (%d)\n",
src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
#endif
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
r = pt_map_in_range(src_vmp, dst_vmp,
arch_vir2map(src_vmp, src_vmp->vm_stacktop), 0);
if(r != OK) {
printf("swap_proc_dyn_data: pt_map_in_range failed\n");
return r;
}
}
#if LU_DEBUG
printf("VM: swap_proc_dyn_data: swapping regions' parents for %d (%d) and %d (%d)\n",
src_vmp->vm_endpoint, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_vmp->vm_slot);
#endif
/* Swap vir_regions' parents. */
for(vr = src_vmp->vm_regions; vr; vr = vr->next) { for(vr = src_vmp->vm_regions; vr; vr = vr->next) {
USE(vr, vr->parent = src_vmp;); USE(vr, vr->parent = src_vmp;);
} }
@ -286,25 +309,25 @@ PUBLIC int swap_proc(endpoint_t src_e, endpoint_t dst_e)
USE(vr, vr->parent = dst_vmp;); USE(vr, vr->parent = dst_vmp;);
} }
/* Adjust page tables. */ /* For regular processes, transfer regions above the stack now.
if(src_vmp->vm_flags & VMF_HASPT) * In case of rollback, we need to skip this step. To sandbox the
pt_bind(&src_vmp->vm_pt, src_vmp); * new instance and prevent state corruption on rollback, we share all
if(dst_vmp->vm_flags & VMF_HASPT) * the regions between the two instances as COW.
pt_bind(&dst_vmp->vm_pt, dst_vmp); */
if((r=sys_vmctl(SELF, VMCTL_FLUSHTLB, 0)) != OK) { if(!is_vm && (dst_vmp->vm_flags & VMF_HASPT)) {
panic("swap_proc: VMCTL_FLUSHTLB failed: %d", r); vr = map_lookup(dst_vmp, arch_vir2map(dst_vmp, dst_vmp->vm_stacktop));
} if(vr && !map_lookup(src_vmp, arch_vir2map(src_vmp, src_vmp->vm_stacktop))) {
#if LU_DEBUG
#if SWAP_PROC_DEBUG printf("VM: swap_proc_dyn_data: tranferring regions above the stack from %d to %d\n",
printf("swap_proc: swapped %d (%d, %d) and %d (%d, %d)\n", src_vmp->vm_endpoint, dst_vmp->vm_endpoint);
src_vmp->vm_endpoint, src_p, src_vmp->vm_slot,
dst_vmp->vm_endpoint, dst_p, dst_vmp->vm_slot);
printf("swap_proc: map_printmap for source after swapping:\n");
map_printmap(src_vmp);
printf("swap_proc: map_printmap for destination after swapping:\n");
map_printmap(dst_vmp);
#endif #endif
assert(src_vmp->vm_stacktop == dst_vmp->vm_stacktop);
r = map_proc_copy_from(src_vmp, dst_vmp, vr);
if(r != OK) {
return r;
}
}
}
return OK; return OK;
} }

View File

@ -28,6 +28,7 @@
/* How noisy are we supposed to be? */ /* How noisy are we supposed to be? */
#define VERBOSE 0 #define VERBOSE 0
#define LU_DEBUG 1
/* Minimum stack region size - 64MB. */ /* Minimum stack region size - 64MB. */
#define MINSTACKREGION (64*1024*1024) #define MINSTACKREGION (64*1024*1024)