minix/servers/vm/mem_cache.c
David van Moolenbroek e94f856b38 libminixfs/VM: fix memory-mapped file corruption
This patch employs one solution to resolve two independent but related
issues.  Both issues are the result of one fundamental aspect of the
way VM's memory mapping works: VM uses its cache to map in blocks for
memory-mapped file regions, and for blocks already in the VM cache, VM
does not go to the file system before mapping them in.  To preserve
consistency between the FS and VM caches, VM relies on being informed
about all updates to file contents through the block cache.  The two
issues are both the result of VM not being properly informed about
such updates:

 1. Once a file system provides libminixfs with an inode association
    (inode number + inode offset) for a disk block, this association
    is not broken until a new inode association is provided for it.
    If a block is freed and reallocated as a metadata (non-inode)
    block, its old association is maintained, and may be supplied to
    VM's secondary cache.  Due to reuse of inodes, it is possible
    that the same inode association becomes valid for an actual file
    block again.  In that case, when that new file is memory-mapped,
    under certain circumstances, VM may end up using the metadata
    block to satisfy a page fault on the file, due to the stale inode
    association.  The result is a corrupted memory mapping, with the
    application seeing data other than the current file contents
    mapped in at the file block.

 2. When a hole is created in a file, the underlying block is freed
    from the device, but VM is not informed of this update, and thus,
    if VM's cache contains the block with its previous inode
    association, this block will remain there.  As a result, if an
    application subsequently memory-maps the file, VM will map in the
    old block at the position of the hole, rather than an all-zeroes
    block.  Thus, again, the result is a corrupted memory mapping.  (A
    sketch of the stale-association pattern behind both issues follows
    this list.)
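
To make the shared root cause concrete, here is a hedged sketch of the
association state involved.  The struct and field names are
illustrative only; they are not the actual libminixfs or VM
definitions:

    #include <sys/types.h>
    #include <stdint.h>

    /* Illustrative only: the inode association that the caches
     * conceptually keep per device block (not the real MINIX
     * structures).
     */
    struct block_assoc {
            dev_t    dev;       /* device holding the block */
            uint64_t dev_off;   /* byte offset of the block on the device */
            ino_t    ino;       /* associated inode, or "no inode" */
            uint64_t ino_off;   /* byte offset within that inode */
    };

    /* Issue 1: block B is given association (I, 0), which also reaches
     * VM.  B is freed and reallocated as metadata, but (I, 0) is never
     * broken.  Once inode I is reused for a new file, a page fault on
     * (I, 0) can be satisfied from the stale metadata block.
     *
     * Issue 2: punching a hole frees block B on the device, but VM is
     * not told; VM's copy of B, still carrying its old association, is
     * mapped in where an all-zeroes page should appear.
     */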

This patch resolves both issues by making the file system inform the
minixfs library about blocks being freed, so that libminixfs can
break the inode association for that block, both in its own cache and
in the VM cache.  Since libminixfs does not know whether VM has the
block in its cache or not, it makes a call to VM for each block being
freed.  Thus, this change introduces more calls to VM, but it solves
the correctness issues at hand; optimizations may be introduced
later.  On the upside, all freed blocks are now marked as clean,
which should result in fewer blocks being written back to the device,
and the blocks are removed from the caches entirely, which should
result in slightly better cache usage.
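
The sketch below shows the intended control flow on the libminixfs
side.  The helper names (find_block, mark_clean_and_disassociate) and
the fs_block_size global are hypothetical, and lmfs_free_block() and
vm_forget_cacheblock() are assumed entry points matching this
description, not verbatim patched code:

    #include <sys/types.h>
    #include <stdint.h>

    /* Hypothetical helpers and globals, named for illustration only. */
    struct buf;
    extern struct buf *find_block(dev_t dev, uint64_t block);
    extern void mark_clean_and_disassociate(struct buf *bp);
    extern int vm_forget_cacheblock(dev_t dev, off_t dev_offset,
            int blocksize);
    extern unsigned int fs_block_size;

    /* Called by the file system for every block it frees. */
    void
    lmfs_free_block(dev_t dev, uint64_t block)
    {
            struct buf *bp;

            /* Break the inode association in libminixfs's own cache
             * and mark the block clean: a freed block must never be
             * written back to the device.
             */
            if ((bp = find_block(dev, block)) != NULL)
                    mark_clean_and_disassociate(bp);

            /* libminixfs cannot tell whether VM also caches this
             * block, so it sends VM one forget request per freed
             * block: extra IPC, accepted for correctness.
             */
            vm_forget_cacheblock(dev, (off_t)(block * fs_block_size),
                fs_block_size);
    }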

This patch is necessary but not sufficient to resolve the situation
with respect to memory mapping of file holes in general.  Therefore,
this patch extends test 74 with a (rather particular but effective)
test for the first issue, but not yet with a test for the second one.

This fixes #90.

Change-Id: Iad8b134d2f88a884f15d3fc303e463280749c467
2015-08-13 13:46:46 +00:00


/* This file implements the disk cache.
 *
 * If they exist anywhere, cached pages are always in a private
 * VM datastructure.
 *
 * They might also be any combination of:
 * - be mapped in by a filesystem for reading/writing by it
 * - be mapped in by a process as the result of an mmap call (future)
 *
 * This file manages the datastructure of all cache blocks, and
 * mapping them in and out of filesystems.
 */
#include <assert.h>
#include <string.h>
#include <minix/hash.h>
#include <machine/vmparam.h>
#include "proto.h"
#include "vm.h"
#include "region.h"
#include "glo.h"
#include "cache.h"

static int cache_reference(struct phys_region *pr, struct phys_region *pr2);
static int cache_unreference(struct phys_region *pr);
static int cache_sanitycheck(struct phys_region *pr, const char *file, int line);
static int cache_writable(struct phys_region *pr);
static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l);
static int cache_pagefault(struct vmproc *vmp, struct vir_region *region,
        struct phys_region *ph, int write, vfs_callback_t cb, void *state,
        int len, int *io);
static int cache_pt_flags(struct vir_region *vr);
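
/*
 * Handler table for cache-type memory regions; VM's region layer
 * dispatches to these callbacks through the generic mem_type interface.
 */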
struct mem_type mem_type_cache = {
        .name = "cache memory",
        .ev_reference = cache_reference,
        .ev_unreference = cache_unreference,
        .ev_resize = cache_resize,
        .ev_sanitycheck = cache_sanitycheck,
        .ev_pagefault = cache_pagefault,
        .writable = cache_writable,
        .pt_flags = cache_pt_flags,
};

static int cache_pt_flags(struct vir_region *vr)
{
#if defined(__arm__)
        /* Cache pages are regular memory, so map them cacheable on ARM. */
        return ARM_VM_PTE_CACHED;
#else
        return 0;
#endif
}

static int cache_reference(struct phys_region *pr, struct phys_region *pr2)
{
        return OK;
}

static int cache_unreference(struct phys_region *pr)
{
        /* Cache pages are unreferenced like anonymous memory pages. */
        return mem_type_anon.ev_unreference(pr);
}

static int cache_sanitycheck(struct phys_region *pr, const char *file, int line)
{
        MYASSERT(usedpages_add(pr->ph->phys, VM_PAGE_SIZE) == OK);
        return OK;
}

static int cache_writable(struct phys_region *pr)
{
        /* Cache blocks are currently used only by file systems, so they
         * are always writable.
         */
        assert(pr->ph->refcount > 0);
        return pr->ph->phys != MAP_NONE;
}

static int cache_resize(struct vmproc *vmp, struct vir_region *vr, vir_bytes l)
{
        printf("VM: cannot resize cache blocks.\n");
        return ENOMEM;
}
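
/*
 * Map one or more consecutive cached pages into the caller's (a file
 * system's) address space, presumably in response to a
 * VM_MAPCACHEPAGE-style request from libminixfs.  Every requested page
 * must already be present in the cache; otherwise the partial mapping
 * is undone and ENOENT is returned.
 */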
int
do_mapcache(message *msg)
{
        dev_t dev = msg->m_vmmcp.dev;
        uint64_t dev_off = msg->m_vmmcp.dev_offset;
        off_t ino_off = msg->m_vmmcp.ino_offset;
        int n;
        phys_bytes bytes = msg->m_vmmcp.pages * VM_PAGE_SIZE;
        struct vir_region *vr;
        struct vmproc *caller;
        vir_bytes offset;
        int io = 0;

        if(dev_off % PAGE_SIZE || ino_off % PAGE_SIZE) {
                printf("VM: unaligned cache operation\n");
                return EFAULT;
        }

        if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source");
        caller = &vmproc[n];

        if(bytes < VM_PAGE_SIZE) return EINVAL;

        if(!(vr = map_page_region(caller, VM_PAGE_SIZE, VM_DATATOP, bytes,
                VR_ANON | VR_WRITABLE, 0, &mem_type_cache))) {
                printf("VM: map_page_region failed\n");
                return ENOMEM;
        }

        assert(vr->length == bytes);

        for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) {
                struct cached_page *hb;

                assert(vr->length == bytes);
                assert(offset < vr->length);

                if(!(hb = find_cached_page_bydev(dev, dev_off + offset,
                        msg->m_vmmcp.ino, ino_off + offset, 1))) {
                        map_unmap_region(caller, vr, 0, bytes);
                        return ENOENT;
                }

                assert(!vr->param.pb_cache);
                vr->param.pb_cache = hb->page;

                assert(vr->length == bytes);
                assert(offset < vr->length);

                if(map_pf(caller, vr, offset, 1, NULL, NULL, 0, &io) != OK) {
                        map_unmap_region(caller, vr, 0, bytes);
                        printf("VM: map_pf failed\n");
                        return ENOMEM;
                }
                assert(!vr->param.pb_cache);
        }

        memset(msg, 0, sizeof(*msg));

        msg->m_vmmcp_reply.addr = (void *) vr->vaddr;

        assert(vr);

#if CACHE_SANITY
        cache_sanitycheck_internal();
#endif

        return OK;
}
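
/*
 * Page fault handler for cache-type regions.  do_mapcache() stores the
 * cached page to be mapped in the region's param.pb_cache field and
 * then forces a page fault; this handler consumes that field by
 * linking the cached physical block into the faulting region.
 */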
static int cache_pagefault(struct vmproc *vmp, struct vir_region *region,
        struct phys_region *ph, int write, vfs_callback_t cb,
        void *state, int len, int *io)
{
        vir_bytes offset = ph->offset;
        assert(ph->ph->phys == MAP_NONE);
        assert(region->param.pb_cache);

        pb_unreferenced(region, ph, 0);
        pb_link(ph, region->param.pb_cache, offset, region);
        region->param.pb_cache = NULL;

        return OK;
}
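
/*
 * Install pages from the caller into the cache, or refresh the inode
 * association of pages already present.  Each page must currently be
 * anonymous memory with a reference count of exactly one, so that the
 * caller and the cache end up as the page's only users.
 */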
int
do_setcache(message *msg)
{
        int r;
        dev_t dev = msg->m_vmmcp.dev;
        uint64_t dev_off = msg->m_vmmcp.dev_offset;
        off_t ino_off = msg->m_vmmcp.ino_offset;
        int flags = msg->m_vmmcp.flags;
        int n;
        struct vmproc *caller;
        phys_bytes offset;
        phys_bytes bytes = msg->m_vmmcp.pages * VM_PAGE_SIZE;

        if(bytes < VM_PAGE_SIZE) return EINVAL;

        if(dev_off % PAGE_SIZE || ino_off % PAGE_SIZE) {
                printf("VM: unaligned cache operation\n");
                return EFAULT;
        }

        if(vm_isokendpt(msg->m_source, &n) != OK) panic("bogus source");
        caller = &vmproc[n];

        for(offset = 0; offset < bytes; offset += VM_PAGE_SIZE) {
                struct vir_region *region;
                struct phys_region *phys_region = NULL;
                vir_bytes v = (vir_bytes) msg->m_vmmcp.block + offset;
                struct cached_page *hb;

                if(!(region = map_lookup(caller, v, &phys_region))) {
                        printf("VM: error: no reasonable memory region given (offset 0x%lx, 0x%lx)\n", offset, v);
                        return EFAULT;
                }

                if(!phys_region) {
                        printf("VM: error: no available memory region given\n");
                        return EFAULT;
                }

                if((hb=find_cached_page_bydev(dev, dev_off + offset,
                        msg->m_vmmcp.ino, ino_off + offset, 1))) {
                        /* block inode info updated */
                        if(hb->page != phys_region->ph ||
                            (hb->flags & VMSF_ONCE)) {
                                /* The previous cache entry has become
                                 * obsolete; make a new one.  rmcache()
                                 * removes it from the cache and frees the
                                 * page if it isn't mapped in anywhere
                                 * else.
                                 */
                                rmcache(hb);
                        } else {
                                /* The block was already there; its inode
                                 * info may have changed, which is fine.
                                 */
                                continue;
                        }
                }

                if(phys_region->memtype != &mem_type_anon &&
                    phys_region->memtype != &mem_type_anon_contig) {
                        printf("VM: error: no reasonable memory type\n");
                        return EFAULT;
                }

                if(phys_region->ph->refcount != 1) {
                        printf("VM: error: no reasonable refcount\n");
                        return EFAULT;
                }

                phys_region->memtype = &mem_type_cache;

                if((r=addcache(dev, dev_off + offset, msg->m_vmmcp.ino,
                        ino_off + offset, flags, phys_region->ph)) != OK) {
                        printf("VM: addcache failed\n");
                        return r;
                }
        }

#if CACHE_SANITY
        cache_sanitycheck_internal();
#endif

        return OK;
}

/*
 * Forget all pages associated with a particular block in the cache.
 */
int
do_forgetcache(message *msg)
{
        struct cached_page *hb;
        dev_t dev;
        uint64_t dev_off;
        phys_bytes bytes, offset;

        dev = msg->m_vmmcp.dev;
        dev_off = msg->m_vmmcp.dev_offset;
        bytes = msg->m_vmmcp.pages * VM_PAGE_SIZE;

        if (bytes < VM_PAGE_SIZE)
                return EINVAL;

        if (dev_off % PAGE_SIZE) {
                printf("VM: unaligned cache operation\n");
                return EFAULT;
        }

        for (offset = 0; offset < bytes; offset += VM_PAGE_SIZE) {
                if ((hb = find_cached_page_bydev(dev, dev_off + offset,
                    VMC_NO_INODE, 0 /*ino_off*/, 0 /*touchlru*/)) != NULL)
                        rmcache(hb);
        }

        return OK;
}

/*
 * A file system wants to invalidate all pages belonging to a certain
 * device.
 */
int
do_clearcache(message *msg)
{
        dev_t dev;

        dev = msg->m_vmmcp.dev;

        clear_cache_bydev(dev);

        return OK;
}