
This patch employs one solution to resolve two independent but related issues. Both issues are the result of one fundamental aspect of the way VM's memory mapping works: VM uses its cache to map in blocks for memory-mapped file regions, and for blocks already in the VM cache, VM does not go to the file system before mapping them in. To preserve consistency between the FS and VM caches, VM relies on being informed about all updates to file contents through the block cache. The two issues are both the result of VM not being properly informed about such updates: 1. Once a file system provides libminixfs with an inode association (inode number + inode offset) for a disk block, this association is not broken until a new inode association is provided for it. If a block is freed and reallocated as a metadata (non-inode) block, its old association is maintained, and may be supplied to VM's secondary cache. Due to reuse of inodes, it is possible that the same inode association becomes valid for an actual file block again. In that case, when that new file is memory-mapped, under certain circumstances, VM may end up using the metadata block to satisfy a page fault on the file, due to the stale inode association. The result is a corrupted memory mapping, with the application seeing data other than the current file contents mapped in at the file block. 2. When a hole is created in a file, the underlying block is freed from the device, but VM is not informed of this update, and thus, if VM's cache contains the block with its previous inode association, this block will remain there. As a result, if an application subsequently memory-maps the file, VM will map in the old block at the position of the hole, rather than an all-zeroes block. Thus, again, the result is a corrupted memory mapping. 
This patch resolves both issues by making the file system inform the minixfs library about blocks being freed, so that libminixfs can break the inode association for that block, both in its own cache and in the VM cache. Since libminixfs does not know whether VM has the block in its cache or not, it makes a call to VM for each block being freed. Thus, this change introduces more calls to VM, but it solves the correctness issues at hand; optimizations may be introduced later. On the upside, all freed blocks are now marked as clean, which should result in fewer blocks being written back to the device, and the blocks are removed from the caches entirely, which should result in slightly better cache usage. This patch is necessary but not sufficient to resolve the situation with respect to memory mapping of file holes in general. Therefore, this patch extends test 74 with a (rather particular but effective) test for the first issue, but not yet with a test for the second one. This fixes #90. Change-Id: Iad8b134d2f88a884f15d3fc303e463280749c467
100 lines
3.4 KiB
C
100 lines
3.4 KiB
C
/* The file system maintains a buffer cache to reduce the number of disk
 * accesses needed.  Whenever a read or write to the disk is done, a check is
 * first made to see if the block is in the cache.  This file manages the
 * cache.
 *
 * The entry points into this file are:
 *   get_block:	  request to fetch a block for reading or writing from cache
 *   put_block:	  return a block previously requested with get_block
 *   alloc_zone:  allocate a new zone (to increase the length of a file)
 *   free_zone:	  release a zone (when a file is removed)
 *   invalidate:  remove all the cache blocks on some device
 *
 * Private functions:
 *   read_block:  read or write a block from the disk itself
 */
|
|
|
|
#include "fs.h"
|
|
#include <minix/u64.h>
|
|
#include <minix/bdev.h>
|
|
#include <sys/param.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <minix/libminixfs.h>
|
|
#include <math.h>
|
|
#include "buf.h"
|
|
#include "super.h"
|
|
#include "inode.h"
|
|
|
|
/*===========================================================================*
 *				alloc_zone				     *
 *===========================================================================*/
|
|
zone_t alloc_zone(
|
|
dev_t dev, /* device where zone wanted */
|
|
zone_t z /* try to allocate new zone near this one */
|
|
)
|
|
{
|
|
/* Allocate a new zone on the indicated device and return its number. */
|
|
|
|
bit_t b, bit;
|
|
struct super_block *sp;
|
|
static int print_oos_msg = 1;
|
|
|
|
/* Note that the routine alloc_bit() returns 1 for the lowest possible
|
|
* zone, which corresponds to sp->s_firstdatazone. To convert a value
|
|
* between the bit number, 'b', used by alloc_bit() and the zone number, 'z',
|
|
* stored in the inode, use the formula:
|
|
* z = b + sp->s_firstdatazone - 1
|
|
* Alloc_bit() never returns 0, since this is used for NO_BIT (failure).
|
|
*/
|
|
sp = get_super(dev);
|
|
|
|
/* If z is 0, skip initial part of the map known to be fully in use. */
|
|
if (z == sp->s_firstdatazone) {
|
|
bit = sp->s_zsearch;
|
|
} else {
|
|
bit = (bit_t) (z - (sp->s_firstdatazone - 1));
|
|
}
|
|
b = alloc_bit(sp, ZMAP, bit);
|
|
if (b == NO_BIT) {
|
|
err_code = ENOSPC;
|
|
if (print_oos_msg)
|
|
printf("No space on device %d/%d\n", major(sp->s_dev),
|
|
minor(sp->s_dev));
|
|
print_oos_msg = 0; /* Don't repeat message */
|
|
return(NO_ZONE);
|
|
}
|
|
print_oos_msg = 1;
|
|
if (z == sp->s_firstdatazone) sp->s_zsearch = b; /* for next time */
|
|
return( (zone_t) (sp->s_firstdatazone - 1) + (zone_t) b);
|
|
}
|
|
|
|
/*===========================================================================*
 *				free_zone				     *
 *===========================================================================*/
|
|
void free_zone(
|
|
dev_t dev, /* device where zone located */
|
|
zone_t numb /* zone to be returned */
|
|
)
|
|
{
|
|
/* Return a zone. */
|
|
|
|
register struct super_block *sp;
|
|
bit_t bit;
|
|
|
|
/* Locate the appropriate super_block and return bit. */
|
|
sp = get_super(dev);
|
|
if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
|
|
bit = (bit_t) (numb - (zone_t) (sp->s_firstdatazone - 1));
|
|
free_bit(sp, ZMAP, bit);
|
|
if (bit < sp->s_zsearch) sp->s_zsearch = bit;
|
|
|
|
/* Also tell libminixfs, so that 1) if it has a block for this bit, it can
|
|
* mark it as clean, thus reducing useless writes, and 2) it can tell VM that
|
|
* any previous inode association is to be broken for this block, so that the
|
|
* block will not be mapped in erroneously later on.
|
|
*/
|
|
assert(sp->s_log_zone_size == 0); /* otherwise we need a loop here.. */
|
|
lmfs_free_block(dev, (block_t)numb);
|
|
}
|