diff --git a/etc/system.conf b/etc/system.conf index bc1962dd9..6f09d66f4 100644 --- a/etc/system.conf +++ b/etc/system.conf @@ -385,6 +385,8 @@ service procfs ; vm INFO + SETCACHEPAGE + CLEARCACHE ; uid 0; }; @@ -402,6 +404,10 @@ service hgfs ipc SYSTEM pm vfs rs vm ; + vm + SETCACHEPAGE + CLEARCACHE + ; }; service vbfs @@ -409,6 +415,10 @@ service vbfs ipc SYSTEM pm vfs rs ds vm vbox ; + vm + SETCACHEPAGE + CLEARCACHE + ; }; service printer @@ -554,6 +564,10 @@ service hello service devman { uid 0; + vm + SETCACHEPAGE + CLEARCACHE + ; }; service mmc @@ -589,6 +603,10 @@ service gpio IRQCTL # 19 PADCONF # 57 ; + vm + SETCACHEPAGE + CLEARCACHE + ; irq 29 # GPIO module 1 (dm37xx) 30 # GPIO module 2 (dm37xx) diff --git a/minix/include/minix/vm.h b/minix/include/minix/vm.h index 6dd94cb0f..9775d7133 100644 --- a/minix/include/minix/vm.h +++ b/minix/include/minix/vm.h @@ -71,7 +71,8 @@ int vm_procctl_clear(endpoint_t ep); int vm_procctl_handlemem(endpoint_t ep, vir_bytes m1, vir_bytes m2, int wr); int vm_set_cacheblock(void *block, dev_t dev, off_t dev_offset, - ino_t ino, off_t ino_offset, u32_t *flags, int blocksize); + ino_t ino, off_t ino_offset, u32_t *flags, int blocksize, + int setflags); void *vm_map_cacheblock(dev_t dev, off_t dev_offset, ino_t ino, off_t ino_offset, u32_t *flags, int blocksize); @@ -87,5 +88,8 @@ int vm_clear_cache(dev_t dev); /* special inode number for vm cache functions */ #define VMC_NO_INODE 0 /* to reference a disk block, no associated file */ +/* setflags for vm_set_cacheblock, also used internally in VM */ +#define VMSF_ONCE 0x01 /* discard block after one-time use */ + #endif /* _MINIX_VM_H */ diff --git a/minix/lib/libfsdriver/call.c b/minix/lib/libfsdriver/call.c index cb40015b6..1f6189da1 100644 --- a/minix/lib/libfsdriver/call.c +++ b/minix/lib/libfsdriver/call.c @@ -1,6 +1,7 @@ #include "fsdriver.h" #include +#include /* * Process a READSUPER request from VFS. 
@@ -43,7 +44,8 @@ fsdriver_readsuper(const struct fsdriver * __restrict fdp, if (r == OK) { /* This one we can set on the file system's behalf. */ - if (fdp->fdr_peek != NULL && fdp->fdr_bpeek != NULL) + if ((fdp->fdr_peek != NULL && fdp->fdr_bpeek != NULL) || + major(dev) == NONE_MAJOR) res_flags |= RES_HASPEEK; m_out->m_fs_vfs_readsuper.inode = root_node.fn_ino_nr; @@ -74,6 +76,10 @@ fsdriver_unmount(const struct fsdriver * __restrict fdp, if (fdp->fdr_unmount != NULL) fdp->fdr_unmount(); + /* If we used mmap emulation, clear any cached blocks from VM. */ + if (fdp->fdr_peek == NULL && major(fsdriver_device) == NONE_MAJOR) + vm_clear_cache(fsdriver_device); + /* Update library-local state. */ fsdriver_mounted = FALSE; @@ -206,6 +212,61 @@ fsdriver_write(const struct fsdriver * __restrict fdp, return read_write(fdp, m_in, m_out, FSC_WRITE); } +/* + * A read-based peek implementation. This allows file systems that do not have + * a buffer cache and do not implement peek, to support a limited form of mmap. + * We map in a block, fill it by calling the file system's read function, tell + * VM about the page, and then unmap the block again. We tell VM not to cache + * the block beyond its immediate use for the mmap request, so as to prevent + * potentially stale data from being cached--at the cost of performance. 
+ */ +static ssize_t +builtin_peek(const struct fsdriver * __restrict fdp, ino_t ino_nr, + size_t nbytes, off_t pos) +{ + static u32_t flags = 0; /* storage for the VMMC_ flags of all blocks */ + static off_t dev_off = 0; /* fake device offset, see below */ + struct fsdriver_data data; + char *buf; + ssize_t r; + + if ((buf = mmap(NULL, nbytes, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return ENOMEM; + + data.endpt = SELF; + data.grant = (cp_grant_id_t)buf; + data.size = nbytes; + + r = fdp->fdr_read(ino_nr, &data, nbytes, pos, FSC_READ); + + if (r >= 0) { + if ((size_t)r < nbytes) + memset(&buf[r], 0, nbytes - r); + + /* + * VM uses serialized communication to VFS. Since the page is + * to be used only once, VM will use and then discard it before + * sending a new peek request. Thus, it should be safe to + * reuse the same device offset all the time. However, relying + * on assumptions in protocols elsewhere is a bit dangerous, so we + * use an ever-increasing device offset just to be safe. + */ + r = vm_set_cacheblock(buf, fsdriver_device, dev_off, ino_nr, + pos, &flags, nbytes, VMSF_ONCE); + + if (r == OK) { + dev_off += nbytes; + + r = nbytes; + } + } + + munmap(buf, nbytes); + + return r; +} + /* * Process a PEEK request from VFS. */ @@ -222,13 +283,22 @@ fsdriver_peek(const struct fsdriver * __restrict fdp, pos = m_in->m_vfs_fs_readwrite.seek_pos; nbytes = m_in->m_vfs_fs_readwrite.nbytes; - if (fdp->fdr_peek == NULL) - return ENOSYS; - if (pos < 0 || nbytes > SSIZE_MAX) return EINVAL; - r = fdp->fdr_peek(ino_nr, NULL /*data*/, nbytes, pos, FSC_PEEK); + if (fdp->fdr_peek == NULL) { + if (major(fsdriver_device) != NONE_MAJOR) + return ENOSYS; + + /* + * For file systems that have no backing device, emulate peek + * support by reading into temporary buffers and passing these + * to VM. 
+ */ + r = builtin_peek(fdp, ino_nr, nbytes, pos); + } else + r = fdp->fdr_peek(ino_nr, NULL /*data*/, nbytes, pos, + FSC_PEEK); /* Do not return a new position. */ if (r >= 0) { diff --git a/minix/lib/libminixfs/cache.c b/minix/lib/libminixfs/cache.c index 697b360a9..408c26175 100644 --- a/minix/lib/libminixfs/cache.c +++ b/minix/lib/libminixfs/cache.c @@ -469,7 +469,7 @@ void lmfs_put_block( if(vmcache && bp->lmfs_needsetcache && dev != NO_DEV) { if((r=vm_set_cacheblock(bp->data, dev, dev_off, bp->lmfs_inode, bp->lmfs_inode_offset, - &bp->lmfs_flags, fs_block_size)) != OK) { + &bp->lmfs_flags, fs_block_size, 0)) != OK) { if(r == ENOSYS) { printf("libminixfs: ENOSYS, disabling VM calls\n"); vmcache = 0; diff --git a/minix/lib/libsys/vm_cache.c b/minix/lib/libsys/vm_cache.c index c94c1e2f8..be31981c2 100644 --- a/minix/lib/libsys/vm_cache.c +++ b/minix/lib/libsys/vm_cache.c @@ -14,7 +14,7 @@ static int vm_cachecall(message *m, int call, void *addr, dev_t dev, off_t dev_offset, ino_t ino, off_t ino_offset, u32_t *flags, - int blocksize) + int blocksize, int setflags) { if(blocksize % PAGE_SIZE) panic("blocksize %d should be a multiple of pagesize %d\n", @@ -39,7 +39,7 @@ static int vm_cachecall(message *m, int call, void *addr, dev_t dev, m->m_vmmcp.flags_ptr = flags; m->m_vmmcp.dev = dev; m->m_vmmcp.pages = blocksize / PAGE_SIZE; - m->m_vmmcp.flags = 0; + m->m_vmmcp.flags = setflags; return _taskcall(VM_PROC_NR, call, m); } @@ -50,19 +50,19 @@ void *vm_map_cacheblock(dev_t dev, off_t dev_offset, message m; if(vm_cachecall(&m, VM_MAPCACHEPAGE, NULL, dev, dev_offset, - ino, ino_offset, flags, blocksize) != OK) + ino, ino_offset, flags, blocksize, 0) != OK) return MAP_FAILED; return m.m_vmmcp_reply.addr; } int vm_set_cacheblock(void *block, dev_t dev, off_t dev_offset, - ino_t ino, off_t ino_offset, u32_t *flags, int blocksize) + ino_t ino, off_t ino_offset, u32_t *flags, int blocksize, int setflags) { message m; return vm_cachecall(&m, VM_SETCACHEPAGE, block, dev, 
dev_offset, - ino, ino_offset, flags, blocksize); + ino, ino_offset, flags, blocksize, setflags); } int diff --git a/minix/servers/vm/cache.c b/minix/servers/vm/cache.c index 6935357d0..ecd7529ee 100644 --- a/minix/servers/vm/cache.c +++ b/minix/servers/vm/cache.c @@ -213,7 +213,8 @@ struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino, u64_t ino_off, return NULL; } -int addcache(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, struct phys_block *pb) +int addcache(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, int flags, + struct phys_block *pb) { int hv_dev; struct cached_page *hb; @@ -237,6 +238,7 @@ int addcache(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, struct phys_blo hb->dev_offset = dev_off; hb->ino = ino; hb->ino_offset = ino_off; + hb->flags = flags & VMSF_ONCE; hb->page = pb; hb->page->refcount++; /* block also referenced by cache now */ hb->page->flags |= PBF_INCACHE; diff --git a/minix/servers/vm/cache.h b/minix/servers/vm/cache.h index 581349af8..0b5301d18 100644 --- a/minix/servers/vm/cache.h +++ b/minix/servers/vm/cache.h @@ -12,6 +12,7 @@ struct cached_page { ino_t ino; /* which ino is it about */ u64_t ino_offset; /* offset within ino */ + int flags; /* currently only VMSF_ONCE or 0 */ struct phys_block *page; /* page ptr */ struct cached_page *older; /* older in lru chain */ struct cached_page *newer; /* newer in lru chain */ diff --git a/minix/servers/vm/mem_cache.c b/minix/servers/vm/mem_cache.c index b61ac939e..6b1ace42a 100644 --- a/minix/servers/vm/mem_cache.c +++ b/minix/servers/vm/mem_cache.c @@ -175,6 +175,7 @@ do_setcache(message *msg) dev_t dev = msg->m_vmmcp.dev; off_t dev_off = msg->m_vmmcp.dev_offset; off_t ino_off = msg->m_vmmcp.ino_offset; + int flags = msg->m_vmmcp.flags; int n; struct vmproc *caller; phys_bytes offset; @@ -209,7 +210,8 @@ do_setcache(message *msg) if((hb=find_cached_page_bydev(dev, dev_off + offset, msg->m_vmmcp.ino, ino_off + offset, 1))) { /* block inode info updated */ - if(hb->page 
!= phys_region->ph) { + if(hb->page != phys_region->ph || + (hb->flags & VMSF_ONCE)) { /* previous cache entry has become * obsolete; make a new one. rmcache * removes it from the cache and frees @@ -236,8 +238,8 @@ do_setcache(message *msg) phys_region->memtype = &mem_type_cache; - if((r=addcache(dev, dev_off + offset, - msg->m_vmmcp.ino, ino_off + offset, phys_region->ph)) != OK) { + if((r=addcache(dev, dev_off + offset, msg->m_vmmcp.ino, + ino_off + offset, flags, phys_region->ph)) != OK) { printf("VM: addcache failed\n"); return r; } diff --git a/minix/servers/vm/mem_file.c b/minix/servers/vm/mem_file.c index ef25cff55..d73c062d4 100644 --- a/minix/servers/vm/mem_file.c +++ b/minix/servers/vm/mem_file.c @@ -107,7 +107,17 @@ static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region, cp = find_cached_page_byino(region->param.file.fdref->dev, region->param.file.fdref->ino, referenced_offset, 1); } - if(cp) { + /* + * Normally, a cache hit saves a round-trip to the file system + * to load the page. However, if the page in the VM cache is + * marked for one-time use, then force a round-trip through the + * file system anyway, so that the FS can update the page by + * re-adding it to the cache. Thus, for one-time use pages, + * no caching is performed. This approach is correct even in + * the light of concurrent requests and disappearing processes + * but relies on VM requests to VFS being fully serialized. + */ + if(cp && (!cb || !(cp->flags & VMSF_ONCE))) { int result = OK; pb_unreferenced(region, ph, 0); pb_link(ph, cp->page, ph->offset, region); @@ -120,6 +130,10 @@ static int mappedfile_pagefault(struct vmproc *vmp, struct vir_region *region, result = cow_block(vmp, region, ph, 0); } + /* Discard one-use pages after mapping them in. 
*/ + if (result == OK && (cp->flags & VMSF_ONCE)) + rmcache(cp); + return result; } @@ -210,7 +224,14 @@ int mappedfile_setfile(struct vmproc *owner, cp = find_cached_page_byino(dev, ino, referenced_offset, 1); } - if(!cp) continue; + /* + * If we get a hit for a page that is to be used only once, + * then either we found a stale page (due to a process dying + * before a requested once-page could be mapped in) or this is + * a rare case of concurrent requests for the same page. In + * both cases, force the page to be obtained from its FS later. + */ + if(!cp || (cp->flags & VMSF_ONCE)) continue; if(!(pr = pb_reference(cp->page, vaddr, region, &mem_type_mappedfile))) { printf("mappedfile_setfile: pb_reference failed\n"); diff --git a/minix/servers/vm/proto.h b/minix/servers/vm/proto.h index 0ad9998cf..e61ea9f74 100644 --- a/minix/servers/vm/proto.h +++ b/minix/servers/vm/proto.h @@ -227,7 +227,8 @@ int do_clearcache(message *m); struct cached_page *find_cached_page_bydev(dev_t dev, u64_t dev_off, ino_t ino, u64_t ino_off, int touchlru); struct cached_page *find_cached_page_byino(dev_t dev, ino_t ino, u64_t ino_off, int touchlru); -int addcache(dev_t dev, u64_t def_off, ino_t ino, u64_t ino_off, struct phys_block *pb); +int addcache(dev_t dev, u64_t def_off, ino_t ino, u64_t ino_off, int flags, + struct phys_block *pb); void cache_sanitycheck_internal(void); int cache_freepages(int pages); void get_stats_info(struct vm_stats_info *vsi); diff --git a/minix/tests/test72.c b/minix/tests/test72.c index 5a9e6289a..1723bced4 100644 --- a/minix/tests/test72.c +++ b/minix/tests/test72.c @@ -230,7 +230,7 @@ u32_t sqrt_approx(u32_t v) } int vm_set_cacheblock(void *block, dev_t dev, off_t dev_offset, - ino_t ino, off_t ino_offset, u32_t *flags, int blocksize) + ino_t ino, off_t ino_offset, u32_t *flags, int blocksize, int setflags) { return ENOSYS; } diff --git a/minix/tests/test74.c b/minix/tests/test74.c index a3cd76a82..fbfb4f6ca 100644 --- a/minix/tests/test74.c +++ 
b/minix/tests/test74.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -461,6 +462,63 @@ static void basic_regression(void) } +/* + * Test mmap on none-dev file systems - file systems that do not have a buffer + * cache and therefore have to fake mmap support. We use procfs as target. + * The idea is that while we succeed in mapping in /proc/uptime, we also get + * a new uptime value every time we map in the page -- VM must not cache it. + */ +static void +nonedev_regression(void) +{ + int fd; + char *buf; + unsigned long uptime1, uptime2, uptime3; + + subtest++; + + if ((fd = open(_PATH_PROC "uptime", O_RDONLY)) < 0) e(1); + + buf = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0); + if (buf == MAP_FAILED) e(2); + + if (buf[4095] != 0) e(3); + + if ((uptime1 = atoi(buf)) == 0) e(4); + + if (munmap(buf, 4096) != 0) e(5); + + sleep(2); + + buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FILE, + fd, 0); + if (buf == MAP_FAILED) e(6); + + if (buf[4095] != 0) e(7); + + if ((uptime2 = atoi(buf)) == 0) e(8); + + if (uptime1 == uptime2) e(9); + + if (munmap(buf, 4096) != 0) e(10); + + sleep(2); + + buf = mmap(NULL, 4096, PROT_READ, MAP_SHARED | MAP_FILE, fd, 0); + if (buf == MAP_FAILED) e(11); + + if (buf[4095] != 0) e(12); + + if ((uptime3 = atoi(buf)) == 0) e(13); + + if (uptime1 == uptime3) e(14); + if (uptime2 == uptime3) e(15); + + if (munmap(buf, 4096) != 0) e(16); + + close(fd); +} + int main(int argc, char *argv[]) { @@ -470,6 +528,8 @@ main(int argc, char *argv[]) basic_regression(); + nonedev_regression(); + test_memory_types_vs_operations(); makefiles(MAXFILES); diff --git a/minix/tests/testvm.c b/minix/tests/testvm.c index 824e4d39e..1f5f8e989 100644 --- a/minix/tests/testvm.c +++ b/minix/tests/testvm.c @@ -44,7 +44,7 @@ int dowriteblock(int b, int blocksize, u32_t seed, char *block) memcpy(bdata, block, blocksize); if(mustset && (r=vm_set_cacheblock(bdata, MYDEV, dev_off, - VMC_NO_INODE, 0, NULL, 
blocksize)) != OK) { + VMC_NO_INODE, 0, NULL, blocksize, 0)) != OK) { printf("dowriteblock: vm_set_cacheblock failed %d\n", r); exit(1); }