libminixfs: rework prefetch API
This patch changes the prefetch API so that file systems now provide a set of block numbers, rather than a set of buffers. The result:

- a leaner and better-defined API;
- linear-time computation of the range of blocks to prefetch;
- duplicate block numbers no longer interfere with the prefetch process;
- the block needed next is guaranteed to be included in the prefetch range;
- limits and policy decisions that are better made by libminixfs are now actually made in libminixfs.

Change-Id: I7e44daf2d2d164bc5e2f1473ad717f3ff0f0a77f
author David van Moolenbroek
parent 6c46a77d95
commit 4472b590c7
@@ -19,8 +19,6 @@ void fs_sync(void)
  */
   struct inode *rip;
 
-  assert(lmfs_nr_bufs() > 0);
-
   if (superblock->s_rd_only)
 	return; /* nothing to sync */
 
@@ -215,7 +215,6 @@ int ftype;			 /* used when ENTER and INCOMPAT_FILETYPE */
 				/* 'flag' is LOOK_UP */
 				*numb = (ino_t) conv4(le_CPU, dp->d_ino);
 			}
-			assert(lmfs_dev(bp) != NO_DEV);
 			put_block(bp);
 			return(r);
 		}
@@ -250,7 +249,6 @@ int ftype;			 /* used when ENTER and INCOMPAT_FILETYPE */
 	}
 
 	/* The whole block has been searched or ENTER has a free slot. */
-	assert(lmfs_dev(bp) != NO_DEV);
 	if (e_hit) break;	/* e_hit set if ENTER can be performed now */
 	put_block(bp);		/* otherwise, continue searching dir */
   }
@@ -252,8 +252,6 @@ int opportunistic;
 		b = rip->i_block[EXT2_TIND_BLOCK];
 		if (b == NO_BLOCK) return(NO_BLOCK);
 		bp = get_block(rip->i_dev, b, NORMAL); /* get triple ind block */
-		ASSERT(lmfs_dev(bp) != NO_DEV);
-		ASSERT(lmfs_dev(bp) == rip->i_dev);
 		excess = block_pos - triple_ind_s;
 		mindex = excess / addr_in_block2;
 		b = rd_indir(bp, mindex);	/* num of double ind block */
@@ -264,8 +262,6 @@ int opportunistic;
 	bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
 	if (bp == NULL)
 		return NO_BLOCK;		/* peeking failed */
-	ASSERT(lmfs_dev(bp) != NO_DEV);
-	ASSERT(lmfs_dev(bp) == rip->i_dev);
 	mindex = excess / addr_in_block;
 	b = rd_indir(bp, mindex);	/* num of single ind block */
 	put_block(bp);				/* release double ind block */
@@ -276,8 +272,6 @@ int opportunistic;
   if (bp == NULL)
 	return NO_BLOCK;			/* peeking failed */
 
-  ASSERT(lmfs_dev(bp) != NO_DEV);
-  ASSERT(lmfs_dev(bp) == rip->i_dev);
   b = rd_indir(bp, mindex);
   put_block(bp);				/* release single ind block */
 
@@ -332,34 +326,16 @@ unsigned bytes_ahead;           /* bytes beyond position for immediate use */
  * flag on all reads to allow this.
  */
 /* Minimum number of blocks to prefetch. */
-# define BLOCKS_MINIMUM		(nr_bufs < 50 ? 18 : 32)
-  int nr_bufs = lmfs_nr_bufs();
+# define BLOCKS_MINIMUM		32
   int r, read_q_size;
   unsigned int blocks_ahead, fragment, block_size;
   block_t block, blocks_left;
   off_t ind1_pos;
   dev_t dev;
   struct buf *bp = NULL;
-  static unsigned int readqsize = 0;
-  static struct buf **read_q = NULL;
+  static block64_t read_q[LMFS_MAX_PREFETCH];
   u64_t position_running;
 
-  if(readqsize != nr_bufs) {
-	if(readqsize > 0) {
-		assert(read_q != NULL);
-		free(read_q);
-		read_q = NULL;
-		readqsize = 0;
-	}
-
-	assert(readqsize == 0);
-	assert(read_q == NULL);
-
-	if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
-		panic("couldn't allocate read_q");
-	readqsize = nr_bufs;
-  }
-
   dev = rip->i_dev;
   assert(dev != NO_DEV);
   block_size = get_block_size(dev);
@@ -372,11 +348,11 @@ unsigned bytes_ahead;           /* bytes beyond position for immediate use */
   bytes_ahead += fragment;
   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
 
-  r = lmfs_get_block_ino(&bp, dev, block, PREFETCH, rip->i_num, position);
-  if (r != OK)
+  r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
+  if (r == OK)
+	return(bp);
+  if (r != ENOENT)
 	panic("ext2: error getting block (%llu,%u): %d", dev, block, r);
-  assert(bp != NULL);
-  if (lmfs_dev(bp) != NO_DEV) return(bp);
 
   /* The best guess for the number of blocks to prefetch:  A lot.
    * It is impossible to tell what the device looks like, so we don't even
@@ -408,9 +384,6 @@ unsigned bytes_ahead;           /* bytes beyond position for immediate use */
 	blocks_left++;
   }
 
-  /* No more than the maximum request. */
-  if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
-
   /* Read at least the minimum number of blocks, but not after a seek. */
   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
 	blocks_ahead = BLOCKS_MINIMUM;
@@ -418,38 +391,39 @@ unsigned bytes_ahead;           /* bytes beyond position for immediate use */
   /* Can't go past end of file. */
   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
 
+  /* No more than the maximum request. */
+  if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
+
   read_q_size = 0;
 
   /* Acquire block buffers. */
   for (;;) {
   	block_t thisblock;
-	read_q[read_q_size++] = bp;
+	read_q[read_q_size++] = block;
 
 	if (--blocks_ahead == 0) break;
 
-	/* Don't trash the cache, leave 4 free. */
-	if (lmfs_bufs_in_use() >= nr_bufs - 4) break;
-
 	block++;
 	position_running += block_size;
 
 	thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
 	if (thisblock != NO_BLOCK) {
-		r = lmfs_get_block_ino(&bp, dev, thisblock, PREFETCH,
-		    rip->i_num, position_running);
-		if (r != OK)
-			panic("ext2: error getting block (%llu,%u): %d",
-			    dev, thisblock, r);
-	} else {
-		bp = get_block(dev, block, PREFETCH);
-	}
-	if (lmfs_dev(bp) != NO_DEV) {
+		r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
+		    position_running);
+		block = thisblock;
+	} else
+		r = lmfs_get_block(&bp, dev, block, PEEK);
+
+	if (r == OK) {
 		/* Oops, block already in the cache, get out. */
 		put_block(bp);
 		break;
 	}
+	if (r != ENOENT)
+		panic("ext2: error getting block (%llu,%u): %d", dev, block,
+		    r);
   }
-  lmfs_rw_scattered(dev, read_q, read_q_size, READING);
+  lmfs_prefetch(dev, read_q, read_q_size);
 
   r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
   if (r != OK)
@@ -57,7 +57,7 @@ zone_t alloc_zone(
    *     z = b + sp->s_firstdatazone - 1
    * Alloc_bit() never returns 0, since this is used for NO_BIT (failure).
    */
-  sp = get_super(dev);
+  sp = &superblock;
 
   /* If z is 0, skip initial part of the map known to be fully in use. */
   if (z == sp->s_firstdatazone) {
@@ -93,7 +93,7 @@ void free_zone(
   bit_t bit;
 
   /* Locate the appropriate super_block and return bit. */
-  sp = get_super(dev);
+  sp = &superblock;
   if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
   bit = (bit_t) (numb - (zone_t) (sp->s_firstdatazone - 1));
   free_bit(sp, ZMAP, bit);
@@ -2,10 +2,13 @@
 #ifndef _MFS_CLEAN_H
 #define _MFS_CLEAN_H 1
 
-#define MARKDIRTY(b) do { if(superblock.s_dev == lmfs_dev(b) && superblock.s_rd_only) { printf("%s:%d: dirty block on rofs! ", __FILE__, __LINE__); util_stacktrace(); } else { lmfs_markdirty(b); } } while(0)
-#define MARKCLEAN(b) lmfs_markclean(b)
-
-#define ISDIRTY(b)	(!lmfs_isclean(b))
-#define ISCLEAN(b)	(lmfs_isclean(b))
+#define MARKDIRTY(b) do { \
+	if (superblock.s_rd_only) { \
+		printf("%s:%d: dirty block on rofs! ", __FILE__, __LINE__); \
+		util_stacktrace(); \
+	} else { \
+		lmfs_markdirty(b); \
+	} \
+} while(0)
 
 #endif
@@ -258,7 +258,7 @@ struct inode *alloc_inode(dev_t dev, mode_t bits, uid_t uid, gid_t gid)
   int major, minor, inumb;
   bit_t b;
 
-  sp = get_super(dev);	/* get pointer to super_block */
+  sp = &superblock;
   if (sp->s_rd_only) {	/* can't allocate an inode on a read only device. */
 	err_code = EROFS;
 	return(NULL);
@@ -335,8 +335,7 @@ static void free_inode(
   register struct super_block *sp;
   bit_t b;
 
-  /* Locate the appropriate super_block. */
-  sp = get_super(dev);
+  sp = &superblock;
   if (inumb == NO_ENTRY || inumb > sp->s_ninodes) return;
   b = (bit_t) inumb;
   free_bit(sp, IMAP, b);
@@ -385,7 +384,7 @@ int rw_flag;			/* READING or WRITING */
   block_t b, offset;
 
   /* Get the block where the inode resides. */
-  sp = get_super(rip->i_dev);	/* get pointer to super block */
+  sp = &superblock;
   rip->i_sp = sp;		/* inode must contain super block pointer */
   offset = START_BLOCK + sp->s_imap_blocks + sp->s_zmap_blocks;
   b = (block_t) (rip->i_num - 1)/sp->s_inodes_per_block + offset;
@@ -15,8 +15,6 @@ void fs_sync(void)
  */
   struct inode *rip;
 
-  assert(lmfs_nr_bufs() > 0);
-
   /* Write all the dirty inodes to the disk. */
   for(rip = &inode[0]; rip < &inode[NR_INODES]; rip++)
 	  if(rip->i_count > 0 && IN_ISDIRTY(rip)) rw_inode(rip, WRITING);
@@ -140,7 +140,6 @@ int flag;			 /* LOOK_UP, ENTER, DELETE or IS_EMPTY */
 
 	assert(ldir_ptr->i_dev != NO_DEV);
 	assert(bp != NULL);
-	assert(lmfs_dev(bp) != NO_DEV);
 
 	/* Search a directory block. */
 	for (dp = &b_dir(bp)[0];
@@ -185,7 +184,6 @@ int flag;			 /* LOOK_UP, ENTER, DELETE or IS_EMPTY */
 				*numb = (ino_t) conv4(sp->s_native,
 						      (int) dp->mfs_d_ino);
 			}
-			assert(lmfs_dev(bp) != NO_DEV);
 			put_block(bp);
 			return(r);
 		}
@@ -199,7 +197,6 @@ int flag;			 /* LOOK_UP, ENTER, DELETE or IS_EMPTY */
 
 	/* The whole block has been searched or ENTER has a free slot. */
 	if (e_hit) break;	/* e_hit set if ENTER can be performed now */
-	assert(lmfs_dev(bp) != NO_DEV);
 	put_block(bp);		/* otherwise, continue searching dir */
   }
 
@@ -84,7 +84,6 @@ int fs_statvfs(struct statvfs *st);
 bit_t alloc_bit(struct super_block *sp, int map, bit_t origin);
 void free_bit(struct super_block *sp, int map, bit_t bit_returned);
 unsigned int get_block_size(dev_t dev);
-struct super_block *get_super(dev_t dev);
 int read_super(struct super_block *sp);
 int write_super(struct super_block *sp);
 
@@ -260,8 +260,6 @@ int opportunistic;		/* if nonzero, only use cache for metadata */
 	bp = get_block(rip->i_dev, b, iomode); /* get double indirect block */
 	if (bp == NULL)
 		return NO_BLOCK;		/* peeking failed */
-	ASSERT(lmfs_dev(bp) != NO_DEV);
-	ASSERT(lmfs_dev(bp) == rip->i_dev);
 	z = rd_indir(bp, index);		/* z= zone for single*/
 	put_block(bp);				/* release double ind block */
 	excess = excess % nr_indirects;		/* index into single ind blk */
@@ -310,7 +308,7 @@ int index;			/* index into *bp */
   if(bp == NULL)
 	panic("rd_indir() on NULL");
 
-  sp = get_super(lmfs_dev(bp));	/* need super block to find file sys type */
+  sp = &superblock;
 
   /* read a zone from an indirect block */
   assert(sp->s_version == V3);
@@ -343,28 +341,15 @@ unsigned bytes_ahead;		/* bytes beyond position for immediate use */
  * flag on all reads to allow this.
  */
 /* Minimum number of blocks to prefetch. */
-  int nr_bufs = lmfs_nr_bufs();
-# define BLOCKS_MINIMUM		(nr_bufs < 50 ? 18 : 32)
+# define BLOCKS_MINIMUM		32
   int r, scale, read_q_size;
   unsigned int blocks_ahead, fragment, block_size;
   block_t block, blocks_left;
   off_t ind1_pos;
   dev_t dev;
   struct buf *bp;
-  static unsigned int readqsize = 0;
-  static struct buf **read_q;
+  static block64_t read_q[LMFS_MAX_PREFETCH];
   u64_t position_running;
-  int inuse_before = lmfs_bufs_in_use();
-
-  if(readqsize != nr_bufs) {
-	if(readqsize > 0) {
-		assert(read_q != NULL);
-		free(read_q);
-	}
-	if(!(read_q = malloc(sizeof(read_q[0])*nr_bufs)))
-		panic("couldn't allocate read_q");
-	readqsize = nr_bufs;
-  }
 
   dev = rip->i_dev;
   assert(dev != NO_DEV);
@@ -379,12 +364,11 @@ unsigned bytes_ahead;		/* bytes beyond position for immediate use */
   bytes_ahead += fragment;
   blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
 
-  r = lmfs_get_block_ino(&bp, dev, block, PREFETCH, rip->i_num, position);
-  if (r != OK)
+  r = lmfs_get_block_ino(&bp, dev, block, PEEK, rip->i_num, position);
+  if (r == OK)
+	return(bp);
+  if (r != ENOENT)
 	panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
-  assert(bp != NULL);
-  assert(bp->lmfs_count > 0);
-  if (lmfs_dev(bp) != NO_DEV) return(bp);
 
   /* The best guess for the number of blocks to prefetch:  A lot.
    * It is impossible to tell what the device looks like, so we don't even
@@ -417,9 +401,6 @@ unsigned bytes_ahead;		/* bytes beyond position for immediate use */
 	blocks_left++;
   }
 
-  /* No more than the maximum request. */
-  if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
-
   /* Read at least the minimum number of blocks, but not after a seek. */
   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
 	blocks_ahead = BLOCKS_MINIMUM;
@@ -427,43 +408,38 @@ unsigned bytes_ahead;		/* bytes beyond position for immediate use */
   /* Can't go past end of file. */
   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
 
+  /* No more than the maximum request. */
+  if (blocks_ahead > LMFS_MAX_PREFETCH) blocks_ahead = LMFS_MAX_PREFETCH;
+
   read_q_size = 0;
 
   /* Acquire block buffers. */
   for (;;) {
   	block_t thisblock;
-	assert(bp->lmfs_count > 0);
-	read_q[read_q_size++] = bp;
+	read_q[read_q_size++] = block;
 
 	if (--blocks_ahead == 0) break;
 
-	/* Don't trash the cache, leave 4 free. */
-	if (lmfs_bufs_in_use() >= nr_bufs - 4) break;
-
 	block++;
 	position_running += block_size;
 
 	thisblock = read_map(rip, (off_t) ex64lo(position_running), 1);
 	if (thisblock != NO_BLOCK) {
-		r = lmfs_get_block_ino(&bp, dev, thisblock, PREFETCH,
-		    rip->i_num, position_running);
-		if (r != OK)
-			panic("MFS: error getting block (%llu,%u): %d",
-			    dev, thisblock, r);
-	} else {
-		bp = get_block(dev, block, PREFETCH);
-	}
-	assert(bp);
-	assert(bp->lmfs_count > 0);
-	if (lmfs_dev(bp) != NO_DEV) {
+		r = lmfs_get_block_ino(&bp, dev, thisblock, PEEK, rip->i_num,
+		    position_running);
+		block = thisblock;
+	} else
+		r = lmfs_get_block(&bp, dev, block, PEEK);
+
+	if (r == OK) {
 		/* Oops, block already in the cache, get out. */
 		put_block(bp);
 		break;
 	}
+	if (r != ENOENT)
+		panic("MFS: error getting block (%llu,%u): %d", dev, block, r);
   }
-  lmfs_rw_scattered(dev, read_q, read_q_size, READING);
-
-  assert(inuse_before == lmfs_bufs_in_use());
+  lmfs_prefetch(dev, read_q, read_q_size);
 
   r = lmfs_get_block_ino(&bp, dev, baseblock, NORMAL, rip->i_num, position);
   if (r != OK)
@@ -85,7 +85,7 @@ int fs_statvfs(struct statvfs *st)
   struct super_block *sp;
   int scale;
 
-  sp = get_super(fs_dev);
+  sp = &superblock;
 
   scale = sp->s_log_zone_size;
 
@@ -6,7 +6,6 @@
  * The entry points into this file are
  *   alloc_bit:       somebody wants to allocate a zone or inode; find one
  *   free_bit:        indicate that a zone or inode is available for allocation
- *   get_super:       search the 'superblock' table for a device
  *   mounted:         tells if file inode is on mounted (or ROOT) file system
  *   read_super:      read a superblock
  */
@@ -156,23 +155,6 @@ bit_t bit_returned;		/* number of bit to insert into the map */
   }
 }
 
-/*===========================================================================*
- *				get_super				     *
- *===========================================================================*/
-struct super_block *get_super(
-  dev_t dev			/* device number whose super_block is sought */
-)
-{
-  if (dev == NO_DEV)
-  	panic("request for super_block of NO_DEV");
-
-  if(superblock.s_dev != dev)
-  	panic("wrong superblock: 0x%x", (int) dev);
-
-  return(&superblock);
-}
-
-
 /*===========================================================================*
  *				get_block_size				     *
  *===========================================================================*/
@@ -200,7 +200,7 @@ zone_t zone;			/* zone to write */
   if(bp == NULL)
 	panic("wr_indir() on NULL");
 
-  sp = get_super(lmfs_dev(bp));	/* need super block to find file sys type */
+  sp = &superblock;
 
   /* write a zone into an indirect block */
   assert(sp->s_version == V3);
@@ -5,6 +5,9 @@
 
 #include <minix/fsdriver.h>
 
+/* Maximum number of blocks that will be considered by lmfs_prefetch() */
+#define LMFS_MAX_PREFETCH	NR_IOREQS
+
 struct buf {
   /* Data portion of the buffer. */
   void *data;
@@ -30,9 +33,6 @@ struct buf {
 void lmfs_markdirty(struct buf *bp);
 void lmfs_markclean(struct buf *bp);
 int lmfs_isclean(struct buf *bp);
-dev_t lmfs_dev(struct buf *bp);
-int lmfs_bufs_in_use(void);
-int lmfs_nr_bufs(void);
 void lmfs_flushall(void);
 void lmfs_flushdev(dev_t dev);
 size_t lmfs_fs_block_size(void);
@@ -46,7 +46,7 @@ void lmfs_put_block(struct buf *bp);
 void lmfs_free_block(dev_t dev, block64_t block);
 void lmfs_zero_block_ino(dev_t dev, ino_t ino, u64_t off);
 void lmfs_invalidate(dev_t device);
-void lmfs_rw_scattered(dev_t, struct buf **, int, int);
+void lmfs_prefetch(dev_t dev, const block64_t *blockset, unsigned int nblocks);
 void lmfs_setquiet(int q);
 void lmfs_set_blockusage(fsblkcnt_t btotal, fsblkcnt_t bused);
 void lmfs_change_blockusage(int delta);
@@ -54,8 +54,7 @@ void lmfs_change_blockusage(int delta);
 /* get_block arguments */
 #define NORMAL             0    /* forces get_block to do disk read */
 #define NO_READ            1    /* prevents get_block from doing disk read */
-#define PREFETCH           2    /* tells get_block not to read or mark dev */
-#define PEEK               3    /* returns ENOENT if not in cache */
+#define PEEK               2    /* returns ENOENT if not in cache */
 
 /* Block I/O helper functions. */
 void lmfs_driver(dev_t dev, char *label);
@@ -10,7 +10,8 @@
  *   o  it must initialize this library in order to set up a buffer pool for
  *      use by these functions, using the lmfs_buf_pool function; the
  *      recommended number of blocks for *non*-disk-backed file systems is
- *      NR_IOREQS buffers (disk-backed file systems typically use many more);
+ *      LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many
+ *      more);
  *   o  it must enable VM caching in order to support memory mapping of block
  *      devices, using the lmfs_may_use_vmcache function;
  *   o  it must either use lmfs_flushall as implementation for the fdr_sync
@@ -64,12 +65,15 @@ static void
 block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
 	size_t block_size, size_t last_size)
 {
-	struct buf *bp, *bufs[NR_IOREQS];
-	unsigned int count;
+	struct buf *bp;
+	unsigned int count, limit;
 	int r;
 
-	if (nblocks > NR_IOREQS) {
-		nblocks = NR_IOREQS;
+	limit = lmfs_readahead_limit();
+	assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH);
+
+	if (nblocks > limit) {
+		nblocks = limit;
 
 		last_size = block_size;
 	}
@@ -77,24 +81,21 @@ block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
 	for (count = 0; count < nblocks; count++) {
 		if (count == nblocks - 1 && last_size < block_size)
 			r = lmfs_get_partial_block(&bp, dev, block + count,
-			    PREFETCH, last_size);
+			    PEEK, last_size);
 		else
-			r = lmfs_get_block(&bp, dev, block + count, PREFETCH);
+			r = lmfs_get_block(&bp, dev, block + count, PEEK);
 
-		if (r != OK)
-			panic("libminixfs: get_block PREFETCH error: %d\n", r);
-
-		if (lmfs_dev(bp) != NO_DEV) {
+		if (r == OK) {
 			lmfs_put_block(bp);
 
+			last_size = block_size;
+
 			break;
 		}
-
-		bufs[count] = bp;
 	}
 
 	if (count > 0)
-		lmfs_rw_scattered(dev, bufs, count, READING);
+		lmfs_readahead(dev, block, count, last_size);
 }
 
 /*
@@ -206,8 +207,6 @@ lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
 
 		/* Perform the actual copy. */
 		if (r == OK && data != NULL) {
-			assert(lmfs_dev(bp) != NO_DEV);
-
 			if (write) {
 				r = fsdriver_copyin(data, off,
 				    (char *)bp->data + block_off, chunk);
@@ -18,6 +18,7 @@
 #include <minix/sysutil.h>
 #include <minix/u64.h>
 #include <minix/bdev.h>
+#include <minix/bitmap.h>
 
 #include "inc.h"
 
@@ -173,11 +174,6 @@ int lmfs_isclean(struct buf *bp)
 	return !(bp->lmfs_flags & VMMC_DIRTY);
 }
 
-dev_t lmfs_dev(struct buf *bp)
-{
-	return bp->lmfs_dev;
-}
-
 static void free_unused_blocks(void)
 {
 	struct buf *bp;
@@ -319,10 +315,8 @@ static int get_block_ino(struct buf **bpp, dev_t dev, block64_t block, int how,
 * disk (if 'how' is NORMAL).  If 'how' is NO_READ, the caller intends to
 * overwrite the requested block in its entirety, so it is only necessary to
 * see if it is in the cache; if it is not, any free buffer will do.  If 'how'
- * is PREFETCH, the block need not be read from the disk, and the device is not
- * to be marked on the block (i.e., set to NO_DEV), so callers can tell if the
- * block returned is valid.  If 'how' is PEEK, the function returns the block
- * if it is in the cache or the VM cache, and an ENOENT error code otherwise.
+ * is PEEK, the function returns the block if it is in the cache or the VM
+ * cache, and an ENOENT error code otherwise.
 * In addition to the LRU chain, there is also a hash chain to link together
 * blocks whose block numbers end with the same bit strings, for fast lookup.
 */
@@ -441,12 +435,16 @@ static int get_block_ino(struct buf **bpp, dev_t dev, block64_t block, int how,
 
   assert(dev != NO_DEV);
 
-  /* Block is not found in our cache, but we do want it
-   * if it's in the vm cache.
+  /* The block is not found in our cache, but we do want it if it's in the VM
+   * cache. The exception is NO_READ, purely for context switching performance
+   * reasons. NO_READ is used for 1) newly allocated blocks, 2) blocks being
+   * prefetched, and 3) blocks about to be fully overwritten. In the first two
+   * cases, VM will not have the block in its cache anyway, and for the third
+   * we save on one VM call only if the block is in the VM cache.
    */
   assert(!bp->data);
   assert(!bp->lmfs_bytes);
-  if(vmcache) {
+  if (how != NO_READ && vmcache) {
 	if((bp->data = vm_map_cacheblock(dev, dev_off, ino, ino_off,
 	    &bp->lmfs_flags, roundup(block_size, PAGE_SIZE))) != MAP_FAILED) {
 		bp->lmfs_bytes = block_size;
@@ -476,10 +474,7 @@ static int get_block_ino(struct buf **bpp, dev_t dev, block64_t block, int how,
 
   assert(bp->data);
 
-  if(how == PREFETCH) {
-	/* PREFETCH: don't do i/o. */
-	bp->lmfs_dev = NO_DEV;
-  } else if (how == NORMAL) {
+  if (how == NORMAL) {
 	/* Try to read the block. Return an error code on failure. */
 	if ((r = read_block(bp, block_size)) != OK) {
 		put_block(bp, 0);
@@ -812,68 +807,59 @@ void lmfs_invalidate(
 }
 
 /*===========================================================================*
- *				lmfs_flushdev				     *
+ *				sort_blocks				     *
 *===========================================================================*/
-void lmfs_flushdev(dev_t dev)
+static void sort_blocks(struct buf **bufq, unsigned int bufqsize)
 {
-/* Flush all dirty blocks for one device. */
+  struct buf *bp;
+  int i, j, gap;
 
-  register struct buf *bp;
-  static struct buf **dirty;
-  static unsigned int dirtylistsize = 0;
-  int ndirty;
+  gap = 1;
+  do
+	gap = 3 * gap + 1;
+  while ((unsigned int)gap <= bufqsize);
 
-  if(dirtylistsize != nr_bufs) {
-	if(dirtylistsize > 0) {
-		assert(dirty != NULL);
-		free(dirty);
-	}
-	if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs)))
-		panic("couldn't allocate dirty buf list");
-	dirtylistsize = nr_bufs;
-  }
-
-  for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) {
-	/* Do not flush dirty blocks that are in use (lmfs_count>0): the file
-	 * system may mark the block as dirty before changing its contents, in
-	 * which case the new contents could end up being lost.
-	 */
-	if (!lmfs_isclean(bp) && bp->lmfs_dev == dev && bp->lmfs_count == 0) {
-		dirty[ndirty++] = bp;
+  while (gap != 1) {
+	gap /= 3;
+	for (j = gap; (unsigned int)j < bufqsize; j++) {
+		for (i = j - gap; i >= 0 &&
+		    bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
+		    i -= gap) {
+			bp = bufq[i];
+			bufq[i] = bufq[i + gap];
+			bufq[i + gap] = bp;
+		}
 	}
   }
-
-  lmfs_rw_scattered(dev, dirty, ndirty, WRITING);
 }
 
 /*===========================================================================*
- *				lmfs_rw_scattered			     *
+ *				rw_scattered				     *
 *===========================================================================*/
-void lmfs_rw_scattered(
+static void rw_scattered(
   dev_t dev,			/* major-minor device number */
   struct buf **bufq,		/* pointer to array of buffers */
-  int bufqsize,			/* number of buffers */
+  unsigned int bufqsize,	/* number of buffers */
   int rw_flag			/* READING or WRITING */
 )
 {
 /* Read or write scattered data from a device. */
 
   register struct buf *bp;
-  int gap;
-  register int i;
   register iovec_t *iop;
   static iovec_t iovec[NR_IOREQS];
   off_t pos;
-  int iov_per_block;
+  unsigned int i, iov_per_block;
   unsigned int start_in_use = bufs_in_use, start_bufqsize = bufqsize;
 
-  assert(bufqsize >= 0);
   if(bufqsize == 0) return;
 
   /* for READING, check all buffers on the list are obtained and held
   * (count > 0)
   */
   if (rw_flag == READING) {
+	assert(bufqsize <= LMFS_MAX_PREFETCH);
+
 	for(i = 0; i < bufqsize; i++) {
 		assert(bufq[i] != NULL);
 		assert(bufq[i]->lmfs_count > 0);
@@ -887,40 +873,26 @@ void lmfs_rw_scattered(
   assert(fs_block_size > 0);
   assert(howmany(fs_block_size, PAGE_SIZE) <= NR_IOREQS);
 
-  /* (Shell) sort buffers on lmfs_blocknr. */
-  gap = 1;
-  do
-	gap = 3 * gap + 1;
-  while (gap <= bufqsize);
-  while (gap != 1) {
-  	int j;
-	gap /= 3;
-	for (j = gap; j < bufqsize; j++) {
-		for (i = j - gap;
-		     i >= 0 && bufq[i]->lmfs_blocknr > bufq[i + gap]->lmfs_blocknr;
-		     i -= gap) {
-			bp = bufq[i];
-			bufq[i] = bufq[i + gap];
-			bufq[i + gap] = bp;
-		}
-	}
-  }
+  /* For WRITING, (Shell) sort buffers on lmfs_blocknr.
+   * For READING, the buffers are already sorted.
+   */
+  if (rw_flag == WRITING)
+	sort_blocks(bufq, bufqsize);
 
   /* Set up I/O vector and do I/O.  The result of bdev I/O is OK if everything
   * went fine, otherwise the error code for the first failed transfer.
   */
   while (bufqsize > 0) {
-  	int nblocks = 0, niovecs = 0;
+	unsigned int p, nblocks = 0, niovecs = 0;
 	int r;
 	for (iop = iovec; nblocks < bufqsize; nblocks++) {
-		int p;
 		vir_bytes vdata, blockrem;
 		bp = bufq[nblocks];
 		if (bp->lmfs_blocknr != bufq[0]->lmfs_blocknr + nblocks)
 			break;
 		blockrem = bp->lmfs_bytes;
 		iov_per_block = howmany(blockrem, PAGE_SIZE);
-		if(niovecs >= NR_IOREQS-iov_per_block) break;
+		if (niovecs > NR_IOREQS - iov_per_block) break;
 		vdata = (vir_bytes) bp->data;
 		for(p = 0; p < iov_per_block; p++) {
 			vir_bytes chunk =
@@ -937,7 +909,7 @@ void lmfs_rw_scattered(
 	}
 
 	assert(nblocks > 0);
-	assert(niovecs > 0);
+	assert(niovecs > 0 && niovecs <= NR_IOREQS);
 
 	pos = (off_t)bufq[0]->lmfs_blocknr * fs_block_size;
 	if (rw_flag == READING)
@@ -963,7 +935,6 @@ void lmfs_rw_scattered(
 			break;
 		}
 		if (rw_flag == READING) {
-			bp->lmfs_dev = dev;	/* validate block */
 			lmfs_put_block(bp);
 		} else {
 			MARKCLEAN(bp);
@@ -979,7 +950,9 @@ void lmfs_rw_scattered(
 		 * give at this time.  Don't forget to release those extras.
 		 */
 		while (bufqsize > 0) {
-			lmfs_put_block(*bufq++);
+			bp = *bufq++;
+			bp->lmfs_dev = NO_DEV;	/* invalidate block */
+			lmfs_put_block(bp);
 			bufqsize--;
 		}
 	}
@@ -1001,6 +974,190 @@ void lmfs_rw_scattered(
   }
 }
 
+/*===========================================================================*
+ *				lmfs_readahead				     *
+ *===========================================================================*/
+void lmfs_readahead(dev_t dev, block64_t base_block, unsigned int nblocks,
+	size_t last_size)
+{
+/* Read ahead 'nblocks' blocks starting from the block 'base_block' on device
+ * 'dev'. The number of blocks must be between 1 and LMFS_MAX_PREFETCH,
+ * inclusive. All blocks have the file system's block size, possibly except the
+ * last block in the range, which is of size 'last_size'. The caller must
+ * ensure that none of the blocks in the range are already in the cache.
+ * However, the caller must also not rely on all or even any of the blocks to
+ * be present in the cache afterwards--failures are (deliberately!) ignored.
+ */
+  static struct buf *bufq[LMFS_MAX_PREFETCH]; /* static because of size only */
+  struct buf *bp;
+  unsigned int count;
+  int r;
+
+  assert(nblocks >= 1 && nblocks <= LMFS_MAX_PREFETCH);
+
+  for (count = 0; count < nblocks; count++) {
+	if (count == nblocks - 1)
+		r = lmfs_get_partial_block(&bp, dev, base_block + count,
+		    NO_READ, last_size);
+	else
+		r = lmfs_get_block(&bp, dev, base_block + count, NO_READ);
+
+	if (r != OK)
+		break;
+
+	/* We could add a flag that makes the get_block() calls fail if the
+	 * block is already in the cache, but it is not a major concern if it
+	 * is: we just perform a useless read in that case. However, if the
+	 * block is cached *and* dirty, we are about to lose its new contents.
+	 */
+	assert(lmfs_isclean(bp));
+
+	bufq[count] = bp;
+  }
+
+  rw_scattered(dev, bufq, count, READING);
+}
+
+/*===========================================================================*
+ *				lmfs_readahead_limit			     *
+ *===========================================================================*/
+unsigned int lmfs_readahead_limit(void)
+{
+/* Return the maximum number of blocks that should be read ahead at once. The
+ * return value is guaranteed to be between 1 and LMFS_MAX_PREFETCH, inclusive.
+ */
+  unsigned int max_transfer, max_bufs;
+
+  /* The returned value is the minimum of two factors: the maximum number of
+   * blocks that can be transferred in a single I/O gather request (see how
+   * rw_scattered() generates I/O requests), and a policy limit on the number
+   * of buffers that any read-ahead operation may use (that is, thrash).
+   */
+  max_transfer = NR_IOREQS / MAX(fs_block_size / PAGE_SIZE, 1);
+
+  /* The constants have been imported from MFS as is, and may need tuning. */
+  if (nr_bufs < 50)
+	max_bufs = 18;
+  else
+	max_bufs = nr_bufs - 4;
+
+  return MIN(max_transfer, max_bufs);
+}
+
+/*===========================================================================*
+ *				lmfs_prefetch				     *
+ *===========================================================================*/
+void lmfs_prefetch(dev_t dev, const block64_t *blockset, unsigned int nblocks)
+{
+/* The given set of blocks is expected to be needed soon, so prefetch a
+ * convenient subset. The blocks are expected to be sorted by likelihood of
+ * being accessed soon, making the first block of the set the most important
+ * block to prefetch right now. The caller must have made sure that the blocks
+ * are not in the cache already. The array may have duplicate block numbers.
+ */
+  bitchunk_t blocks_before[BITMAP_CHUNKS(LMFS_MAX_PREFETCH)];
+  bitchunk_t blocks_after[BITMAP_CHUNKS(LMFS_MAX_PREFETCH)];
+  block64_t block, base_block;
+  unsigned int i, bit, nr_before, nr_after, span, limit, nr_blocks;
+
+  if (nblocks == 0)
+	return;
+
+  /* Here is the deal. We are going to prefetch one range only, because seeking
+   * is too expensive for just prefetching. The range we select should at least
+   * include the first ("base") block of the given set, since that is the block
+   * the caller is primarily interested in. Thus, the rest of the range is
+   * going to have to be directly around this base block. We first check which
+   * blocks from the set fall just before and after the base block, which then
+   * allows us to construct a contiguous range of desired blocks directly
+   * around the base block, in O(n) time. As a natural part of this, we ignore
+   * duplicate blocks in the given set. We then read from the beginning of this
+   * range, in order to maximize the chance that a next prefetch request will
+   * continue from the last disk position without requiring a seek. However, we
+   * do correct for the maximum number of blocks we can (or should) read in at
+   * once, such that we will still end up reading the base block.
+   */
+  base_block = blockset[0];
+
+  memset(blocks_before, 0, sizeof(blocks_before));
+  memset(blocks_after, 0, sizeof(blocks_after));
+
+  for (i = 1; i < nblocks; i++) {
+	block = blockset[i];
+
+	if (block < base_block && block + LMFS_MAX_PREFETCH >= base_block) {
+		bit = base_block - block - 1;
+		assert(bit < LMFS_MAX_PREFETCH);
+		SET_BIT(blocks_before, bit);
+	} else if (block > base_block &&
+	    block - LMFS_MAX_PREFETCH <= base_block) {
+		bit = block - base_block - 1;
+		assert(bit < LMFS_MAX_PREFETCH);
+		SET_BIT(blocks_after, bit);
+	}
+  }
+
+  for (nr_before = 0; nr_before < LMFS_MAX_PREFETCH; nr_before++)
+	if (!GET_BIT(blocks_before, nr_before))
+		break;
+
+  for (nr_after = 0; nr_after < LMFS_MAX_PREFETCH; nr_after++)
+	if (!GET_BIT(blocks_after, nr_after))
+		break;
+
+  /* The number of blocks to prefetch is the minimum of two factors: the number
+   * of blocks in the range around the base block, and the maximum number of
+   * blocks that should be read ahead at once at all.
+   */
+  span = nr_before + 1 + nr_after;
+  limit = lmfs_readahead_limit();
+
+  nr_blocks = MIN(span, limit);
+  assert(nr_blocks >= 1 && nr_blocks <= LMFS_MAX_PREFETCH);
+
+  /* Start prefetching from the lowest block within the contiguous range, but
+   * make sure that we read at least the original base block itself, too.
+   */
+  base_block -= MIN(nr_before, nr_blocks - 1);
+
+  lmfs_readahead(dev, base_block, nr_blocks, fs_block_size);
+}
+
+/*===========================================================================*
+ *				lmfs_flushdev				     *
+ *===========================================================================*/
+void lmfs_flushdev(dev_t dev)
+{
+/* Flush all dirty blocks for one device. */
+
+  register struct buf *bp;
+  static struct buf **dirty;
+  static unsigned int dirtylistsize = 0;
+  unsigned int ndirty;
+
+  if(dirtylistsize != nr_bufs) {
+	if(dirtylistsize > 0) {
+		assert(dirty != NULL);
+		free(dirty);
+	}
+	if(!(dirty = malloc(sizeof(dirty[0])*nr_bufs)))
+		panic("couldn't allocate dirty buf list");
+	dirtylistsize = nr_bufs;
+  }
+
+  for (bp = &buf[0], ndirty = 0; bp < &buf[nr_bufs]; bp++) {
+	/* Do not flush dirty blocks that are in use (lmfs_count>0): the file
+	 * system may mark the block as dirty before changing its contents, in
+	 * which case the new contents could end up being lost.
+	 */
+	if (!lmfs_isclean(bp) && bp->lmfs_dev == dev && bp->lmfs_count == 0) {
+		dirty[ndirty++] = bp;
+	}
+  }
+
+  rw_scattered(dev, dirty, ndirty, WRITING);
+}
+
 /*===========================================================================*
 *				rm_lru					     *
 *===========================================================================*/
@@ -1128,16 +1285,6 @@ void lmfs_buf_pool(int new_nr_bufs)
   buf_hash[0] = front;
 }
 
-int lmfs_bufs_in_use(void)
-{
-	return bufs_in_use;
-}
-
-int lmfs_nr_bufs(void)
-{
-	return nr_bufs;
-}
-
 void lmfs_flushall(void)
 {
 	struct buf *bp;
@@ -3,5 +3,8 @@
 
 int lmfs_get_partial_block(struct buf **bpp, dev_t dev, block64_t block,
 	int how, size_t block_size);
+void lmfs_readahead(dev_t dev, block64_t base_block, unsigned int nblocks,
+	size_t last_size);
+unsigned int lmfs_readahead_limit(void);
 
 #endif /* !_LIBMINIXFS_INC_H */
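
As a worked example of the range selection performed by lmfs_prefetch() above, the standalone sketch below mimics its logic. The block numbers are made up; plain byte arrays stand in for the <minix/bitmap.h> bit chunks, and a fixed limit of 32 stands in for lmfs_readahead_limit().

	#include <stdio.h>

	#define MAX_PREFETCH 32	/* stand-in for LMFS_MAX_PREFETCH */

	static void prefetch_range(const unsigned long *set, unsigned int n,
		unsigned int limit)
	{
		unsigned char before[MAX_PREFETCH] = { 0 };
		unsigned char after[MAX_PREFETCH] = { 0 };
		unsigned long base = set[0], block;
		unsigned int i, nr_before, nr_after, span, nblocks;

		/* Mark which set members fall just before/after the base block. */
		for (i = 1; i < n; i++) {
			block = set[i];
			if (block < base && block + MAX_PREFETCH >= base)
				before[base - block - 1] = 1;
			else if (block > base && block - MAX_PREFETCH <= base)
				after[block - base - 1] = 1;
		}

		/* Count how far the contiguous range extends on either side. */
		for (nr_before = 0; nr_before < MAX_PREFETCH; nr_before++)
			if (!before[nr_before]) break;
		for (nr_after = 0; nr_after < MAX_PREFETCH; nr_after++)
			if (!after[nr_after]) break;

		span = nr_before + 1 + nr_after;
		nblocks = (span < limit) ? span : limit;

		/* Start low, but never so low that the base block is excluded. */
		base -= (nr_before < nblocks - 1) ? nr_before : nblocks - 1;

		printf("readahead: start=%lu nblocks=%u\n", base, nblocks);
	}

	int main(void)
	{
		/* 250 is too far from base block 100 and is ignored; 98, 99,
		 * and 101 extend the contiguous range around it.
		 */
		const unsigned long set[] = { 100, 98, 250, 101, 99 };

		prefetch_range(set, 5, 32);	/* prints: readahead: start=98 nblocks=4 */
		return 0;
	}

Reading from the low end of the contiguous range, rather than from the base block itself, maximizes the chance that the next prefetch continues from the last disk position without a seek, as the comment in lmfs_prefetch() explains.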