VFS: change locking to ease concurrent FSes
This patch uses stricter locking for REQ_LINK, REQ_MKDIR, REQ_MKNOD, REQ_RENAME, REQ_RMDIR, REQ_SLINK and REQ_UNLINK. For all requests, VFS locks the directory in which we add or remove an inode with VNODE_WRITE. I.e., the operations have exclusive access to that directory. Furthermore, REQ_CHOWN, REQ_CHMOD, and REQ_FTRUNC now lock the vmnt VMNT_READ; VMNT_WRITE was unnecessary.
This commit is contained in:
		
							parent
							
								
									93103f497f
								
							
						
					
					
						commit
						23c5f56e32
					
				| @ -56,7 +56,7 @@ int do_link() | |||||||
|   /* Does the final directory of 'name2' exist? */ |   /* Does the final directory of 'name2' exist? */ | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp2, &dirp); | ||||||
|   resolve.l_vmnt_lock = VMNT_READ; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
|   if (fetch_name(vname2, vname2_length, fullpath) != OK) |   if (fetch_name(vname2, vname2_length, fullpath) != OK) | ||||||
| 	r = err_code; | 	r = err_code; | ||||||
|   else if ((dirp = last_dir(&resolve, fp)) == NULL) |   else if ((dirp = last_dir(&resolve, fp)) == NULL) | ||||||
| @ -98,11 +98,11 @@ int do_unlink() | |||||||
|  * may be used by the superuser to do dangerous things; rmdir() may not. |  * may be used by the superuser to do dangerous things; rmdir() may not. | ||||||
|  * The syscall might provide 'name' embedded in the message. |  * The syscall might provide 'name' embedded in the message. | ||||||
|  */ |  */ | ||||||
|   struct vnode *dirp, *vp; |   struct vnode *dirp, *dirp_l, *vp; | ||||||
|   struct vmnt *vmp, *vmp2; |   struct vmnt *vmp, *vmp2; | ||||||
|   int r; |   int r; | ||||||
|   char fullpath[PATH_MAX]; |   char fullpath[PATH_MAX]; | ||||||
|   struct lookup resolve; |   struct lookup resolve, stickycheck; | ||||||
|   vir_bytes vname; |   vir_bytes vname; | ||||||
|   size_t vname_length; |   size_t vname_length; | ||||||
| 
 | 
 | ||||||
| @ -114,13 +114,12 @@ int do_unlink() | |||||||
| 		return(err_code); | 		return(err_code); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &dirp); |   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &vmp, &dirp_l); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_WRITE; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   /* Get the last directory in the path. */ |   /* Get the last directory in the path. */ | ||||||
|   if ((dirp = last_dir(&resolve, fp)) == NULL) return(err_code); |   if ((dirp = last_dir(&resolve, fp)) == NULL) return(err_code); | ||||||
|   assert(vmp != NULL);	/* We must have locked the vmnt */ |  | ||||||
| 
 | 
 | ||||||
|   /* Make sure that the object is a directory */ |   /* Make sure that the object is a directory */ | ||||||
|   if (!S_ISDIR(dirp->v_mode)) { |   if (!S_ISDIR(dirp->v_mode)) { | ||||||
| @ -142,12 +141,10 @@ int do_unlink() | |||||||
|      user is allowed to unlink */ |      user is allowed to unlink */ | ||||||
|   if ((dirp->v_mode & S_ISVTX) == S_ISVTX) { |   if ((dirp->v_mode & S_ISVTX) == S_ISVTX) { | ||||||
| 	/* Look up inode of file to unlink to retrieve owner */ | 	/* Look up inode of file to unlink to retrieve owner */ | ||||||
| 	resolve.l_flags = PATH_RET_SYMLINK; | 	lookup_init(&stickycheck, resolve.l_path, PATH_RET_SYMLINK, &vmp2, &vp); | ||||||
| 	resolve.l_vmp = &vmp2;	/* Shouldn't actually get locked */ | 	stickycheck.l_vmnt_lock = VMNT_READ; | ||||||
| 	resolve.l_vmnt_lock = VMNT_READ; | 	stickycheck.l_vnode_lock = VNODE_READ; | ||||||
| 	resolve.l_vnode = &vp; | 	vp = advance(dirp, &stickycheck, fp); | ||||||
| 	resolve.l_vnode_lock = VNODE_READ; |  | ||||||
| 	vp = advance(dirp, &resolve, fp); |  | ||||||
| 	assert(vmp2 == NULL); | 	assert(vmp2 == NULL); | ||||||
| 	if (vp != NULL) { | 	if (vp != NULL) { | ||||||
| 		if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) | 		if (vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) | ||||||
| @ -164,7 +161,6 @@ int do_unlink() | |||||||
| 	} | 	} | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   assert(vmp != NULL); |  | ||||||
|   upgrade_vmnt_lock(vmp); |   upgrade_vmnt_lock(vmp); | ||||||
| 
 | 
 | ||||||
|   if (job_call_nr == UNLINK) |   if (job_call_nr == UNLINK) | ||||||
| @ -184,11 +180,11 @@ int do_rename() | |||||||
| { | { | ||||||
| /* Perform the rename(name1, name2) system call. */ | /* Perform the rename(name1, name2) system call. */ | ||||||
|   int r = OK, r1; |   int r = OK, r1; | ||||||
|   struct vnode *old_dirp, *new_dirp = NULL, *vp; |   struct vnode *old_dirp, *new_dirp, *new_dirp_l, *vp; | ||||||
|   struct vmnt *oldvmp, *newvmp, *vmp2; |   struct vmnt *oldvmp, *newvmp, *vmp2; | ||||||
|   char old_name[PATH_MAX]; |   char old_name[PATH_MAX]; | ||||||
|   char fullpath[PATH_MAX]; |   char fullpath[PATH_MAX]; | ||||||
|   struct lookup resolve; |   struct lookup resolve, stickycheck; | ||||||
|   vir_bytes vname1, vname2; |   vir_bytes vname1, vname2; | ||||||
|   size_t vname1_length, vname2_length; |   size_t vname1_length, vname2_length; | ||||||
| 
 | 
 | ||||||
| @ -200,7 +196,7 @@ int do_rename() | |||||||
|   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &oldvmp, &old_dirp); |   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &oldvmp, &old_dirp); | ||||||
|   /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */ |   /* Do not yet request exclusive lock on vmnt to prevent deadlocks later on */ | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_WRITE; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   /* See if 'name1' (existing file) exists.  Get dir and file inodes. */ |   /* See if 'name1' (existing file) exists.  Get dir and file inodes. */ | ||||||
|   if (fetch_name(vname1, vname1_length, fullpath) != OK) return(err_code); |   if (fetch_name(vname1, vname1_length, fullpath) != OK) return(err_code); | ||||||
| @ -210,10 +206,10 @@ int do_rename() | |||||||
|      user is allowed to rename */ |      user is allowed to rename */ | ||||||
|   if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) { |   if ((old_dirp->v_mode & S_ISVTX) == S_ISVTX) { | ||||||
| 	/* Look up inode of file to unlink to retrieve owner */ | 	/* Look up inode of file to unlink to retrieve owner */ | ||||||
| 	lookup_init(&resolve, resolve.l_path, PATH_RET_SYMLINK, &vmp2, &vp); | 	lookup_init(&stickycheck, resolve.l_path, PATH_RET_SYMLINK, &vmp2, &vp); | ||||||
| 	resolve.l_vmnt_lock = VMNT_READ; | 	stickycheck.l_vmnt_lock = VMNT_READ; | ||||||
| 	resolve.l_vnode_lock = VNODE_READ; | 	stickycheck.l_vnode_lock = VNODE_READ; | ||||||
| 	vp = advance(old_dirp, &resolve, fp); | 	vp = advance(old_dirp, &stickycheck, fp); | ||||||
| 	assert(vmp2 == NULL); | 	assert(vmp2 == NULL); | ||||||
| 	if (vp != NULL) { | 	if (vp != NULL) { | ||||||
| 		if(vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) | 		if(vp->v_uid != fp->fp_effuid && fp->fp_effuid != SU_UID) | ||||||
| @ -240,12 +236,19 @@ int do_rename() | |||||||
|   strlcpy(old_name, fullpath, PATH_MAX); |   strlcpy(old_name, fullpath, PATH_MAX); | ||||||
| 
 | 
 | ||||||
|   /* See if 'name2' (new name) exists.  Get dir inode */ |   /* See if 'name2' (new name) exists.  Get dir inode */ | ||||||
|   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &newvmp, &new_dirp); |   lookup_init(&resolve, fullpath, PATH_RET_SYMLINK, &newvmp, &new_dirp_l); | ||||||
|   resolve.l_vmnt_lock = VMNT_READ; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
|   if (fetch_name(vname2, vname2_length, fullpath) != OK) r = err_code; |   if (fetch_name(vname2, vname2_length, fullpath) != OK) r = err_code; | ||||||
|   else if ((new_dirp = last_dir(&resolve, fp)) == NULL) r = err_code; |   else if ((new_dirp = last_dir(&resolve, fp)) == NULL) r = err_code; | ||||||
| 
 | 
 | ||||||
|  |   /* We used a separate vnode pointer to see whether we obtained a lock on the
 | ||||||
|  |    * new_dirp vnode. If the new directory and old directory are the same, then | ||||||
|  |    * the VNODE_WRITE lock on new_dirp will fail. In that case, new_dirp_l will | ||||||
|  |    * be NULL, but new_dirp will not. | ||||||
|  |    */ | ||||||
|  |   if (new_dirp == old_dirp) assert(new_dirp_l == NULL); | ||||||
|  | 
 | ||||||
|   if (r != OK) { |   if (r != OK) { | ||||||
| 	unlock_vnode(old_dirp); | 	unlock_vnode(old_dirp); | ||||||
| 	unlock_vmnt(oldvmp); | 	unlock_vmnt(oldvmp); | ||||||
| @ -265,9 +268,10 @@ int do_rename() | |||||||
| 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, | 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, | ||||||
| 		       new_dirp->v_inode_nr, fullpath); | 		       new_dirp->v_inode_nr, fullpath); | ||||||
|   } |   } | ||||||
|  | 
 | ||||||
|   unlock_vnode(old_dirp); |   unlock_vnode(old_dirp); | ||||||
|   unlock_vnode(new_dirp); |  | ||||||
|   unlock_vmnt(oldvmp); |   unlock_vmnt(oldvmp); | ||||||
|  |   if (new_dirp_l) unlock_vnode(new_dirp_l); | ||||||
|   if (newvmp) unlock_vmnt(newvmp); |   if (newvmp) unlock_vmnt(newvmp); | ||||||
| 
 | 
 | ||||||
|   put_vnode(old_dirp); |   put_vnode(old_dirp); | ||||||
| @ -299,7 +303,7 @@ int do_truncate() | |||||||
|   vname_length = job_m_in.m2_i1; |   vname_length = job_m_in.m2_i1; | ||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_EXCL; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_WRITE; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   length = (off_t) job_m_in.flength; |   length = (off_t) job_m_in.flength; | ||||||
| @ -404,7 +408,7 @@ int do_slink() | |||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_WRITE; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   vname1 = (vir_bytes) job_m_in.name1; |   vname1 = (vir_bytes) job_m_in.name1; | ||||||
|   vname1_length = job_m_in.name1_length; |   vname1_length = job_m_in.name1_length; | ||||||
|  | |||||||
| @ -547,7 +547,7 @@ int do_mknod() | |||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_WRITE; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   /* Only the super_user may make nodes other than fifos. */ |   /* Only the super_user may make nodes other than fifos. */ | ||||||
|   if (!super_user && (!S_ISFIFO(mode_bits) && !S_ISSOCK(mode_bits))) { |   if (!super_user && (!S_ISFIFO(mode_bits) && !S_ISSOCK(mode_bits))) { | ||||||
| @ -595,7 +595,7 @@ int do_mkdir() | |||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_WRITE; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   if (fetch_name(vname1, vname1_length, fullpath) != OK) return(err_code); |   if (fetch_name(vname1, vname1_length, fullpath) != OK) return(err_code); | ||||||
|   bits = I_DIRECTORY | (dirmode & RWX_MODES & fp->fp_umask); |   bits = I_DIRECTORY | (dirmode & RWX_MODES & fp->fp_umask); | ||||||
|  | |||||||
| @ -168,7 +168,7 @@ struct fproc *rfp; | |||||||
|   size_t len; |   size_t len; | ||||||
|   char *cp; |   char *cp; | ||||||
|   char dir_entry[NAME_MAX+1]; |   char dir_entry[NAME_MAX+1]; | ||||||
|   struct vnode *start_dir, *res_vp, *sym_vp, *loop_start; |   struct vnode *start_dir, *res_vp, *sym_vp, *sym_vp_l, *loop_start; | ||||||
|   struct vmnt *sym_vmp = NULL; |   struct vmnt *sym_vmp = NULL; | ||||||
|   int r, symloop = 0, ret_on_symlink = 0; |   int r, symloop = 0, ret_on_symlink = 0; | ||||||
|   struct lookup symlink; |   struct lookup symlink; | ||||||
| @ -256,12 +256,18 @@ struct fproc *rfp; | |||||||
| 	strlcpy(resolve->l_path, dir_entry, NAME_MAX + 1); | 	strlcpy(resolve->l_path, dir_entry, NAME_MAX + 1); | ||||||
| 
 | 
 | ||||||
| 	/* Look up the directory entry, but do not follow the symlink when it
 | 	/* Look up the directory entry, but do not follow the symlink when it
 | ||||||
| 	 * is one. | 	 * is one. Note: depending on the previous advance, we might not be | ||||||
|  | 	 * able to lock the resulting vnode. For example, when we look up "./." | ||||||
|  | 	 * and request a VNODE_WRITE lock on the result, then the previous | ||||||
|  | 	 * advance has "./" locked. The next advance to "." will try to lock | ||||||
|  | 	 * the same vnode with a VNODE_READ lock, and fail. When that happens, | ||||||
|  | 	 * sym_vp_l will be NULL and we must not unlock the vnode. If we would | ||||||
|  | 	 * unlock, we actually unlock the vnode locked by the previous advance. | ||||||
| 	 */ | 	 */ | ||||||
| 	lookup_init(&symlink, resolve->l_path, | 	lookup_init(&symlink, resolve->l_path, | ||||||
| 		    resolve->l_flags|PATH_RET_SYMLINK, &sym_vmp, &sym_vp); | 		    resolve->l_flags|PATH_RET_SYMLINK, &sym_vmp, &sym_vp_l); | ||||||
| 	symlink.l_vnode_lock = VNODE_READ; |  | ||||||
| 	symlink.l_vmnt_lock = VMNT_READ; | 	symlink.l_vmnt_lock = VMNT_READ; | ||||||
|  | 	symlink.l_vnode_lock = VNODE_READ; | ||||||
| 	sym_vp = advance(res_vp, &symlink, rfp); | 	sym_vp = advance(res_vp, &symlink, rfp); | ||||||
| 
 | 
 | ||||||
| 	if (sym_vp == NULL) break; | 	if (sym_vp == NULL) break; | ||||||
| @ -291,6 +297,7 @@ struct fproc *rfp; | |||||||
| 		resolve->l_path[r] = '\0'; | 		resolve->l_path[r] = '\0'; | ||||||
| 
 | 
 | ||||||
| 		if (strrchr(resolve->l_path, '/') != NULL) { | 		if (strrchr(resolve->l_path, '/') != NULL) { | ||||||
|  | 			if (sym_vp_l != NULL) | ||||||
| 				unlock_vnode(sym_vp); | 				unlock_vnode(sym_vp); | ||||||
| 			unlock_vmnt(*resolve->l_vmp); | 			unlock_vmnt(*resolve->l_vmp); | ||||||
| 			if (sym_vmp != NULL) | 			if (sym_vmp != NULL) | ||||||
| @ -323,6 +330,7 @@ struct fproc *rfp; | |||||||
| 			assert(sym_vmp != NULL); | 			assert(sym_vmp != NULL); | ||||||
| 
 | 
 | ||||||
| 			/* Unlock final file, it might have wrong lock types */ | 			/* Unlock final file, it might have wrong lock types */ | ||||||
|  | 			if (sym_vp_l != NULL) | ||||||
| 				unlock_vnode(sym_vp); | 				unlock_vnode(sym_vp); | ||||||
| 			unlock_vmnt(sym_vmp); | 			unlock_vmnt(sym_vmp); | ||||||
| 			put_vnode(sym_vp); | 			put_vnode(sym_vp); | ||||||
| @ -357,7 +365,9 @@ struct fproc *rfp; | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   if (sym_vp != NULL) { |   if (sym_vp != NULL) { | ||||||
|  | 	if (sym_vp_l != NULL) { | ||||||
| 		unlock_vnode(sym_vp); | 		unlock_vnode(sym_vp); | ||||||
|  | 	} | ||||||
| 	if (sym_vmp != NULL) { | 	if (sym_vmp != NULL) { | ||||||
| 		unlock_vmnt(sym_vmp); | 		unlock_vmnt(sym_vmp); | ||||||
| 	} | 	} | ||||||
|  | |||||||
| @ -47,7 +47,7 @@ int do_chmod() | |||||||
|   new_mode = (mode_t) job_m_in.mode; |   new_mode = (mode_t) job_m_in.mode; | ||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_WRITE; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   if (job_call_nr == CHMOD) { |   if (job_call_nr == CHMOD) { | ||||||
| @ -123,7 +123,7 @@ int do_chown() | |||||||
|   gid = job_m_in.group; |   gid = job_m_in.group; | ||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_WRITE; |   resolve.l_vnode_lock = VNODE_WRITE; | ||||||
| 
 | 
 | ||||||
|   if (job_call_nr == CHOWN) { |   if (job_call_nr == CHOWN) { | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Thomas Veerman
						Thomas Veerman