VFS: fix locking bugs
.sync and fsync used unnecessarily restrictive locking type
.fsync violated locking order by obtaining a vmnt lock after a filp lock
.fsync contained a TOCTOU bug
.new_node violated locking rules (didn't upgrade lock upon file creation)
.do_pipe used unnecessarily restrictive locking type
.always lock pipes exclusively; even a read operation might require a
 write on a vnode object (update pipe size)
.when opening a file with O_TRUNC, upgrade vnode lock when truncating
.utime used unnecessarily restrictive locking type
.path parsing:
  .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to
   VMNT_READ if that was what was actually requested. This prevents the
   following deadlock scenario:
   thread A:
     lock_vmnt(vmp, TLL_READSER);
     lock_vnode(vp, TLL_READSER);
     upgrade_vmnt_lock(vmp, TLL_WRITE);
   thread B:
     lock_vmnt(vmp, TLL_READ);
     lock_vnode(vp, TLL_READSER);
   thread A will be stuck in upgrade_vmnt_lock and thread B is stuck in
   lock_vnode. This happens when, for example, thread A tries to create a
   new node (open.c:new_node) and thread B tries to do eat_path to
   change dir (stadir.c:do_chdir). When the path is being resolved, a
   vnode is always locked with VNODE_OPCL (TLL_READSER) and then
   downgraded to VNODE_READ if read-only is actually requested. Thread
   A locks the vmnt with VMNT_WRITE (TLL_READSER) which still allows
   VMNT_READ locks. Thread B can't acquire a lock on the vnode because
   thread A has it; Thread A can't upgrade its vmnt lock to VMNT_WRITE
   (TLL_WRITE) because thread B has a VMNT_READ lock on it.
   By serializing vmnt locks during path parsing, thread B can only
   acquire a lock on vmp when thread A has completely finished its
   operation.
			
			
This commit is contained in:
		
							parent
							
								
									ecf9b40841
								
							
						
					
					
						commit
						7c8b3ddfed
					
				| @ -325,6 +325,12 @@ tll_access_t locktype; | |||||||
| 	assert(filp->filp_softlock == NULL); | 	assert(filp->filp_softlock == NULL); | ||||||
| 	filp->filp_softlock = fp; | 	filp->filp_softlock = fp; | ||||||
|   } else { |   } else { | ||||||
|  | 	/* We have to make an exception for vnodes belonging to pipes. Even
 | ||||||
|  | 	 * read(2) operations on pipes change the vnode and therefore require | ||||||
|  | 	 * exclusive access. | ||||||
|  | 	 */ | ||||||
|  | 	if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ) | ||||||
|  | 		locktype = VNODE_WRITE; | ||||||
| 	lock_vnode(vp, locktype); | 	lock_vnode(vp, locktype); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -165,7 +165,7 @@ int do_unlink() | |||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   assert(vmp != NULL); |   assert(vmp != NULL); | ||||||
|   tll_upgrade(&vmp->m_lock); |   upgrade_vmnt_lock(vmp); | ||||||
| 
 | 
 | ||||||
|   if (job_call_nr == UNLINK) |   if (job_call_nr == UNLINK) | ||||||
| 	  r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath); | 	  r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath); | ||||||
| @ -261,7 +261,7 @@ int do_rename() | |||||||
|       (r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1; |       (r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1; | ||||||
| 
 | 
 | ||||||
|   if (r == OK) { |   if (r == OK) { | ||||||
| 	tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */ | 	upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */ | ||||||
| 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, | 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name, | ||||||
| 		       new_dirp->v_inode_nr, fullpath); | 		       new_dirp->v_inode_nr, fullpath); | ||||||
|   } |   } | ||||||
|  | |||||||
| @ -304,7 +304,7 @@ int do_sync() | |||||||
|   int r = OK; |   int r = OK; | ||||||
| 
 | 
 | ||||||
|   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { |   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { | ||||||
| 	if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK) | 	if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) | ||||||
| 		break; | 		break; | ||||||
| 	if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE && | 	if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE && | ||||||
| 		 vmp->m_root_node != NULL) { | 		 vmp->m_root_node != NULL) { | ||||||
| @ -331,20 +331,22 @@ int do_fsync() | |||||||
| 
 | 
 | ||||||
|   if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL) |   if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL) | ||||||
| 	return(err_code); | 	return(err_code); | ||||||
|  | 
 | ||||||
|   dev = rfilp->filp_vno->v_dev; |   dev = rfilp->filp_vno->v_dev; | ||||||
|  |   unlock_filp(rfilp); | ||||||
|  | 
 | ||||||
|   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { |   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) { | ||||||
|  | 	if (vmp->m_dev != dev) continue; | ||||||
|  | 	if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) | ||||||
|  | 		break; | ||||||
| 	if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && | 	if (vmp->m_dev != NO_DEV && vmp->m_dev == dev && | ||||||
| 		vmp->m_fs_e != NONE && vmp->m_root_node != NULL) { | 		vmp->m_fs_e != NONE && vmp->m_root_node != NULL) { | ||||||
| 
 | 
 | ||||||
| 		if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK) |  | ||||||
| 			break; |  | ||||||
| 		req_sync(vmp->m_fs_e); | 		req_sync(vmp->m_fs_e); | ||||||
| 		unlock_vmnt(vmp); |  | ||||||
| 	} | 	} | ||||||
|  | 	unlock_vmnt(vmp); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|   unlock_filp(rfilp); |  | ||||||
| 
 |  | ||||||
|   return(r); |   return(r); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode) | |||||||
| 			if (oflags & O_TRUNC) { | 			if (oflags & O_TRUNC) { | ||||||
| 				if ((r = forbidden(fp, vp, W_BIT)) != OK) | 				if ((r = forbidden(fp, vp, W_BIT)) != OK) | ||||||
| 					break; | 					break; | ||||||
|  | 				upgrade_vnode_lock(vp); | ||||||
| 				truncate_vnode(vp, 0); | 				truncate_vnode(vp, 0); | ||||||
| 			} | 			} | ||||||
| 			break; | 			break; | ||||||
| @ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode) | |||||||
| 		   case S_IFIFO: | 		   case S_IFIFO: | ||||||
| 			/* Create a mapped inode on PFS which handles reads
 | 			/* Create a mapped inode on PFS which handles reads
 | ||||||
| 			   and writes to this named pipe. */ | 			   and writes to this named pipe. */ | ||||||
| 			tll_upgrade(&vp->v_lock); | 			upgrade_vnode_lock(vp); | ||||||
| 			r = map_vnode(vp, PFS_PROC_NR); | 			r = map_vnode(vp, PFS_PROC_NR); | ||||||
| 			if (r == OK) { | 			if (r == OK) { | ||||||
| 				if (vp->v_ref_count == 1) { | 				if (vp->v_ref_count == 1) { | ||||||
| @ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	lock_vnode(vp, VNODE_OPCL); | 	lock_vnode(vp, VNODE_OPCL); | ||||||
|  | 	upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */ | ||||||
| 
 | 
 | ||||||
| 	if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK || | 	if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK || | ||||||
| 	    (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid, | 	    (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid, | ||||||
| @ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits) | |||||||
| 		/* Can't create inode either due to permissions or some other
 | 		/* Can't create inode either due to permissions or some other
 | ||||||
| 		 * problem. In case r is EEXIST, we might be dealing with a | 		 * problem. In case r is EEXIST, we might be dealing with a | ||||||
| 		 * dangling symlink.*/ | 		 * dangling symlink.*/ | ||||||
|  | 
 | ||||||
|  | 		/* Downgrade lock to prevent deadlock during symlink resolving*/ | ||||||
|  | 		downgrade_vmnt_lock(dir_vmp); | ||||||
|  | 
 | ||||||
| 		if (r == EEXIST) { | 		if (r == EEXIST) { | ||||||
| 			struct vnode *slp, *old_wd; | 			struct vnode *slp, *old_wd; | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| 			/* Resolve path up to symlink */ | 			/* Resolve path up to symlink */ | ||||||
| 			findnode.l_flags = PATH_RET_SYMLINK; | 			findnode.l_flags = PATH_RET_SYMLINK; | ||||||
| 			findnode.l_vnode_lock = VNODE_READ; | 			findnode.l_vnode_lock = VNODE_READ; | ||||||
|  | |||||||
| @ -398,6 +398,7 @@ struct fproc *rfp; | |||||||
|   struct vnode *dir_vp; |   struct vnode *dir_vp; | ||||||
|   struct vmnt *vmp, *vmpres; |   struct vmnt *vmp, *vmpres; | ||||||
|   struct lookup_res res; |   struct lookup_res res; | ||||||
|  |   tll_access_t mnt_lock_type; | ||||||
| 
 | 
 | ||||||
|   assert(resolve->l_vmp); |   assert(resolve->l_vmp); | ||||||
|   assert(resolve->l_vnode); |   assert(resolve->l_vnode); | ||||||
| @ -435,7 +436,12 @@ struct fproc *rfp; | |||||||
|   symloop = 0;	/* Number of symlinks seen so far */ |   symloop = 0;	/* Number of symlinks seen so far */ | ||||||
| 
 | 
 | ||||||
|   /* Lock vmnt */ |   /* Lock vmnt */ | ||||||
|   if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { |   if (resolve->l_vmnt_lock == VMNT_READ) | ||||||
|  | 	mnt_lock_type = VMNT_WRITE; | ||||||
|  |   else | ||||||
|  | 	mnt_lock_type = resolve->l_vmnt_lock; | ||||||
|  | 
 | ||||||
|  |   if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) { | ||||||
| 	if (r == EBUSY) /* vmnt already locked */ | 	if (r == EBUSY) /* vmnt already locked */ | ||||||
| 		vmpres = NULL; | 		vmpres = NULL; | ||||||
| 	else | 	else | ||||||
| @ -532,7 +538,7 @@ struct fproc *rfp; | |||||||
| 	if (vmpres) unlock_vmnt(vmpres); | 	if (vmpres) unlock_vmnt(vmpres); | ||||||
| 	vmpres = find_vmnt(fs_e); | 	vmpres = find_vmnt(fs_e); | ||||||
| 	if (vmpres == NULL) return(EIO);	/* mount point vanished? */ | 	if (vmpres == NULL) return(EIO);	/* mount point vanished? */ | ||||||
| 	if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) { | 	if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) { | ||||||
| 		if (r == EBUSY) | 		if (r == EBUSY) | ||||||
| 			vmpres = NULL;	/* Already locked */ | 			vmpres = NULL;	/* Already locked */ | ||||||
| 		else | 		else | ||||||
| @ -549,6 +555,11 @@ struct fproc *rfp; | |||||||
| 	} | 	} | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
|  |   if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) { | ||||||
|  | 	/* downgrade VMNT_WRITE to VMNT_READ */ | ||||||
|  | 	downgrade_vmnt_lock(*(resolve->l_vmp)); | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|   /* Fill in response fields */ |   /* Fill in response fields */ | ||||||
|   result_node->inode_nr = res.inode_nr; |   result_node->inode_nr = res.inode_nr; | ||||||
|   result_node->fmode = res.fmode; |   result_node->fmode = res.fmode; | ||||||
|  | |||||||
| @ -52,7 +52,7 @@ int do_pipe() | |||||||
| 
 | 
 | ||||||
|   /* Get a lock on PFS */ |   /* Get a lock on PFS */ | ||||||
|   if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone"); |   if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone"); | ||||||
|   if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r); |   if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r); | ||||||
| 
 | 
 | ||||||
|   /* See if a free vnode is available */ |   /* See if a free vnode is available */ | ||||||
|   if ((vp = get_free_vnode()) == NULL) { |   if ((vp = get_free_vnode()) == NULL) { | ||||||
|  | |||||||
| @ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype); | |||||||
| void unlock_vmnt(struct vmnt *vp); | void unlock_vmnt(struct vmnt *vp); | ||||||
| void vmnt_unmap_by_endpt(endpoint_t proc_e); | void vmnt_unmap_by_endpt(endpoint_t proc_e); | ||||||
| void fetch_vmnt_paths(void); | void fetch_vmnt_paths(void); | ||||||
|  | void upgrade_vmnt_lock(struct vmnt *vmp); | ||||||
|  | void downgrade_vmnt_lock(struct vmnt *vmp); | ||||||
| 
 | 
 | ||||||
| /* vnode.c */ | /* vnode.c */ | ||||||
| void check_vnode_locks(void); | void check_vnode_locks(void); | ||||||
| @ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp); | |||||||
| void dup_vnode(struct vnode *vp); | void dup_vnode(struct vnode *vp); | ||||||
| void put_vnode(struct vnode *vp); | void put_vnode(struct vnode *vp); | ||||||
| void vnode_clean_refs(struct vnode *vp); | void vnode_clean_refs(struct vnode *vp); | ||||||
|  | void upgrade_vnode_lock(struct vnode *vp); | ||||||
| 
 | 
 | ||||||
| /* write.c */ | /* write.c */ | ||||||
| int do_write(void); | int do_write(void); | ||||||
|  | |||||||
| @ -267,7 +267,7 @@ size_t req_size; | |||||||
|   u64_t position, new_pos; |   u64_t position, new_pos; | ||||||
| 
 | 
 | ||||||
|   /* Must make sure we're operating on locked filp and vnode */ |   /* Must make sure we're operating on locked filp and vnode */ | ||||||
|   assert(tll_islocked(&f->filp_vno->v_lock)); |   assert(tll_locked_by_me(&f->filp_vno->v_lock)); | ||||||
|   assert(mutex_trylock(&f->filp_lock) == -EDEADLK); |   assert(mutex_trylock(&f->filp_lock) == -EDEADLK); | ||||||
| 
 | 
 | ||||||
|   oflags = f->filp_flags; |   oflags = f->filp_flags; | ||||||
|  | |||||||
| @ -41,7 +41,7 @@ int do_utime() | |||||||
|   if (len == 0) len = (size_t) job_m_in.utime_strlen; |   if (len == 0) len = (size_t) job_m_in.utime_strlen; | ||||||
| 
 | 
 | ||||||
|   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); |   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp); | ||||||
|   resolve.l_vmnt_lock = VMNT_WRITE; |   resolve.l_vmnt_lock = VMNT_READ; | ||||||
|   resolve.l_vnode_lock = VNODE_READ; |   resolve.l_vnode_lock = VNODE_READ; | ||||||
| 
 | 
 | ||||||
|   /* Temporarily open the file */ |   /* Temporarily open the file */ | ||||||
|  | |||||||
| @ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype) | |||||||
|    * request queued ("write bias") or when a read-serialized lock is trying to |    * request queued ("write bias") or when a read-serialized lock is trying to | ||||||
|    * upgrade to write-only. The current lock for this tll is either read or |    * upgrade to write-only. The current lock for this tll is either read or | ||||||
|    * read-serialized. */ |    * read-serialized. */ | ||||||
|   if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) |   if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) { | ||||||
|  | 	assert(!(tllp->t_status & TLL_PEND)); | ||||||
| 	return tll_append(tllp, locktype); | 	return tll_append(tllp, locktype); | ||||||
|  |   } | ||||||
| 
 | 
 | ||||||
|   /* If this lock is in read-serialized mode, we can allow read requests and
 |   /* If this lock is in read-serialized mode, we can allow read requests and
 | ||||||
|    * queue read-serialized requests */ |    * queue read-serialized requests */ | ||||||
|   if (tllp->t_current == TLL_READSER) { |   if (tllp->t_current == TLL_READSER) { | ||||||
| 	if (locktype == TLL_READ) { | 	if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) { | ||||||
| 		tllp->t_readonly++; | 		tllp->t_readonly++; | ||||||
| 		return(OK); | 		return(OK); | ||||||
| 	} else | 	} else | ||||||
|  | |||||||
| @ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype) | |||||||
|   if (r == EBUSY) return(r); |   if (r == EBUSY) return(r); | ||||||
| 
 | 
 | ||||||
|   if (initial_locktype != locktype) { |   if (initial_locktype != locktype) { | ||||||
| 	tll_upgrade(&vmp->m_lock); | 	upgrade_vmnt_lock(vmp); | ||||||
|   } |   } | ||||||
| 
 | 
 | ||||||
| #if LOCK_DEBUG | #if LOCK_DEBUG | ||||||
| @ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp) | |||||||
| 
 | 
 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*===========================================================================*
 | ||||||
|  |  *                             downgrade_vmnt_lock			     * | ||||||
|  |  *===========================================================================*/ | ||||||
|  | void downgrade_vmnt_lock(struct vmnt *vmp) | ||||||
|  | { | ||||||
|  |   ASSERTVMP(vmp); | ||||||
|  |   tll_downgrade(&vmp->m_lock); | ||||||
|  | 
 | ||||||
|  | #if LOCK_DEBUG | ||||||
|  |   /* If we're no longer the owner of a lock, we downgraded to VMNT_READ */ | ||||||
|  |   if (!tll_locked_by_me(&vmp->m_lock)) { | ||||||
|  | 	fp->fp_vmnt_rdlocks++; | ||||||
|  |   } | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*===========================================================================*
 | ||||||
|  |  *                             upgrade_vmnt_lock			     * | ||||||
|  |  *===========================================================================*/ | ||||||
|  | void upgrade_vmnt_lock(struct vmnt *vmp) | ||||||
|  | { | ||||||
|  |   ASSERTVMP(vmp); | ||||||
|  |   tll_upgrade(&vmp->m_lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*===========================================================================*
 | /*===========================================================================*
 | ||||||
|  *                             fetch_vmnt_paths				     * |  *                             fetch_vmnt_paths				     * | ||||||
|  *===========================================================================*/ |  *===========================================================================*/ | ||||||
|  | |||||||
| @ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp) | |||||||
|   tll_unlock(&vp->v_lock); |   tll_unlock(&vp->v_lock); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*===========================================================================*
 | ||||||
|  |  *				vnode				     * | ||||||
|  |  *===========================================================================*/ | ||||||
|  | void upgrade_vnode_lock(struct vnode *vp) | ||||||
|  | { | ||||||
|  |   ASSERTVP(vp); | ||||||
|  |   tll_upgrade(&vp->v_lock); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*===========================================================================*
 | /*===========================================================================*
 | ||||||
|  *				dup_vnode				     * |  *				dup_vnode				     * | ||||||
|  *===========================================================================*/ |  *===========================================================================*/ | ||||||
| @ -259,7 +268,7 @@ void put_vnode(struct vnode *vp) | |||||||
| 
 | 
 | ||||||
|   /* If we already had a lock, there is a consistency problem */ |   /* If we already had a lock, there is a consistency problem */ | ||||||
|   assert(lock_vp != EBUSY); |   assert(lock_vp != EBUSY); | ||||||
|   tll_upgrade(&vp->v_lock);	/* Make sure nobody else accesses this vnode */ |   upgrade_vnode_lock(vp); /* Acquire exclusive access */ | ||||||
| 
 | 
 | ||||||
|   /* A vnode that's not in use can't be put back. */ |   /* A vnode that's not in use can't be put back. */ | ||||||
|   if (vp->v_ref_count <= 0) |   if (vp->v_ref_count <= 0) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Thomas Veerman
						Thomas Veerman