VFS: fix locking bugs
.sync and fsync used unnecessarily restrictive locking type
.fsync violated locking order by obtaining a vmnt lock after a filp lock
.fsync contained a TOCTOU bug
.new_node violated locking rules (didn't upgrade lock upon file creation)
.do_pipe used unnecessarily restrictive locking type
.always lock pipes exclusively; even a read operation may require a write
 on the vnode object (to update the pipe size)
.when opening a file with O_TRUNC, upgrade vnode lock when truncating
.utime used unnecessarily restrictive locking type
.path parsing:
  .always acquire VMNT_WRITE or VMNT_EXCL on vmnt and downgrade to
   VMNT_READ if that was what was actually requested. This prevents the
   following deadlock scenario:
   thread A:
     lock_vmnt(vmp, TLL_READSER);
     lock_vnode(vp, TLL_READSER);
     upgrade_vmnt_lock(vmp, TLL_WRITE);
   thread B:
     lock_vmnt(vmp, TLL_READ);
     lock_vnode(vp, TLL_READSER);
    thread A will be stuck in upgrade_vmnt_lock and thread B will be stuck
    in lock_vnode. This happens when, for example, thread A tries to create
    a new node (open.c:new_node) and thread B does an eat_path to change
    directory (stadir.c:do_chdir). When the path is being resolved, a
    vnode is always locked with VNODE_OPCL (TLL_READSER) and then
    downgraded to VNODE_READ if read-only access is actually requested.
    Thread A locks the vmnt with VMNT_WRITE (TLL_READSER), which still
    allows VMNT_READ locks. Thread B can't acquire a lock on the vnode
    because thread A holds it; thread A can't upgrade its vmnt lock to
    VMNT_EXCL (TLL_WRITE) because thread B holds a VMNT_READ lock on it.
    By serializing vmnt locks during path parsing, thread B can only
    acquire a lock on vmp once thread A has completely finished its
    operation. The resulting locking pattern is sketched below.
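
    The fix shows up in the path-parsing hunks further down. Stripped of
    error handling and the mount-point crossing case, the pattern is as
    follows (a sketch based on the patch, not the verbatim code):

      tll_access_t mnt_lock_type;

      /* Serialize path parsing: take at least VMNT_WRITE (TLL_READSER),
       * even when the caller only asked for VMNT_READ. */
      if (resolve->l_vmnt_lock == VMNT_READ)
	      mnt_lock_type = VMNT_WRITE;
      else
	      mnt_lock_type = resolve->l_vmnt_lock;

      if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK)
	      return(r);	/* the real code also handles EBUSY here */

      /* ... resolve the path; a file-creating caller can now call
       * upgrade_vmnt_lock() without racing newly admitted readers ... */

      /* Hand back only the access the caller actually requested. */
      if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type)
	      downgrade_vmnt_lock(*(resolve->l_vmp));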
			
			
parent ecf9b40841
commit 7c8b3ddfed
author Thomas Veerman

@@ -325,6 +325,12 @@ tll_access_t locktype;
 	assert(filp->filp_softlock == NULL);
 	filp->filp_softlock = fp;
   } else {
+	/* We have to make an exception for vnodes belonging to pipes. Even
+	 * read(2) operations on pipes change the vnode and therefore require
+	 * exclusive access.
+	 */
+	if (S_ISFIFO(vp->v_mode) && locktype == VNODE_READ)
+		locktype = VNODE_WRITE;
 	lock_vnode(vp, locktype);
   }
 
@@ -165,7 +165,7 @@ int do_unlink()
   }
 
   assert(vmp != NULL);
-  tll_upgrade(&vmp->m_lock);
+  upgrade_vmnt_lock(vmp);
 
   if (job_call_nr == UNLINK)
 	  r = req_unlink(dirp->v_fs_e, dirp->v_inode_nr, fullpath);

@@ -261,7 +261,7 @@ int do_rename()
       (r1 = forbidden(fp, new_dirp, W_BIT|X_BIT)) != OK) r = r1;
 
   if (r == OK) {
-	tll_upgrade(&oldvmp->m_lock); /* Upgrade to exclusive access */
+	upgrade_vmnt_lock(oldvmp); /* Upgrade to exclusive access */
 	r = req_rename(old_dirp->v_fs_e, old_dirp->v_inode_nr, old_name,
 		       new_dirp->v_inode_nr, fullpath);
   }
@@ -304,7 +304,7 @@ int do_sync()
   int r = OK;
 
   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
-	if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
+	if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
 		break;
 	if (vmp->m_dev != NO_DEV && vmp->m_fs_e != NONE &&
 		 vmp->m_root_node != NULL) {
@@ -331,19 +331,21 @@ int do_fsync()
   if ((rfilp = get_filp(scratch(fp).file.fd_nr, VNODE_READ)) == NULL)
 	return(err_code);
 
   dev = rfilp->filp_vno->v_dev;
-  unlock_filp(rfilp);
 
   for (vmp = &vmnt[0]; vmp < &vmnt[NR_MNTS]; ++vmp) {
+	if (vmp->m_dev != dev) continue;
+	if ((r = lock_vmnt(vmp, VMNT_READ)) != OK)
+		break;
 	if (vmp->m_dev != NO_DEV && vmp->m_dev == dev &&
 		vmp->m_fs_e != NONE && vmp->m_root_node != NULL) {
 
-		if ((r = lock_vmnt(vmp, VMNT_EXCL)) != OK)
-			break;
 		req_sync(vmp->m_fs_e);
-		unlock_vmnt(vmp);
 	}
+	unlock_vmnt(vmp);
   }
 
+  unlock_filp(rfilp);
+
   return(r);
 }
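
The do_fsync() hunk above is also the TOCTOU fix: the old code copied the
device number out of the filp and released the filp lock before using it,
leaving a window in which the fd could be closed and the vnode reused, so a
stale device could be synced. Reduced to its essence (sync_matching_vmnts()
is a hypothetical stand-in for the vmnt loop):

  /* before: check and use are separated by an unlocked window */
  dev = rfilp->filp_vno->v_dev;
  unlock_filp(rfilp);		/* fd may be closed, vnode reused here */
  sync_matching_vmnts(dev);	/* may act on a stale device number */

  /* after: the filp stays locked across the use */
  dev = rfilp->filp_vno->v_dev;
  sync_matching_vmnts(dev);
  unlock_filp(rfilp);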
@@ -172,6 +172,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
 			if (oflags & O_TRUNC) {
 				if ((r = forbidden(fp, vp, W_BIT)) != OK)
 					break;
+				upgrade_vnode_lock(vp);
 				truncate_vnode(vp, 0);
 			}
 			break;

@@ -243,7 +244,7 @@ int common_open(char path[PATH_MAX], int oflags, mode_t omode)
 		   case S_IFIFO:
 			/* Create a mapped inode on PFS which handles reads
 			   and writes to this named pipe. */
-			tll_upgrade(&vp->v_lock);
+			upgrade_vnode_lock(vp);
 			r = map_vnode(vp, PFS_PROC_NR);
 			if (r == OK) {
 				if (vp->v_ref_count == 1) {

@@ -374,6 +375,7 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
 	}
 
 	lock_vnode(vp, VNODE_OPCL);
+	upgrade_vmnt_lock(dir_vmp); /* Creating file, need exclusive access */
 
 	if ((r = forbidden(fp, dirp, W_BIT|X_BIT)) != OK ||
 	    (r = req_create(dirp->v_fs_e, dirp->v_inode_nr,bits, fp->fp_effuid,

@@ -381,9 +383,14 @@ static struct vnode *new_node(struct lookup *resolve, int oflags, mode_t bits)
 		/* Can't create inode either due to permissions or some other
 		 * problem. In case r is EEXIST, we might be dealing with a
 		 * dangling symlink.*/
+
+		/* Downgrade lock to prevent deadlock during symlink resolving*/
+		downgrade_vmnt_lock(dir_vmp);
+
 		if (r == EEXIST) {
 			struct vnode *slp, *old_wd;
 
+
 			/* Resolve path up to symlink */
 			findnode.l_flags = PATH_RET_SYMLINK;
 			findnode.l_vnode_lock = VNODE_READ;
@@ -398,6 +398,7 @@ struct fproc *rfp;
   struct vnode *dir_vp;
   struct vmnt *vmp, *vmpres;
   struct lookup_res res;
+  tll_access_t mnt_lock_type;
 
   assert(resolve->l_vmp);
   assert(resolve->l_vnode);

@@ -435,7 +436,12 @@ struct fproc *rfp;
   symloop = 0;	/* Number of symlinks seen so far */
 
   /* Lock vmnt */
-  if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+  if (resolve->l_vmnt_lock == VMNT_READ)
+	mnt_lock_type = VMNT_WRITE;
+  else
+	mnt_lock_type = resolve->l_vmnt_lock;
+
+  if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
 	if (r == EBUSY) /* vmnt already locked */
 		vmpres = NULL;
 	else

@@ -532,7 +538,7 @@ struct fproc *rfp;
 	if (vmpres) unlock_vmnt(vmpres);
 	vmpres = find_vmnt(fs_e);
 	if (vmpres == NULL) return(EIO);	/* mount point vanished? */
-	if ((r = lock_vmnt(vmpres, resolve->l_vmnt_lock)) != OK) {
+	if ((r = lock_vmnt(vmpres, mnt_lock_type)) != OK) {
 		if (r == EBUSY)
 			vmpres = NULL;	/* Already locked */
 		else

@@ -549,6 +555,11 @@ struct fproc *rfp;
 	}
   }
 
+  if (*(resolve->l_vmp) != NULL && resolve->l_vmnt_lock != mnt_lock_type) {
+	/* downgrade VMNT_WRITE to VMNT_READ */
+	downgrade_vmnt_lock(*(resolve->l_vmp));
+  }
+
   /* Fill in response fields */
   result_node->inode_nr = res.inode_nr;
   result_node->fmode = res.fmode;
@@ -52,7 +52,7 @@ int do_pipe()
 
   /* Get a lock on PFS */
   if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
-  if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) return(r);
+  if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r);
 
   /* See if a free vnode is available */
   if ((vp = get_free_vnode()) == NULL) {
@@ -316,6 +316,8 @@ int lock_vmnt(struct vmnt *vp, tll_access_t locktype);
 void unlock_vmnt(struct vmnt *vp);
 void vmnt_unmap_by_endpt(endpoint_t proc_e);
 void fetch_vmnt_paths(void);
+void upgrade_vmnt_lock(struct vmnt *vmp);
+void downgrade_vmnt_lock(struct vmnt *vmp);
 
 /* vnode.c */
 void check_vnode_locks(void);

@@ -329,6 +331,7 @@ void unlock_vnode(struct vnode *vp);
 void dup_vnode(struct vnode *vp);
 void put_vnode(struct vnode *vp);
 void vnode_clean_refs(struct vnode *vp);
+void upgrade_vnode_lock(struct vnode *vp);
 
 /* write.c */
 int do_write(void);
@@ -267,7 +267,7 @@ size_t req_size;
   u64_t position, new_pos;
 
   /* Must make sure we're operating on locked filp and vnode */
-  assert(tll_islocked(&f->filp_vno->v_lock));
+  assert(tll_locked_by_me(&f->filp_vno->v_lock));
   assert(mutex_trylock(&f->filp_lock) == -EDEADLK);
 
   oflags = f->filp_flags;
@@ -41,7 +41,7 @@ int do_utime()
   if (len == 0) len = (size_t) job_m_in.utime_strlen;
 
   lookup_init(&resolve, fullpath, PATH_NOFLAGS, &vmp, &vp);
-  resolve.l_vmnt_lock = VMNT_WRITE;
+  resolve.l_vmnt_lock = VMNT_READ;
   resolve.l_vnode_lock = VNODE_READ;
 
   /* Temporarily open the file */
@@ -185,13 +185,15 @@ int tll_lock(tll_t *tllp, tll_access_t locktype)
    * request queued ("write bias") or when a read-serialized lock is trying to
    * upgrade to write-only. The current lock for this tll is either read or
    * read-serialized. */
-  if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR))
+  if (tllp->t_write != NULL || (tllp->t_status & TLL_UPGR)) {
+	assert(!(tllp->t_status & TLL_PEND));
 	return tll_append(tllp, locktype);
+  }
 
   /* If this lock is in read-serialized mode, we can allow read requests and
    * queue read-serialized requests */
   if (tllp->t_current == TLL_READSER) {
-	if (locktype == TLL_READ) {
+	if (locktype == TLL_READ && !(tllp->t_status & TLL_UPGR)) {
 		tllp->t_readonly++;
 		return(OK);
 	} else
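
The tll_lock() hunk above closes the window behind the deadlock described in
the commit message: once a read-serialized holder has announced an upgrade
(TLL_UPGR), new readers are queued instead of admitted, so the upgrader can
no longer be starved. A hypothetical predicate (not the actual MINIX code)
summarizing when a TLL_READ request is now granted immediately:

  static int may_grant_read_now(tll_t *tllp)
  {
	if (tllp->t_write != NULL) return(0);	  /* writer active or queued */
	if (tllp->t_status & TLL_UPGR) return(0); /* holder wants to upgrade */
	return(1);				  /* safe to admit a reader */
  }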
@@ -164,7 +164,7 @@ int lock_vmnt(struct vmnt *vmp, tll_access_t locktype)
   if (r == EBUSY) return(r);
 
   if (initial_locktype != locktype) {
-	tll_upgrade(&vmp->m_lock);
+	upgrade_vmnt_lock(vmp);
   }
 
 #if LOCK_DEBUG

@@ -216,6 +216,31 @@ void unlock_vmnt(struct vmnt *vmp)
 
 }
 
+/*===========================================================================*
+ *				downgrade_vmnt_lock			     *
+ *===========================================================================*/
+void downgrade_vmnt_lock(struct vmnt *vmp)
+{
+  ASSERTVMP(vmp);
+  tll_downgrade(&vmp->m_lock);
+
+#if LOCK_DEBUG
+  /* If we're no longer the owner of a lock, we downgraded to VMNT_READ */
+  if (!tll_locked_by_me(&vmp->m_lock)) {
+	fp->fp_vmnt_rdlocks++;
+  }
+#endif
+}
+
+/*===========================================================================*
+ *				upgrade_vmnt_lock			     *
+ *===========================================================================*/
+void upgrade_vmnt_lock(struct vmnt *vmp)
+{
+  ASSERTVMP(vmp);
+  tll_upgrade(&vmp->m_lock);
+}
+
 /*===========================================================================*
  *				fetch_vmnt_paths			     *
  *===========================================================================*/
@@ -212,6 +212,15 @@ void unlock_vnode(struct vnode *vp)
   tll_unlock(&vp->v_lock);
 }
 
+/*===========================================================================*
+ *				upgrade_vnode_lock			     *
+ *===========================================================================*/
+void upgrade_vnode_lock(struct vnode *vp)
+{
+  ASSERTVP(vp);
+  tll_upgrade(&vp->v_lock);
+}
+
 /*===========================================================================*
  *				dup_vnode				     *
  *===========================================================================*/

@@ -259,7 +268,7 @@ void put_vnode(struct vnode *vp)
 
   /* If we already had a lock, there is a consistency problem */
   assert(lock_vp != EBUSY);
-  tll_upgrade(&vp->v_lock);	/* Make sure nobody else accesses this vnode */
+  upgrade_vnode_lock(vp); /* Acquire exclusive access */
 
   /* A vnode that's not in use can't be put back. */
   if (vp->v_ref_count <= 0)