mirror of
				https://github.com/open-source-parsers/jsoncpp.git
				synced 2025-11-04 02:32:19 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			202 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			7.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#!/usr/bin/env python
 | 
						|
# encoding: utf-8
 | 
						|
# Baptiste Lepilleur, 2009
 | 
						|
 | 
						|
from dircache import listdir
 | 
						|
import re
 | 
						|
import fnmatch
 | 
						|
import os.path
 | 
						|
 | 
						|
 | 
						|
# These fnmatch expressions are used by default to prune the directory tree
 | 
						|
# while doing the recursive traversal in the glob_impl method of glob function.
 | 
						|
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
 | 
						|
 | 
						|
# These fnmatch expressions are used by default to exclude files and dirs
 | 
						|
# while doing the recursive traversal in the glob_impl method of glob function.
 | 
						|
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
 | 
						|
 | 
						|
# These ant_glob expressions are used by default to exclude files and dirs and also prune the directory tree
 | 
						|
# while doing the recursive traversal in the glob_impl method of glob function.
 | 
						|
default_excludes = '''
 | 
						|
**/*~
 | 
						|
**/#*#
 | 
						|
**/.#*
 | 
						|
**/%*%
 | 
						|
**/._*
 | 
						|
**/CVS
 | 
						|
**/CVS/**
 | 
						|
**/.cvsignore
 | 
						|
**/SCCS
 | 
						|
**/SCCS/**
 | 
						|
**/vssver.scc
 | 
						|
**/.svn
 | 
						|
**/.svn/**
 | 
						|
**/.git
 | 
						|
**/.git/**
 | 
						|
**/.gitignore
 | 
						|
**/.bzr
 | 
						|
**/.bzr/**
 | 
						|
**/.hg
 | 
						|
**/.hg/**
 | 
						|
**/_MTN
 | 
						|
**/_MTN/**
 | 
						|
**/_darcs
 | 
						|
**/_darcs/**
 | 
						|
**/.DS_Store '''
 | 
						|
 | 
						|
DIR = 1
 | 
						|
FILE = 2
 | 
						|
DIR_LINK = 4
 | 
						|
FILE_LINK = 8
 | 
						|
LINKS = DIR_LINK | FILE_LINK
 | 
						|
ALL_NO_LINK = DIR | FILE
 | 
						|
ALL = DIR | FILE | LINKS
 | 
						|
 | 
						|
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )
 | 
						|
 | 
						|
def ant_pattern_to_re( ant_pattern ):
 | 
						|
    """Generates a regular expression from the ant pattern.
 | 
						|
    Matching convention:
 | 
						|
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
 | 
						|
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
 | 
						|
    *.py: match 'script.py' but not 'a/script.py'
 | 
						|
    """
 | 
						|
    rex = ['^']
 | 
						|
    next_pos = 0
 | 
						|
    sep_rex = r'(?:/|%s)' % re.escape( os.path.sep )
 | 
						|
##    print 'Converting', ant_pattern
 | 
						|
    for match in _ANT_RE.finditer( ant_pattern ):
 | 
						|
##        print 'Matched', match.group()
 | 
						|
##        print match.start(0), next_pos
 | 
						|
        if match.start(0) != next_pos:
 | 
						|
            raise ValueError( "Invalid ant pattern" )
 | 
						|
        if match.group(1): # /**/
 | 
						|
            rex.append( sep_rex + '(?:.*%s)?' % sep_rex )
 | 
						|
        elif match.group(2): # **/
 | 
						|
            rex.append( '(?:.*%s)?' % sep_rex )
 | 
						|
        elif match.group(3): # /**
 | 
						|
            rex.append( sep_rex + '.*' )
 | 
						|
        elif match.group(4): # *
 | 
						|
            rex.append( '[^/%s]*' % re.escape(os.path.sep) )
 | 
						|
        elif match.group(5): # /
 | 
						|
            rex.append( sep_rex )
 | 
						|
        else: # somepath
 | 
						|
            rex.append( re.escape(match.group(6)) )
 | 
						|
        next_pos = match.end()
 | 
						|
    rex.append('$')
 | 
						|
    return re.compile( ''.join( rex ) )
 | 
						|
 | 
						|
def _as_list( l ):
 | 
						|
    if isinstance(l, basestring):
 | 
						|
        return l.split()
 | 
						|
    return l
 | 
						|
 | 
						|
def glob(dir_path,
 | 
						|
         includes = '**/*',
 | 
						|
         excludes = default_excludes,
 | 
						|
         entry_type = FILE,
 | 
						|
         prune_dirs = prune_dirs,
 | 
						|
         max_depth = 25):
 | 
						|
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
 | 
						|
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
 | 
						|
    prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
 | 
						|
    dir_path = dir_path.replace('/',os.path.sep)
 | 
						|
    entry_type_filter = entry_type
 | 
						|
 | 
						|
    def is_pruned_dir( dir_name ):
 | 
						|
        for pattern in prune_dirs:
 | 
						|
            if fnmatch.fnmatch( dir_name, pattern ):
 | 
						|
                return True
 | 
						|
        return False
 | 
						|
 | 
						|
    def apply_filter( full_path, filter_rexs ):
 | 
						|
        """Return True if at least one of the filter regular expression match full_path."""
 | 
						|
        for rex in filter_rexs:
 | 
						|
            if rex.match( full_path ):
 | 
						|
                return True
 | 
						|
        return False
 | 
						|
 | 
						|
    def glob_impl( root_dir_path ):
 | 
						|
        child_dirs = [root_dir_path]
 | 
						|
        while child_dirs:
 | 
						|
            dir_path = child_dirs.pop()
 | 
						|
            for entry in listdir( dir_path ):
 | 
						|
                full_path = os.path.join( dir_path, entry )
 | 
						|
##                print 'Testing:', full_path,
 | 
						|
                is_dir = os.path.isdir( full_path )
 | 
						|
                if is_dir and not is_pruned_dir( entry ): # explore child directory ?
 | 
						|
##                    print '===> marked for recursion',
 | 
						|
                    child_dirs.append( full_path )
 | 
						|
                included = apply_filter( full_path, include_filter )
 | 
						|
                rejected = apply_filter( full_path, exclude_filter )
 | 
						|
                if not included or rejected: # do not include entry ?
 | 
						|
##                    print '=> not included or rejected'
 | 
						|
                    continue
 | 
						|
                link = os.path.islink( full_path )
 | 
						|
                is_file = os.path.isfile( full_path )
 | 
						|
                if not is_file and not is_dir:
 | 
						|
##                    print '=> unknown entry type'
 | 
						|
                    continue
 | 
						|
                if link:
 | 
						|
                    entry_type = is_file and FILE_LINK or DIR_LINK
 | 
						|
                else:
 | 
						|
                    entry_type = is_file and FILE or DIR
 | 
						|
##                print '=> type: %d' % entry_type, 
 | 
						|
                if (entry_type & entry_type_filter) != 0:
 | 
						|
##                    print ' => KEEP'
 | 
						|
                    yield os.path.join( dir_path, entry )
 | 
						|
##                else:
 | 
						|
##                    print ' => TYPE REJECTED'
 | 
						|
    return list( glob_impl( dir_path ) )
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    import unittest
 | 
						|
 | 
						|
    class AntPatternToRETest(unittest.TestCase):
 | 
						|
##        def test_conversion( self ):
 | 
						|
##            self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )
 | 
						|
 | 
						|
        def test_matching( self ):
 | 
						|
            test_cases = [ ( 'path',
 | 
						|
                             ['path'],
 | 
						|
                             ['somepath', 'pathsuffix', '/path', '/path'] ),
 | 
						|
                           ( '*.py',
 | 
						|
                             ['source.py', 'source.ext.py', '.py'],
 | 
						|
                             ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
 | 
						|
                           ( '**/path',
 | 
						|
                             ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
 | 
						|
                             ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
 | 
						|
                           ( 'path/**',
 | 
						|
                             ['path/a', 'path/path/a', 'path//'],
 | 
						|
                             ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
 | 
						|
                           ( '/**/path',
 | 
						|
                             ['/path', '/a/path', '/a/b/path/path', '/path/path'],
 | 
						|
                             ['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
 | 
						|
                           ( 'a/b',
 | 
						|
                             ['a/b'],
 | 
						|
                             ['somea/b', 'a/bsuffix', 'a/b/c'] ),
 | 
						|
                           ( '**/*.py',
 | 
						|
                             ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
 | 
						|
                             ['script.pyc', 'script.pyo', 'a.py/b'] ),
 | 
						|
                           ( 'src/**/*.py',
 | 
						|
                             ['src/a.py', 'src/dir/a.py'],
 | 
						|
                             ['a/src/a.py', '/src/a.py'] ),
 | 
						|
                           ]
 | 
						|
            for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
 | 
						|
                def local_path( paths ):
 | 
						|
                    return [ p.replace('/',os.path.sep) for p in paths ]
 | 
						|
                test_cases.append( (ant_pattern, local_path(accepted_matches), local_path( rejected_matches )) )
 | 
						|
            for ant_pattern, accepted_matches, rejected_matches in test_cases:
 | 
						|
                rex = ant_pattern_to_re( ant_pattern )
 | 
						|
                print 'ant_pattern:', ant_pattern, ' => ', rex.pattern
 | 
						|
                for accepted_match in accepted_matches:
 | 
						|
                    print 'Accepted?:', accepted_match
 | 
						|
                    self.assert_( rex.match( accepted_match ) is not None )
 | 
						|
                for rejected_match in rejected_matches:
 | 
						|
                    print 'Rejected?:', rejected_match
 | 
						|
                    self.assert_( rex.match( rejected_match ) is None )
 | 
						|
 | 
						|
    unittest.main()
 |