2010-02-23 08:44:52 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# encoding: utf-8
|
|
|
|
# Baptiste Lepilleur, 2009
|
|
|
|
|
2014-11-19 23:30:47 -06:00
|
|
|
from __future__ import print_function
|
2010-02-23 08:44:52 +00:00
|
|
|
from dircache import listdir
|
|
|
|
import re
|
|
|
|
import fnmatch
|
|
|
|
import os.path
|
|
|
|
|
|
|
|
|
|
|
|
# Whitespace-separated fnmatch expressions, matched against a directory's own
# name (not its full path). glob() never descends into a matching directory.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# Whitespace-separated ant-style glob expressions used by default by glob() to
# reject entries: editor backup/lock files and version-control metadata.
# Each pattern is converted with ant_pattern_to_re() and matched against the
# entry's full path.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
|
|
|
|
|
|
|
|
# Entry-type bit flags. Combine them with ``|`` to build the ``entry_type``
# mask accepted by glob(); an entry is kept when its flag is set in the mask.
DIR = 1          # directory that is not a symbolic link
FILE = 2         # regular file that is not a symbolic link
DIR_LINK = 4     # symbolic link that resolves to a directory
FILE_LINK = 8    # symbolic link that resolves to a regular file

# Convenience masks.
LINKS = DIR_LINK | FILE_LINK
ALL_NO_LINK = DIR | FILE
ALL = DIR | FILE | LINKS
|
|
|
|
|
2015-01-24 15:29:52 -06:00
|
|
|
# Tokenizer for ant patterns. Alternatives are tried in order, so the longer
# '**' forms win over a plain '*' or '/':
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: literal text
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Generates a regular expression from the ant pattern.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    Both '/' and the platform separator (os.path.sep) are accepted wherever
    the pattern contains a '/'.

    Args:
        ant_pattern: the ant-style pattern string to convert.

    Returns:
        A compiled regular expression object anchored at both ends; use its
        ``match`` method to test a path.

    Raises:
        ValueError: if the tokens found in ``ant_pattern`` are not contiguous.
            This is a defensive check: the tokenizer's last alternative
            accepts any run of characters other than '*' and '/'.
    """
    escaped_sep = re.escape(os.path.sep)
    sep_rex = r'(?:/|%s)' % escaped_sep
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer(ant_pattern):
        if match.start(0) != next_pos:
            raise ValueError("Invalid ant pattern")
        if match.group(1):  # /**/ : a separator, then zero or more directories
            rex.append(sep_rex + '(?:.*%s)?' % sep_rex)
        elif match.group(2):  # **/ : zero or more leading directories
            rex.append('(?:.*%s)?' % sep_rex)
        elif match.group(3):  # /** : a separator, then anything
            rex.append(sep_rex + '.*')
        elif match.group(4):  # * : anything within one path component
            rex.append('[^/%s]*' % escaped_sep)
        elif match.group(5):  # /
            rex.append(sep_rex)
        else:  # literal text (possibly empty)
            rex.append(re.escape(match.group(6)))
        next_pos = match.end()
    # '\Z' rather than '$': '$' would also match just before a trailing
    # newline, so 'path\n' would be accepted by the pattern 'path'.
    rex.append(r'\Z')
    # DOTALL lets '**' span path components that contain a newline, which is
    # a legal file name character on POSIX systems.
    return re.compile(''.join(rex), re.DOTALL)
|
2011-06-21 21:18:49 +00:00
|
|
|
|
2015-01-24 15:29:52 -06:00
|
|
|
def _as_list(l):
|
2011-06-21 21:18:49 +00:00
|
|
|
if isinstance(l, basestring):
|
|
|
|
return l.split()
|
|
|
|
return l
|
2010-02-23 08:44:52 +00:00
|
|
|
|
|
|
|
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Return the entries found under ``dir_path`` that pass the filters.

    The tree is walked iteratively. Every entry's full path (``dir_path``
    joined with the relative location of the entry) is tested against the
    include and exclude patterns; an entry is kept when it matches at least
    one include pattern, matches no exclude pattern, and its type is selected
    by ``entry_type``.

    Args:
        dir_path: directory to scan; '/' is replaced by os.path.sep.
        includes: ant patterns, as a whitespace-separated string or a list.
        excludes: ant patterns, as a whitespace-separated string or a list.
        entry_type: bit mask built from DIR, FILE, DIR_LINK and FILE_LINK.
        prune_dirs: fnmatch expressions (string or list) tested against the
            bare name of each directory; matching directories are never
            descended into. Note that an excluded directory that is not
            pruned is still traversed.
        max_depth: accepted for interface compatibility; the traversal does
            not currently read it.

    Returns:
        A list of matching paths, each starting with ``dir_path``.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune expressions."""
        return any(fnmatch.fnmatch(dir_name, pattern)
                   for pattern in prune_dirs)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expression match full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        """Yield every accepted entry below root_dir_path."""
        child_dirs = [root_dir_path]  # stack of directories left to scan
        while child_dirs:
            dir_path = child_dirs.pop()
            # Sorted so the order does not depend on the file system.
            for entry in sorted(os.listdir(dir_path)):
                full_path = os.path.join(dir_path, entry)
                is_dir = os.path.isdir(full_path)
                if is_dir and not is_pruned_dir(entry):
                    # Recursion is decided before filtering: only prune_dirs
                    # stops the descent, the exclude patterns do not.
                    child_dirs.append(full_path)
                included = apply_filter(full_path, include_filter)
                rejected = apply_filter(full_path, exclude_filter)
                if not included or rejected:
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    # Neither a file nor a directory (e.g. a dangling link).
                    continue
                if os.path.islink(full_path):
                    kind = FILE_LINK if is_file else DIR_LINK
                else:
                    kind = FILE if is_file else DIR
                if kind & entry_type_filter:
                    yield full_path

    return list(glob_impl(dir_path))
|
2010-02-23 08:44:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks which paths the regex built from an ant pattern accepts."""

        def test_matching(self):
            # Each case: (ant pattern, paths that must match,
            #             paths that must not match).
            test_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path',
                  '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix',
                  'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a',
                  'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py',
                  '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]

            def local_path(paths):
                """Rewrite '/' separated paths with the native separator."""
                return [p.replace('/', os.path.sep) for p in paths]

            # Run every case a second time with native path separators.
            test_cases += [
                (ant_pattern, local_path(accepted), local_path(rejected))
                for ant_pattern, accepted, rejected in test_cases
            ]

            for ant_pattern, accepted_matches, rejected_matches in test_cases:
                rex = ant_pattern_to_re(ant_pattern)
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for accepted_match in accepted_matches:
                    print('Accepted?:', accepted_match)
                    self.assertTrue(
                        rex.match(accepted_match) is not None,
                        '%r should match %r' % (ant_pattern, accepted_match))
                for rejected_match in rejected_matches:
                    print('Rejected?:', rejected_match)
                    self.assertTrue(
                        rex.match(rejected_match) is None,
                        '%r should not match %r' % (ant_pattern,
                                                    rejected_match))

    unittest.main()
|