You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
220 lines
6.6 KiB
Python
220 lines
6.6 KiB
Python
8 years ago
|
"""Filename matching with shell patterns.
|
||
|
|
||
|
fnmatch(FILENAME, PATTERN) matches according to the local convention.
|
||
|
fnmatchcase(FILENAME, PATTERN) always takes case into account.
|
||
|
|
||
|
The functions operate by translating the pattern into a regular
|
||
|
expression. They cache the compiled regular expressions for speed.
|
||
|
|
||
|
The function translate(PATTERN) returns a regular expression
|
||
|
corresponding to PATTERN. (It does not compile it.)
|
||
|
|
||
|
Based on code from fnmatch.py file distributed with Python 2.6.
|
||
|
|
||
|
Licensed under PSF License (see LICENSE.txt file).
|
||
|
|
||
|
Changes to original fnmatch module:
|
||
|
- translate function supports ``*`` and ``**`` similarly to fnmatch C library
|
||
|
"""
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
|
||
|
# Names exported by a star-import of this module.
__all__ = ["fnmatch", "fnmatchcase", "translate"]
|
||
|
|
||
|
# Memo filled by cached_translate(): pattern string -> (compiled regex,
# numeric ranges) so each pattern is translated and compiled only once.
_cache = {}

# An opening brace that is not preceded by a backslash.  A lookbehind is used
# (instead of consuming the previous character) so that adjacent braces such
# as "{{" are each counted by findall().
LEFT_BRACE = re.compile(
    r"""
    (?<!\\)   # Not preceded by "\"
    \{        # "{"
    """, re.VERBOSE
)

# A closing brace that is not preceded by a backslash.  Same lookbehind
# reasoning as LEFT_BRACE, so "}}" counts as two closing braces.
RIGHT_BRACE = re.compile(
    r"""
    (?<!\\)   # Not preceded by "\"
    \}        # "}"
    """, re.VERBOSE
)

# Two optionally signed integers separated by "..", e.g. "-3..12".
# Group 1 is the first number, group 2 the second.
NUMERIC_RANGE = re.compile(
    r"""
    (         # Capture a number
    [+-] ?    # Zero or one "+" or "-" characters
    \d +      # One or more digits
    )

    \.\.      # ".."

    (         # Capture a number
    [+-] ?    # Zero or one "+" or "-" characters
    \d +      # One or more digits
    )
    """, re.VERBOSE
)
|
||
|
|
||
|
|
||
|
def fnmatch(name, pat):
    """Report whether the path FILENAME matches the shell-style PATTERN.

    Supported pattern syntax:

    - ``*``            matches everything except path separator
    - ``**``           matches everything
    - ``?``            matches any single character
    - ``[seq]``        matches any character in seq
    - ``[!seq]``       matches any char not in seq
    - ``{s1,s2,s3}``   matches any of the strings given (separated by commas)

    An initial period in FILENAME is not special.

    FILENAME is first passed through ``os.path.normpath`` and its
    ``os.sep`` separators are rewritten to "/"; PATTERN is used unchanged.
    The comparison itself is delegated to fnmatchcase(FILENAME, PATTERN),
    which can be called directly to skip the path normalization.
    """
    normalized = os.path.normpath(name)
    # Patterns are written with forward slashes regardless of platform.
    slash_separated = normalized.replace(os.sep, "/")
    return fnmatchcase(slash_separated, pat)
|
||
|
|
||
|
|
||
|
def cached_translate(pat):
    """Return ``(compiled_regex, numeric_groups)`` for PATTERN, memoized.

    On the first request for a pattern it is run through translate(), the
    resulting expression is compiled, and the pair is stored in the
    module-level ``_cache``; later requests return the stored pair.
    """
    entry = _cache.get(pat)
    if entry is None:
        # Cache entries are always tuples, so None reliably means "missing".
        expression, ranges = translate(pat)
        entry = (re.compile(expression), ranges)
        _cache[pat] = entry
    return entry
|
||
|
|
||
|
|
||
|
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    Unlike fnmatch(), FILENAME is used exactly as given (no path
    normalization).

    The pattern is translated (and cached) via cached_translate().  Each
    ``{num1..num2}`` range in the pattern becomes a capture group; after the
    regular expression matches, every captured number must not start with
    "0" and must lie within its range (inclusive) for the overall match to
    succeed.

    Returns True on a match, False otherwise.
    """
    regex, num_groups = cached_translate(pat)
    match = regex.match(name)
    if not match:
        return False
    for num, (min_num, max_num) in zip(match.groups(), num_groups):
        if num is None:
            # The group sits in a ``{a,b}`` alternative that was not taken,
            # so it captured nothing and imposes no numeric constraint.
            continue
        if num[0] == '0' or not (min_num <= int(num) <= max_num):
            return False
    return True
|
||
|
|
||
|
|
||
|
def translate(pat, nested=False):
    """Translate a shell PATTERN to a regular expression.

    Parameters:
        pat: the shell-style pattern string.
        nested: when true, the result is a fragment: no flags and no end
            anchor are added.  Used for the recursive call on the contents
            of a comma-less ``{...}``.

    Returns a ``(regex_source, numeric_groups)`` tuple.  ``regex_source`` is
    an uncompiled regular expression string.  ``numeric_groups`` holds one
    ``[low, high]`` list of ints per ``{low..high}`` range in the pattern, in
    the same order as the capture groups emitted for them; callers are
    expected to check the captured numbers against these bounds.

    A backslash is not copied to the output unless it is itself escaped
    (``\\\\``); it only suppresses the special meaning of a following ``,``
    or ``}``.
    """

    index, length = 0, len(pat)  # Current index and length of pattern
    brace_level = 0
    in_brackets = False
    result = ''
    is_escaped = False
    # Braces are only treated as alternation when they are balanced.
    matching_braces = (len(LEFT_BRACE.findall(pat)) ==
                       len(RIGHT_BRACE.findall(pat)))
    numeric_groups = []
    while index < length:
        current_char = pat[index]
        index += 1
        if current_char == '*':
            pos = index
            # NOTE: index is not advanced past the second "*", so that
            # character is visited again and appends "[^/]*" after ".*".
            if pos < length and pat[pos] == '*':
                result += '.*'
            else:
                result += '[^/]*'
        elif current_char == '?':
            result += '.'
        elif current_char == '[':
            if in_brackets:
                result += '\\['
            else:
                # Look ahead for an unescaped "/" before the closing "]".
                pos = index
                has_slash = False
                while pos < length and pat[pos] != ']':
                    if pat[pos] == '/' and pat[pos-1] != '\\':
                        has_slash = True
                        break
                    pos += 1
                if has_slash:
                    # Emit the text up to and including the slash between
                    # literal brackets rather than as a character class.
                    result += '\\[' + pat[index:(pos + 1)] + '\\]'
                    index = pos + 2
                else:
                    if index < length and pat[index] in '!^':
                        index += 1
                        result += '[^'
                    else:
                        result += '['
                    in_brackets = True
        elif current_char == '-':
            if in_brackets:
                result += current_char
            else:
                result += '\\' + current_char
        elif current_char == ']':
            result += current_char
            in_brackets = False
        elif current_char == '{':
            # Scan to the first unescaped "," or "}" to classify the brace.
            pos = index
            has_comma = False
            while pos < length and (pat[pos] != '}' or is_escaped):
                if pat[pos] == ',' and not is_escaped:
                    has_comma = True
                    break
                is_escaped = pat[pos] == '\\' and not is_escaped
                pos += 1
            if not has_comma and pos < length:
                # Closed brace without a comma: numeric range or literal.
                num_range = NUMERIC_RANGE.match(pat[index:pos])
                if num_range:
                    # Store a real list: the bounds are cached and unpacked
                    # on every match, which a one-shot map() iterator
                    # (Python 3) would not survive.
                    numeric_groups.append(
                        [int(bound) for bound in num_range.groups()])
                    result += r"([+-]?\d+)"
                else:
                    inner_result, inner_groups = translate(pat[index:pos],
                                                           nested=True)
                    result += '\\{%s\\}' % (inner_result,)
                    numeric_groups += inner_groups
                index = pos + 1
            elif matching_braces:
                result += '(?:'
                brace_level += 1
            else:
                result += '\\{'
        elif current_char == ',':
            if brace_level > 0 and not is_escaped:
                result += '|'
            else:
                result += '\\,'
        elif current_char == '}':
            if brace_level > 0 and not is_escaped:
                result += ')'
                brace_level -= 1
            else:
                result += '\\}'
        elif current_char == '/':
            # "/**/" matches a single "/" or any run of intermediate
            # directories.
            if pat[index:(index + 3)] == "**/":
                result += "(?:/|/.*/)"
                index += 3
            else:
                result += '/'
        elif current_char != '\\':
            result += re.escape(current_char)
        if current_char == '\\':
            if is_escaped:
                result += re.escape(current_char)
            is_escaped = not is_escaped
        else:
            is_escaped = False
    if not nested:
        # Global inline flags must lead the expression (trailing flags are
        # rejected by Python 3.11+); \Z anchors the match at end of string.
        result = '(?ms)' + result + r'\Z'
    return result, numeric_groups
|