You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
350 lines
9.9 KiB
Python
350 lines
9.9 KiB
Python
11 years ago
|
#!/usr/bin/env python
|
||
|
# encoding: utf-8
|
||
|
|
||
|
"""
|
||
|
Not really a lexer in the classical sense, but code to convert snippet
|
||
|
definitions into logical units called Tokens.
|
||
|
"""
|
||
|
|
||
|
import string
|
||
|
import re
|
||
|
|
||
|
from UltiSnips.compatibility import as_unicode
|
||
|
from UltiSnips.position import Position
|
||
|
from UltiSnips.escaping import unescape
|
||
|
|
||
|
class _TextIterator(object):
    """Character iterator over a text that also tracks a line/column position.

    The text is converted with as_unicode(); the position starts at the
    line and column of the 'offset' given to the constructor.
    """

    def __init__(self, text, offset):
        self._text = as_unicode(text)
        self._line = offset.line
        self._col = offset.col

        self._idx = 0

    def __iter__(self):
        """Iterator interface."""
        return self

    def __next__(self):
        """Returns the next character and advances the position.

        Raises StopIteration once the text is exhausted.
        """
        if self._idx >= len(self._text):
            raise StopIteration

        rv = self._text[self._idx]
        # Only '\n' starts a new line. A single character can never equal
        # the two character string '\r\n', so a CRLF pair is counted as a
        # '\r' (one more column) followed by the '\n' that ends the line.
        if rv == '\n':
            self._line += 1
            self._col = 0
        else:
            self._col += 1
        self._idx += 1
        return rv
    next = __next__  # for python2

    def peek(self, count=1):
        """Returns the next 'count' characters without advancing the stream.

        For a 'count' greater than one a slice is returned, which is ''
        when nothing is left. Otherwise the single next character is
        returned, or None at the end of the text.
        """
        if count > 1:  # This might return '' if nothing is found
            return self._text[self._idx:self._idx + count]
        if self._idx < len(self._text):
            return self._text[self._idx]
        return None

    @property
    def pos(self):
        """Current position in the text."""
        return Position(self._line, self._col)
|
||
|
|
||
|
def _parse_number(stream):
    """
    Expects the stream to contain a number next, returns the number
    without consuming any more bytes.

    Only characters from string.digits are read; the first other
    character is left in the stream.

    Raises ValueError if the stream does not start with such a digit.
    """
    digits = []
    while True:
        char = stream.peek()
        # peek() gives None at the end of the stream.
        if not char or char not in string.digits:
            break
        digits.append(next(stream))

    if not digits:
        # Without this check int('') would fail with an unhelpful message,
        # e.g. when the caller's own check accepted a character that is
        # not in string.digits.
        raise ValueError(
            "Expected a number but found %r" % (stream.peek(),))
    return int("".join(digits))
|
||
|
|
||
|
def _parse_till_closing_brace(stream):
    """
    Collects everything up to the '}' that balances an already opened '{'.

    Braces escaped with a backslash are copied to the result together
    with their backslash and do not count. Unescaped nested '{' ... '}'
    pairs are counted and stay part of the result.

    The balancing '}' is consumed from the stream but not returned.
    """
    collected = []
    depth = 1  # the caller has already consumed the opening '{'
    while True:
        if EscapeCharToken.starts_here(stream, '{}'):
            # Copy the backslash and the escaped brace verbatim.
            collected.append(next(stream))
            collected.append(next(stream))
            continue

        current = next(stream)
        if current == '{':
            depth += 1
        elif current == '}':
            depth -= 1
            if depth == 0:
                break
        collected.append(current)
    return "".join(collected)
|
||
|
|
||
|
def _parse_till_unescaped_char(stream, chars):
    """
    Reads from 'stream' until one of 'chars' shows up unescaped.

    Returns a tuple (text, terminator). 'text' is everything read before
    the terminator, with escape sequences kept verbatim (backslash
    included). The terminator is consumed from the stream.
    """
    pieces = []
    while True:
        consumed_escape = False
        for candidate in chars:
            if EscapeCharToken.starts_here(stream, candidate):
                # Keep both the backslash and the escaped character.
                pieces.append(next(stream))
                pieces.append(next(stream))
                consumed_escape = True
        if consumed_escape:
            continue

        current = next(stream)
        if current in chars:
            return "".join(pieces), current
        pieces.append(current)
|
||
|
|
||
|
class Token(object):
    """Base class for the tokens parsed from a snippet definition.

    Constructing a token records the stream position before ('start') and
    after ('end') the subclass specific _parse() has run.
    """

    def __init__(self, gen, indent):
        self.initial_text = as_unicode("")
        self.start = gen.pos
        self._parse(gen, indent)
        self.end = gen.pos

    def _parse(self, stream, indent):
        """Parses the token from 'stream' with the current 'indent'.

        The base implementation consumes nothing.
        """
|
||
|
|
||
|
class TabStopToken(Token):
    """${1:blub}"""
    CHECK = re.compile(r'^\${\d+[:}]')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the tabstop number and its (possibly empty) default text."""
        next(stream)  # $
        next(stream)  # {

        self.number = _parse_number(stream)

        # Skip the optional ':' separator. The closing brace is consumed
        # below in either case, so '${1}' ends up with an empty text.
        if stream.peek() == ":":
            next(stream)
        self.initial_text = _parse_till_closing_brace(stream)

    def __repr__(self):
        fields = (self.start, self.end, self.number, self.initial_text)
        return "TabStopToken(%r,%r,%r,%r)" % fields
|
||
|
|
||
|
class VisualToken(Token):
    """${VISUAL}, ${VISUAL:default} or ${VISUAL:default/search/replace/options}"""
    CHECK = re.compile(r"^\${VISUAL[:}/]")

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the alternative text and, if present, the transformation."""
        for _ in range(len("${VISUAL")):
            next(stream)

        if stream.peek() == ":":
            next(stream)
        raw_text, terminator = _parse_till_unescaped_char(stream, '/}')
        self.alternative_text = unescape(raw_text)

        if terminator != '/':
            # The token was closed by '}': there is no transformation.
            self.search = None
            self.replace = None
            self.options = None
            return

        # Transformation going on
        try:
            self.search = _parse_till_unescaped_char(stream, '/')[0]
            self.replace = _parse_till_unescaped_char(stream, '/')[0]
            self.options = _parse_till_closing_brace(stream)
        except StopIteration:
            # The text ended before the transformation was complete.
            raise RuntimeError(
                "Invalid ${VISUAL} transformation! Forgot to escape a '/'?")

    def __repr__(self):
        fields = (self.start, self.end)
        return "VisualToken(%r,%r)" % fields
|
||
|
|
||
|
class TransformationToken(Token):
    """${1/match/replace/options}"""

    CHECK = re.compile(r'^\${\d+\/')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the tabstop number and the search, replace and options
        parts of the transformation."""
        next(stream)  # $
        next(stream)  # {

        self.number = _parse_number(stream)

        next(stream)  # /

        # Both parts end at the next unescaped '/'; only the text (first
        # item of the returned pair) is of interest.
        search, _ = _parse_till_unescaped_char(stream, '/')
        self.search = search
        replace, _ = _parse_till_unescaped_char(stream, '/')
        self.replace = replace
        self.options = _parse_till_closing_brace(stream)

    def __repr__(self):
        fields = (
            self.start, self.end, self.number, self.search, self.replace
        )
        return "TransformationToken(%r,%r,%r,%r,%r)" % fields
|
||
|
|
||
|
class MirrorToken(Token):
    """$1"""
    CHECK = re.compile(r'^\$\d+')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        lookahead = stream.peek(10)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads the number following the '$'."""
        next(stream)  # $
        self.number = _parse_number(stream)

    def __repr__(self):
        fields = (self.start, self.end, self.number)
        return "MirrorToken(%r,%r,%r)" % fields
|
||
|
|
||
|
class EscapeCharToken(Token):
    """A backslash followed by an escapable character, e.g. \\$ or \\{."""

    @classmethod
    def starts_here(cls, stream, chars=r'{}\$`'):
        """Returns True if this token starts at the current position in
        'stream', False otherwise.

        'chars' holds the characters that count as escapable after the
        backslash.
        """
        cs = stream.peek(2)
        # Always return a real bool instead of falling off the end with
        # None when there is no escape sequence here.
        return len(cs) == 2 and cs[0] == '\\' and cs[1] in chars

    def _parse(self, stream, indent):
        """Drops the backslash and keeps the escaped character as text."""
        next(stream)  # the backslash
        self.initial_text = next(stream)

    def __repr__(self):
        return "EscapeCharToken(%r,%r,%r)" % (
            self.start, self.end, self.initial_text
        )
|
||
|
|
||
|
class ShellCodeToken(Token):
    """`! echo "hi"`"""

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        upcoming = stream.peek(1)
        return upcoming == '`'

    def _parse(self, stream, indent):
        """Reads everything up to the next unescaped backtick as code."""
        next(stream)  # `
        code, _ = _parse_till_unescaped_char(stream, '`')
        self.code = code

    def __repr__(self):
        fields = (self.start, self.end, self.code)
        return "ShellCodeToken(%r,%r,%r)" % fields
|
||
|
|
||
|
class PythonCodeToken(Token):
    """`!p snip.rv = "Hi"`"""
    CHECK = re.compile(r'^`!p\s')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        return cls.CHECK.match(stream.peek(4)) is not None

    def _parse(self, stream, indent):
        """Reads the code up to the closing backtick.

        When 'indent' is not empty, len(indent) characters are cut from the
        front of every line after the first one. The code is stored in
        'self.code' and the indent in 'self.indent'.
        """
        for _ in range(3):
            next(stream)  # `!p
        if stream.peek() in '\t ':
            next(stream)

        code = _parse_till_unescaped_char(stream, '`')[0]

        # Strip the indent if any. splitlines() returns an empty list for
        # empty code, so only index into 'lines' when there is a first line.
        lines = code.splitlines()
        if indent and lines:
            cut = len(indent)
            self.code = lines[0] + '\n'
            self.code += '\n'.join(line[cut:] for line in lines[1:])
        else:
            self.code = code
        self.indent = indent

    def __repr__(self):
        return "PythonCodeToken(%r,%r,%r)" % (
            self.start, self.end, self.code
        )
|
||
|
|
||
|
class VimLCodeToken(Token):
    """`!v g:hi`"""
    CHECK = re.compile(r'^`!v\s')

    @classmethod
    def starts_here(cls, stream):
        """Returns true if this token starts at the current position in
        'stream'."""
        lookahead = stream.peek(4)
        return cls.CHECK.match(lookahead) is not None

    def _parse(self, stream, indent):
        """Reads everything up to the next unescaped backtick as code."""
        # Four characters: `!v plus the whitespace required by CHECK.
        for _ in range(4):
            next(stream)
        code, _ = _parse_till_unescaped_char(stream, '`')
        self.code = code

    def __repr__(self):
        fields = (self.start, self.end, self.code)
        return "VimLCodeToken(%r,%r,%r)" % fields
|
||
|
|
||
|
class EndOfTextToken(Token):
    """Marks the end of the text; consumes nothing from the stream."""

    def __repr__(self):
        template = "EndOfText(%r)"
        return template % self.end
|
||
|
|
||
|
# Token classes tried by tokenize(), in this order; the first class whose
# starts_here() matches is used. ShellCodeToken matches any backtick, so the
# more specific `!p and `!v tokens have to come before it.
__ALLOWED_TOKENS = [
    EscapeCharToken,
    VisualToken,
    TransformationToken,
    TabStopToken,
    MirrorToken,
    PythonCodeToken,
    VimLCodeToken,
    ShellCodeToken,
]
|
||
|
def tokenize(text, indent, offset):
    """Returns an iterator over the tokens found in 'text'.

    'offset' is the position assigned to the first character of 'text' and
    'indent' is assumed to be the whitespace at the beginning of the lines;
    it is handed to every token. The last token is always an
    EndOfTextToken."""
    stream = _TextIterator(text, offset)
    try:
        while True:
            for token_class in __ALLOWED_TOKENS:
                if token_class.starts_here(stream):
                    yield token_class(stream, indent)
                    break
            else:
                # No token starts here: skip one character of plain text.
                next(stream)
    except StopIteration:
        # Raised when the stream runs dry, also while a token is parsed.
        yield EndOfTextToken(stream, indent)
|