Browse Source

Fixing unicode strings in funcparserlib

- Required to work with Cygwin Python3
simple
Jacob Alexander 9 years ago
parent
commit
8a216b1403
3 changed files with 35 additions and 35 deletions
  1. 8
    8
      funcparserlib/lexer.py
  2. 24
    24
      funcparserlib/parser.py
  3. 3
    3
      funcparserlib/util.py

+ 8
- 8
funcparserlib/lexer.py View File

self.msg = msg self.msg = msg


def __str__(self): def __str__(self):
s = u'cannot tokenize data'
s = 'cannot tokenize data'
line, pos = self.place line, pos = self.place
return u'%s: %d,%d: "%s"' % (s, line, pos, self.msg)
return '%s: %d,%d: "%s"' % (s, line, pos, self.msg)




class Token(object): class Token(object):
self.end = end self.end = end


def __repr__(self): def __repr__(self):
return u'Token(%r, %r)' % (self.type, self.value)
return 'Token(%r, %r)' % (self.type, self.value)


def __eq__(self, other): def __eq__(self, other):
# FIXME: Case sensitivity is assumed here # FIXME: Case sensitivity is assumed here
else: else:
sl, sp = self.start sl, sp = self.start
el, ep = self.end el, ep = self.end
return u'%d,%d-%d,%d:' % (sl, sp, el, ep)
return '%d,%d-%d,%d:' % (sl, sp, el, ep)


def __str__(self): def __str__(self):
s = u"%s %s '%s'" % (self._pos_str(), self.type, self.value)
s = "%s %s '%s'" % (self._pos_str(), self.type, self.value)
return s.strip() return s.strip()


@property @property
return self.value return self.value


def pformat(self): def pformat(self):
return u"%s %s '%s'" % (self._pos_str().ljust(20),
return "%s %s '%s'" % (self._pos_str().ljust(20),
self.type.ljust(14), self.type.ljust(14),
self.value) self.value)


m = regexp.match(str, i) m = regexp.match(str, i)
if m is not None: if m is not None:
value = m.group() value = m.group()
nls = value.count(u'\n')
nls = value.count('\n')
n_line = line + nls n_line = line + nls
if nls == 0: if nls == 0:
n_pos = pos + len(value) n_pos = pos + len(value)
else: else:
n_pos = len(value) - value.rfind(u'\n') - 1
n_pos = len(value) - value.rfind('\n') - 1
return Token(type, value, (line, pos + 1), (n_line, n_pos)) return Token(type, value, (line, pos + 1), (n_line, n_pos))
else: else:
errline = str.splitlines()[line - 1] errline = str.splitlines()[line - 1]

+ 24
- 24
funcparserlib/parser.py View File

Runs a parser wrapped into this object. Runs a parser wrapped into this object.
""" """
if debug: if debug:
log.debug(u'trying %s' % self.name)
log.debug('trying %s' % self.name)
return self._run(tokens, s) return self._run(tokens, s)


def _run(self, tokens, s): def _run(self, tokens, s):
raise NotImplementedError(u'you must define() a parser')
raise NotImplementedError('you must define() a parser')


def parse(self, tokens): def parse(self, tokens):
"""Sequence(a) -> b """Sequence(a) -> b
if len(tokens) > max: if len(tokens) > max:
tok = tokens[max] tok = tokens[max]
else: else:
tok = u'<EOF>'
raise NoParseError(u'%s: %s' % (e.msg, tok), e.state)
tok = '<EOF>'
raise NoParseError('%s: %s' % (e.msg, tok), e.state)


def __add__(self, other): def __add__(self, other):
"""Parser(a, b), Parser(a, c) -> Parser(a, _Tuple(b, c)) """Parser(a, b), Parser(a, c) -> Parser(a, _Tuple(b, c))


# or in terms of bind and pure: # or in terms of bind and pure:
# _add = self.bind(lambda x: other.bind(lambda y: pure(magic(x, y)))) # _add = self.bind(lambda x: other.bind(lambda y: pure(magic(x, y))))
_add.name = u'(%s , %s)' % (self.name, other.name)
_add.name = '(%s , %s)' % (self.name, other.name)
return _add return _add


def __or__(self, other): def __or__(self, other):
except NoParseError as e: except NoParseError as e:
return other.run(tokens, State(s.pos, e.state.max)) return other.run(tokens, State(s.pos, e.state.max))


_or.name = u'(%s | %s)' % (self.name, other.name)
_or.name = '(%s | %s)' % (self.name, other.name)
return _or return _or


def __rshift__(self, f): def __rshift__(self, f):


# or in terms of bind and pure: # or in terms of bind and pure:
# _shift = self.bind(lambda x: pure(f(x))) # _shift = self.bind(lambda x: pure(f(x)))
_shift.name = u'(%s)' % (self.name,)
_shift.name = '(%s)' % (self.name,)
return _shift return _shift


def bind(self, f): def bind(self, f):
(v, s2) = self.run(tokens, s) (v, s2) = self.run(tokens, s)
return f(v).run(tokens, s2) return f(v).run(tokens, s2)


_bind.name = u'(%s >>=)' % (self.name,)
_bind.name = '(%s >>=)' % (self.name,)
return _bind return _bind




return unicode((self.pos, self.max)) return unicode((self.pos, self.max))


def __repr__(self): def __repr__(self):
return u'State(%r, %r)' % (self.pos, self.max)
return 'State(%r, %r)' % (self.pos, self.max)




class NoParseError(Exception): class NoParseError(Exception):
def __init__(self, msg=u'', state=None):
def __init__(self, msg='', state=None):
self.msg = msg self.msg = msg
self.state = state self.state = state


self.value = value self.value = value


def __repr__(self): def __repr__(self):
return u'_Ignored(%s)' % repr(self.value)
return '_Ignored(%s)' % repr(self.value)




@Parser @Parser
if s.pos >= len(tokens): if s.pos >= len(tokens):
return None, s return None, s
else: else:
raise NoParseError(u'should have reached <EOF>', s)
raise NoParseError('should have reached <EOF>', s)




finished.name = u'finished'
finished.name = 'finished'




def many(p): def many(p):
except NoParseError as e: except NoParseError as e:
return res, State(s.pos, e.state.max) return res, State(s.pos, e.state.max)


_many.name = u'{ %s }' % p.name
_many.name = '{ %s }' % p.name
return _many return _many




@Parser @Parser
def _some(tokens, s): def _some(tokens, s):
if s.pos >= len(tokens): if s.pos >= len(tokens):
raise NoParseError(u'no tokens left in the stream', s)
raise NoParseError('no tokens left in the stream', s)
else: else:
t = tokens[s.pos] t = tokens[s.pos]
if pred(t): if pred(t):
pos = s.pos + 1 pos = s.pos + 1
s2 = State(pos, max(pos, s.max)) s2 = State(pos, max(pos, s.max))
if debug: if debug:
log.debug(u'*matched* "%s", new state = %s' % (t, s2))
log.debug('*matched* "%s", new state = %s' % (t, s2))
return t, s2 return t, s2
else: else:
if debug: if debug:
log.debug(u'failed "%s", state = %s' % (t, s))
raise NoParseError(u'got unexpected token', s)
log.debug('failed "%s", state = %s' % (t, s))
raise NoParseError('got unexpected token', s)


_some.name = u'(some)'
_some.name = '(some)'
return _some return _some




Returns a parser that parses a token that is equal to the value value. Returns a parser that parses a token that is equal to the value value.
""" """
name = getattr(value, 'name', value) name = getattr(value, 'name', value)
return some(lambda t: t == value).named(u'(a "%s")' % (name,))
return some(lambda t: t == value).named('(a "%s")' % (name,))




def pure(x): def pure(x):
def _pure(_, s): def _pure(_, s):
return x, s return x, s


_pure.name = u'(pure %r)' % (x,)
_pure.name = '(pure %r)' % (x,)
return _pure return _pure




NOTE: In a statically typed language, the type Maybe b could be more NOTE: In a statically typed language, the type Maybe b could be more
appropriate. appropriate.
""" """
return (p | pure(None)).named(u'[ %s ]' % (p.name,))
return (p | pure(None)).named('[ %s ]' % (p.name,))




def skip(p): def skip(p):
Returns a parser that applies the parser p one or more times. Returns a parser that applies the parser p one or more times.
""" """
q = p + many(p) >> (lambda x: [x[0]] + x[1]) q = p + many(p) >> (lambda x: [x[0]] + x[1])
return q.named(u'(%s , { %s })' % (p.name, p.name))
return q.named('(%s , { %s })' % (p.name, p.name))




def with_forward_decls(suspension): def with_forward_decls(suspension):


@Parser @Parser
def f(tokens, s): def f(tokens, s):
raise NotImplementedError(u'you must define() a forward_decl somewhere')
raise NotImplementedError('you must define() a forward_decl somewhere')


return f return f



+ 3
- 3
funcparserlib/util.py View File

Returns a pseudographic tree representation of x similar to the tree command Returns a pseudographic tree representation of x similar to the tree command
in Unix. in Unix.
""" """
(MID, END, CONT, LAST, ROOT) = (u'|-- ', u'`-- ', u'| ', u' ', u'')
(MID, END, CONT, LAST, ROOT) = ('|-- ', '`-- ', '| ', ' ', '')


def rec(x, indent, sym): def rec(x, indent, sym):
line = indent + sym + show(x) line = indent + sym + show(x)
next_indent = indent + LAST next_indent = indent + LAST
syms = [MID] * (len(xs) - 1) + [END] syms = [MID] * (len(xs) - 1) + [END]
lines = [rec(x, next_indent, sym) for x, sym in zip(xs, syms)] lines = [rec(x, next_indent, sym) for x, sym in zip(xs, syms)]
return u'\n'.join([line] + lines)
return '\n'.join([line] + lines)


return rec(x, u'', ROOT)
return rec(x, '', ROOT)