Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for PEP3131 (Non-ASCII Identifiers) #160

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion baron/grouper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# encoding: utf-8

import re
import regex as re
from .utils import FlexibleIterator

to_group = (
Expand Down
4 changes: 3 additions & 1 deletion baron/render.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import sys
import json

import six


def render(node, strict=False):
"""Recipe to render a given FST node.
Expand Down Expand Up @@ -69,7 +71,7 @@ def render_node(node, strict=False):
if key_type == "key":
assert isinstance(node[render_key], (dict, type(None))), "Key '%s' is expected to have type of 'key' (dict/None) but has type of '%s' instead" % (render_key, type(node[render_key]))
elif key_type == "string":
assert isinstance(node[render_key], str), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key]))
assert isinstance(node[render_key], six.string_types), "Key '%s' is expected to have type of 'string' but has type of '%s' instead" % (render_key, type(node[render_key]))
elif key_type in ("list", "formatting"):
assert isinstance(node[render_key], list), "Key '%s' is expected to have type of 'list' but has type of '%s' instead" % (render_key, type(node[render_key]))
elif key_type == "constant":
Expand Down
8 changes: 6 additions & 2 deletions baron/spliter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import string
from .utils import FlexibleIterator, BaronError
from .utils import FlexibleIterator, BaronError, is_xid_start, is_xid_continue


def split(sequence):
Expand Down Expand Up @@ -58,12 +58,16 @@ def split_generator(sequence):
not_found = False
yield iterator.grab(lambda iterator: iterator.show_next() in section)

if iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue):
not_found = False
yield iterator.grab(lambda iterator: iterator.next_is(is_xid_start) or iterator.next_is(is_xid_continue))

for one in "@,.;()=*:+-/^%&<>|\r\n~[]{}!``\\":
if iterator.next_in(one):
not_found = False
yield next(iterator)

if iterator.show_next().__repr__().startswith("'\\x"):
if iterator.show_next().__repr__().startswith(r"'\x"):
# guys, seriously, how do you manage to put this shit in your code?
# I mean, I don't even know how this is possible!
# example of guilty file: ve/lib/python2.7/site-packages/tests/test_oauth.py
Expand Down
4 changes: 2 additions & 2 deletions baron/tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re
import regex as re
from .utils import BaronError


Expand All @@ -9,7 +9,7 @@ class UnknowItem(BaronError):
KEYWORDS = ("and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "exec", "finally", "for", "from", "global", "nonlocal", "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try", "while", "with", "yield")

TOKENS = (
(r'[a-zA-Z_]\w*', 'NAME'),
(r'[\p{XID_Start}_]\p{XID_Continue}*', 'NAME'),
(r'0', 'INT'),
(r'[-+]?\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'),
(r'[-+]?\d+.\d?[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'),
Expand Down
43 changes: 30 additions & 13 deletions baron/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import sys
import re

import regex as re
import six

python_version = sys.version_info[0]
python_subversion = sys.version_info[1]
string_instance = str if python_version == 3 else basestring
string_instance = six.string_types # alias, for isinstance usage in redbaron


class BaronError(Exception):
Expand Down Expand Up @@ -33,9 +34,12 @@ def next_starts_with(self, sentence):
return self.sequence[self.position + 1: self.position + 1 + size_of_choice] == sentence

def next_in(self, choice):
return self.next_is(lambda item: item in choice)

def next_is(self, predicate):
if self.position + 1 >= len(self.sequence):
return False
return self.sequence[self.position + 1] in choice
return predicate(self.sequence[self.position + 1])

def show_next(self, at=1):
if self.position + at >= len(self.sequence):
Expand Down Expand Up @@ -106,6 +110,20 @@ def split_on_newlines(text):
yield text[current_position:]


xid_start_regex = re.compile(r"\p{XID_Start}")


def is_xid_start(char):
    """Tell whether ``char`` begins with a Unicode ``XID_Start`` character.

    ``XID_Start`` is the Unicode property that PEP 3131 uses for the first
    character of an identifier.

    :param char: the text to test; only its first character is examined
        because the module-level ``xid_start_regex`` pattern is applied
        with ``match``, which anchors at the start of the string.
    :return: ``True`` if the first character has the ``XID_Start``
        property, ``False`` otherwise (including for an empty string).
    """
    # Return a real boolean instead of leaking the Match object / None:
    # truthiness is unchanged for callers that use this as a predicate.
    return xid_start_regex.match(char) is not None


xid_continue_regex = re.compile(r"\p{XID_Continue}")


def is_xid_continue(char):
    """Tell whether ``char`` begins with a Unicode ``XID_Continue`` character.

    ``XID_Continue`` is the Unicode property that PEP 3131 uses for the
    characters allowed after the first one in an identifier.

    :param char: the text to test; only its first character is examined
        because the module-level ``xid_continue_regex`` pattern is applied
        with ``match``, which anchors at the start of the string.
    :return: ``True`` if the first character has the ``XID_Continue``
        property, ``False`` otherwise (including for an empty string).
    """
    # Return a real boolean instead of leaking the Match object / None:
    # truthiness is unchanged for callers that use this as a predicate.
    return xid_continue_regex.match(char) is not None


# Thanks to
# https://github.com/nvie/rq/commit/282f4be9316d608ebbacd6114aab1203591e8f95
if python_version >= 3 or python_subversion >= 7:
Expand All @@ -115,26 +133,25 @@ def total_ordering(cls):
"""Class decorator that fills in missing ordering methods"""
convert = {
'__lt__': [('__gt__', lambda self, other: other < self),
('__le__', lambda self, other: not other < self),
('__ge__', lambda self, other: not self < other)],
('__le__', lambda self, other: not other < self),
('__ge__', lambda self, other: not self < other)],
'__le__': [('__ge__', lambda self, other: other <= self),
('__lt__', lambda self, other: not other <= self),
('__gt__', lambda self, other: not self <= other)],
('__lt__', lambda self, other: not other <= self),
('__gt__', lambda self, other: not self <= other)],
'__gt__': [('__lt__', lambda self, other: other > self),
('__ge__', lambda self, other: not other > self),
('__le__', lambda self, other: not self > other)],
('__ge__', lambda self, other: not other > self),
('__le__', lambda self, other: not self > other)],
'__ge__': [('__le__', lambda self, other: other >= self),
('__gt__', lambda self, other: not other >= self),
('__lt__', lambda self, other: not self >= other)]
('__gt__', lambda self, other: not other >= self),
('__lt__', lambda self, other: not self >= other)]
}
roots = set(dir(cls)) & set(convert)
if not roots:
raise ValueError('must define at least one ordering operation: < > <= >=') # noqa
root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__
for opname, opfunc in convert[root]:
if opname not in roots:
opfunc.__name__ = opname
opfunc.__doc__ = getattr(int, opname).__doc__
setattr(cls, opname, opfunc)
return cls

2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
rply
regex
six
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
long_description=read_md("README.md") + "\n\n" + open("CHANGELOG", "r").read(),
author_email='cortex@worlddomination.be',
url='https://github.com/PyCQA/baron',
install_requires=['rply'],
install_requires=['rply', 'regex', 'six'],
packages=['baron'],
license='lgplv3+',
scripts=[],
Expand Down
7 changes: 4 additions & 3 deletions tests/test_path.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import six

from baron.baron import parse
from baron.path import PathWalker, Position, BoundingBox
from baron.path import position_to_path, path_to_node, position_to_node
from baron.path import path_to_bounding_box, node_to_bounding_box
from baron.utils import string_instance
from baron.path import position_to_path, path_to_node, position_to_node


def test_position():
Expand Down Expand Up @@ -145,7 +146,7 @@ def check_path(code, positions, target_path):
return

node = path_to_node(tree, path)
assert isinstance(node, string_instance)
assert isinstance(node, six.string_types)

assert position_to_node(tree, position) is node

Expand Down
17 changes: 13 additions & 4 deletions tests/test_spliter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding:Utf-8 -*-

import six

from baron.spliter import split, UntreatedError
from baron.utils import python_version
Expand Down Expand Up @@ -91,6 +91,11 @@ def test_assign():
assert split("a = b") == ["a", " ", "=", " ", "b"]


if six.PY3:
def test_assign_unicode():
assert split("α = β") == ["α", " ", "=", " ", "β"]


def test_call():
assert split("function()") == ["function", "(", ")"]

Expand Down Expand Up @@ -247,7 +252,9 @@ def test_if():


def test_if_elif_else():
assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n", "elif", " ", "b", ":", "\n", " ", "pass", "\n", "else", ":", " ", "\n", " ", "pass"]
assert split("if a:\n pass\nelif b:\n pass\nelse: \n pass") == ["if", " ", "a", ":", "\n", " ", "pass", "\n",
"elif", " ", "b", ":", "\n", " ", "pass", "\n",
"else", ":", " ", "\n", " ", "pass"]


def test_while():
Expand Down Expand Up @@ -365,10 +372,12 @@ def test_backslash_in_comment():


def test_regression():
assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ", "'STRING'", ")", ","]
assert split("(r'[\"\\'](.|\n|\r)*[\"\\']', 'STRING'),") == ["(", "r", "'[\"\\'](.|\n|\r)*[\"\\']'", ",", " ",
"'STRING'", ")", ","]


# TODO: make this test pass on Python 3 as well;
# this requires removing the dependency on ast.py
if python_version == 2:
if six.PY2:
def test_remove_crap():
assert split("\x0c\xef\xbb\xbf") == []
9 changes: 8 additions & 1 deletion tests/test_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding:Utf-8 -*-

import six

from baron.tokenizer import tokenize, KEYWORDS

Expand All @@ -22,6 +22,12 @@ def test_name__():
match('_a', 'NAME')


if six.PY3:
def test_name_unicode():
match('β', 'NAME')
match('가사', 'NAME')


def test_name_number():
match('a123', 'NAME')

Expand Down Expand Up @@ -551,6 +557,7 @@ def test_exponant_complex():
match("-1.1E+1J", "FLOAT_EXPONANT_COMPLEX")
match("-.1E+1J", "FLOAT_EXPONANT_COMPLEX")


# TODO 1.1e1j


Expand Down