Skip to content

Commit

Permalink
Add support for non-capturing groups and enhance backreference tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Buba98 committed Dec 20, 2024
1 parent 1f49c77 commit 6515e1a
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ print(re.next()) # a2b
- [x] Alternation
- [x] Escaped characters
- [x] Backreferences (named and unnamed)
- [x] Non-capturing groups

## What I plan to support

- [ ] Lookahead and lookbehind
- [ ] Non-capturing groups

## What is not supported

Expand Down
40 changes: 24 additions & 16 deletions regex_enumerator/regex_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .regex_tree import Alternative, BackReference, CharClasses, RegexTree


class RegexError(Exception):
def __init__(self, regex: str, index: int, message: str):
self.regex = regex
Expand All @@ -8,7 +9,7 @@ def __init__(self, regex: str, index: int, message: str):

def __str__(self):
caret_line = ' ' * self.index + '^'
return f"{self.regex}\n{caret_line}\n{self.message}"
return f"\n{self.regex}\n{caret_line}\n{self.message}"


class RegexParser:
Expand Down Expand Up @@ -36,25 +37,32 @@ def _parseRegex(self, to_close: bool) -> RegexTree:
self.index += 1
match char:
case'(':
name = None
if self.index < len(self.regex) and self.regex[self.index] == '?':
self.index += 1
if self.index >= len(self.regex) or self.regex[self.index] != '<':
if self.index >= len(self.regex):
self._raise_error("Invalid named group")
self.index += 1
name = ''
while self.index < len(self.regex) and self.regex[self.index] != '>':
name += self.regex[self.index]
elif self.regex[self.index] == '<':
self.index += 1
if self.index >= len(self.regex) or self.regex[self.index] != '>' or name == '':
self._raise_error("Invalid named group")
self.index += 1
if name in named_groups:
self._raise_error("Duplicate named group")
subTree = self._parseRegex(True)
if name is not None:
named_groups[name] = subTree
ordered_groups.append(subTree)
name = ''
while self.index < len(self.regex) and self.regex[self.index] != '>':
name += self.regex[self.index]
self.index += 1
if self.index >= len(self.regex) or self.regex[self.index] != '>' or name == '':
self._raise_error("Invalid named group")
self.index += 1
if name in named_groups:
self._raise_error("Duplicate named group")
subTree = self._parseRegex(True)
named_groups[name] = subTree
ordered_groups.append(subTree)
elif self.regex[self.index] == ':':
self.index += 1
subTree = self._parseRegex(True)
else:
self._raise_error("Invalid group")
else:
subTree = self._parseRegex(True)
ordered_groups.append(subTree)
elements.append(subTree)
case ')':
if not to_close:
Expand Down
6 changes: 6 additions & 0 deletions tests/test_backreference.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ def test_zero_width_backreference():
possibilities = ['a', '']

f_finite(regexEnumerator, possibilities)

def test_10_backreference():
    """Check a two-digit numbered backreference.

    The pattern has ten capturing groups followed by a reference to group
    number 10, so the only expected string ends with a repeat of the tenth
    group's text ('j') rather than group 1's text plus a literal '0'.
    """
    pattern = r'(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\10'
    expected = ['abcdefghijj']

    # NOTE(review): f_finite is defined in the shared test helpers; presumably
    # it asserts the enumerator yields exactly `expected` -- confirm there.
    f_finite(RegexEnumerator(pattern), expected)
9 changes: 9 additions & 0 deletions tests/test_not_capturing_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from regex_enumerator import RegexEnumerator
from .test_function import f_finite, f_infinite


def test_not_capturing_groups():
    """Check that a non-capturing group does not take a group number.

    In the pattern the '(?:a)' group comes first, yet the backreference to
    group 1 is expected to repeat 'b', i.e. the first *capturing* group.
    """
    pattern = r'(?:a)(b)\1'
    expected = ['abb']

    # NOTE(review): f_finite is defined in the shared test helpers; presumably
    # it asserts the enumerator yields exactly `expected` -- confirm there.
    f_finite(RegexEnumerator(pattern), expected)

0 comments on commit 6515e1a

Please sign in to comment.