Skip to content

Commit

Permalink
Finalizing GSM...
Browse files Browse the repository at this point in the history
  • Loading branch information
robvanderleek committed Apr 2, 2024
1 parent d09b85f commit 0941c12
Show file tree
Hide file tree
Showing 21 changed files with 153 additions and 86 deletions.
2 changes: 1 addition & 1 deletion codelimit/common/ScanTotals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


class ScanTotals:
def __init__(self):
def __init__(self) -> None:
self._languages_totals: dict[str, LanguageTotals] = {}

def add(self, entry: SourceFileEntry):
Expand Down
10 changes: 6 additions & 4 deletions codelimit/common/gsm/Atom.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from codelimit.common.gsm.Automata import Automata
from typing import Any

from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State


class Atom(Operator):
    """Operator that wraps a single item as a one-transition NFA."""

    def __init__(self, item: Any):
        """Store the item used as the transition label."""
        self.item = item

    def apply(self, stack: list[NFA]):
        """Push an NFA of two fresh states joined by a transition on ``self.item``.

        Exactly one NFA is pushed; nothing is popped from ``stack``.
        """
        start = State()
        accepting = State()
        start.transition.append((self.item, accepting))
        stack.append(NFA(start, accepting))
16 changes: 6 additions & 10 deletions codelimit/common/gsm/Automata.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
from abc import abstractmethod, ABC

from codelimit.common.gsm.State import State


class Automata(ABC):
    """Abstract base for automata; it only stores the start state.

    Subclasses decide how accepting states are represented and must
    implement :meth:`is_accepting`.
    """

    def __init__(self, start: State):
        """Remember ``start`` as the automaton's start state."""
        self.start = start

    @abstractmethod
    def is_accepting(self, state: State) -> bool:
        """Return whether ``state`` is an accepting state of this automaton."""
6 changes: 3 additions & 3 deletions codelimit/common/gsm/Concat.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator


class Concat(Operator):
    """Operator that joins the two topmost NFAs on the stack into one."""

    def apply(self, stack: list[NFA]):
        """Replace the top two NFAs with a single combined NFA.

        Does nothing when ``stack`` holds fewer than two NFAs.
        """
        if len(stack) < 2:
            return
        nfa1 = stack.pop()
        nfa2 = stack.pop()
        # nfa1 was on top (pushed last), so it comes second in the result:
        # the result starts at nfa2.start and accepts at nfa1.accepting.
        # NOTE(review): State.assign is defined elsewhere; presumably it makes
        # nfa2's accepting state behave like nfa1's start state - confirm.
        nfa2.accepting.assign(nfa1.start)
        stack.append(NFA(nfa2.start, nfa1.accepting))
14 changes: 14 additions & 0 deletions codelimit/common/gsm/DFA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.State import State


class DFA(Automata):
    """Automaton whose accepting states are kept as a list."""

    def __init__(self, start: State, accepting: list[State]):
        """Store the start state (via the base class) and the accepting list."""
        super().__init__(start)
        self.accepting = accepting

    def is_accepting(self, state: State) -> bool:
        """Return True when ``state`` is contained in the accepting list."""
        return state in self.accepting

    def __str__(self):
        return f'DFA(start={self.start}, accepting={self.accepting})'
30 changes: 21 additions & 9 deletions codelimit/common/gsm/Expression.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
from typing import Iterable
from typing import Iterable, TypeVar, TypeAlias

from codelimit.common.gsm.Atom import Atom
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Concat import Concat
from codelimit.common.gsm.DFA import DFA
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.Predicate import Predicate
from codelimit.common.gsm.State import State

T = TypeVar('T')

def expression_to_nfa(expression: list[Operator | str]) -> Automata:
op_expression = [Atom(item) if isinstance(item, str) else item for item in expression]
nfa_stack = []
Expression: TypeAlias = Operator | Predicate[T] | T | list[Operator | Predicate[T] | T]


def expression_to_nfa(expression: Expression[T]) -> NFA:
if isinstance(expression, list):
op_expression = [Atom(item) if not isinstance(item, Operator) or isinstance(item, Predicate) else
item for item in expression]

else:
op_expression = [Atom(expression) if not isinstance(expression, Operator) or
isinstance(expression, Predicate) else expression]
nfa_stack: list[NFA] = []
for item in op_expression:
item.apply(nfa_stack)
Concat().apply(nfa_stack)
Expand All @@ -20,7 +32,7 @@ def expression_to_nfa(expression: list[Operator | str]) -> Automata:
def epsilon_closure(states: State | Iterable[State]) -> set[State]:
result = set()
if isinstance(states, State):
states: set[State] = {states}
states = {states}
for state in states:
result.add(state)
for s in state.epsilon_transitions:
Expand Down Expand Up @@ -49,10 +61,10 @@ def state_set_id(states: set[State]) -> str:
return ", ".join([str(id) for id in sorted([state.id for state in states])])


def nfa_to_dfa(nfa: Automata) -> Automata:
def nfa_to_dfa(nfa: NFA) -> DFA:
start = State()
stack = [(start, epsilon_closure(nfa.start))]
states = {}
states: dict[str, State] = {}
accepting_states = []
marked_states = set()
while stack:
Expand All @@ -74,4 +86,4 @@ def nfa_to_dfa(nfa: Automata) -> Automata:
states[state_set_id(new_states)] = new_state
state.transition.append((atom, new_state))
stack.append((new_state, new_states))
return Automata(start, accepting_states)
return DFA(start, accepting_states)
14 changes: 14 additions & 0 deletions codelimit/common/gsm/NFA.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.State import State


class NFA(Automata):
    """Automaton with exactly one accepting state."""

    def __init__(self, start: State, accepting: State):
        """Store the start state (via the base class) and the accepting state."""
        super().__init__(start)
        self.accepting = accepting

    def is_accepting(self, state: State) -> bool:
        """Return True when ``state`` equals the single accepting state."""
        return state == self.accepting

    def __str__(self):
        return f'NFA(start={self.start}, accepting={self.accepting})'
10 changes: 5 additions & 5 deletions codelimit/common/gsm/OneOrMore.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, Expression
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State


class OneOrMore(Operator):
    """Operator that repeats an expression one or more times."""

    def __init__(self, expression: Expression):
        """Store ``expression``, wrapping a non-list value in a list."""
        self.expression = expression if isinstance(expression, list) else [expression]

    def apply(self, stack: list[NFA]):
        """Push an NFA that runs the wrapped expression at least once."""
        start = State()
        nfa = expression_to_nfa(self.expression)
        accepting = State()
        # The new start can only enter the inner NFA (no bypass edge), so the
        # expression has to be traversed at least once.
        start.epsilon_transitions = [nfa.start]
        # After each pass: loop back for another repetition, or leave.
        nfa.accepting.epsilon_transitions = [nfa.start, accepting]
        stack.append(NFA(start, accepting))
4 changes: 2 additions & 2 deletions codelimit/common/gsm/Operator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod

from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.NFA import NFA


class Operator(ABC):
    """Abstract base for expression operators that work on a stack of NFAs."""

    @abstractmethod
    def apply(self, stack: list[NFA]):
        """Apply this operator to ``stack``, modifying it in place."""
10 changes: 5 additions & 5 deletions codelimit/common/gsm/Optional.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, Expression
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State


class Optional(Operator):
    """Operator that makes an expression optional (zero or one occurrence)."""

    def __init__(self, expression: Expression):
        """Store ``expression``, wrapping a non-list value in a list."""
        self.expression = expression if isinstance(expression, list) else [expression]

    def apply(self, stack: list[NFA]):
        """Push an NFA that either runs the wrapped expression once or skips it."""
        start = State()
        nfa = expression_to_nfa(self.expression)
        accepting = State()
        # Either enter the inner NFA or bypass it straight to accepting.
        start.epsilon_transitions = [nfa.start, accepting]
        # No edge back to nfa.start, so the expression runs at most once.
        nfa.accepting.epsilon_transitions = [accepting]
        stack.append(NFA(start, accepting))
10 changes: 7 additions & 3 deletions codelimit/common/gsm/Pattern.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from codelimit.common.gsm.State import State
from codelimit.common.gsm.Automata import Automata


class Pattern:
    """Tracks one run of an automaton, beginning at a given position."""

    def __init__(self, start: int, automata: Automata):
        """Begin a run of ``automata`` in its start state.

        ``start`` is stored unchanged as the position this pattern begins at.
        """
        self.start = start
        self.automata = automata
        # Current state of the run; callers advance it by assigning to it.
        self.state = automata.start
        self.tokens: list = []

    def is_accepting(self):
        """Return whether the current state is accepting for the automaton."""
        return self.automata.is_accepting(self.state)
11 changes: 11 additions & 0 deletions codelimit/common/gsm/Predicate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod
from typing import Generic, TypeVar

T = TypeVar('T')


class Predicate(ABC, Generic[T]):
    """Abstract, generic test over items of type ``T``."""

    @abstractmethod
    def accept(self, item: T) -> bool:
        """Return whether ``item`` is accepted by this predicate."""
2 changes: 1 addition & 1 deletion codelimit/common/gsm/State.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class State:
_id = 1

def __init__(self):
def __init__(self) -> None:
self.id = State._id
State._id += 1
self.transition: list[tuple[str, State]] = []
Expand Down
10 changes: 5 additions & 5 deletions codelimit/common/gsm/Union.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, Expression
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State


class Union(Operator):
    """Operator that matches either of two expressions."""

    def __init__(self, left: Expression, right: Expression):
        """Store both alternatives, wrapping non-list values in lists."""
        self.left = left if isinstance(left, list) else [left]
        self.right = right if isinstance(right, list) else [right]

    def apply(self, stack: list[NFA]):
        """Push an NFA that branches into the left or the right expression."""
        start = State()
        nfa1 = expression_to_nfa(self.left)
        nfa2 = expression_to_nfa(self.right)
        # Branch from the new start into either alternative.
        start.epsilon_transitions = [nfa1.start, nfa2.start]
        accepting = State()
        # Both alternatives converge on the same new accepting state.
        nfa1.accepting.epsilon_transitions = [accepting]
        nfa2.accepting.epsilon_transitions = [accepting]
        stack.append(NFA(start, accepting))
10 changes: 5 additions & 5 deletions codelimit/common/gsm/ZeroOrMore.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, Expression
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State


class ZeroOrMore(Operator):
    """Operator that repeats an expression zero or more times."""

    def __init__(self, expression: Expression):
        """Store ``expression``, wrapping a non-list value in a list."""
        self.expression = expression if isinstance(expression, list) else [expression]

    def apply(self, stack: list[NFA]):
        """Push an NFA that runs the wrapped expression any number of times."""
        start = State()
        nfa = expression_to_nfa(self.expression)
        accepting = State()
        # Either enter the inner NFA or skip it entirely (zero repetitions).
        start.epsilon_transitions = [nfa.start, accepting]
        # After each pass: loop back for another repetition, or leave.
        nfa.accepting.epsilon_transitions = [nfa.start, accepting]
        stack.append(NFA(start, accepting))
28 changes: 11 additions & 17 deletions codelimit/common/gsm/matcher.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import subprocess
import tempfile
import copy
from typing import TypeVar

from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, epsilon_closure, nfa_to_dfa
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.Pattern import Pattern
from codelimit.common.gsm.utils import to_dot
from codelimit.common.gsm.Predicate import Predicate
from codelimit.common.gsm.utils import render_automata

T = TypeVar('T')

def match(expression: list[Operator | str], text: list) -> Pattern | None:

def match(expression: Operator | Predicate[T] | T | list[Operator | Predicate[T] | T], text: list) -> Pattern | None:
nfa = expression_to_nfa(expression)
dfa = nfa_to_dfa(nfa)
pattern = Pattern(dfa.start)
pattern = Pattern(0, dfa)
for char in text:
next_state = None
for transition in pattern.state.transition:
Expand All @@ -35,7 +37,8 @@ def find_all(expression: list[Operator | str], text: list) -> list[Pattern]:
active_patterns = []
last_match_idx = -1
for idx, char in enumerate(text):
active_patterns.append(Pattern(idx, dfa.start))
dfa_copy = copy.deepcopy(dfa)
active_patterns.append(Pattern(idx, dfa_copy))
next_state_patterns = []
for pattern in active_patterns:
if pattern.start <= last_match_idx:
Expand All @@ -46,7 +49,7 @@ def find_all(expression: list[Operator | str], text: list) -> list[Pattern]:
pattern.state = transition[1]
next_state_patterns.append(pattern)
else:
if pattern.state in dfa.accepting:
if pattern.is_accepting():
matches.append(pattern)
last_match_idx = idx
active_patterns = next_state_patterns
Expand Down Expand Up @@ -75,12 +78,3 @@ def render_nfa(expression: list[Operator | str]):

def render_dfa(expression: list[Operator | str]):
    """Build the NFA for ``expression``, convert it to a DFA and render it."""
    nfa = expression_to_nfa(expression)
    dfa = nfa_to_dfa(nfa)
    render_automata(dfa)


def render_automata(automata: Automata):
dot = to_dot(automata)
with tempfile.NamedTemporaryFile(mode='w') as f:
f.write(dot)
f.flush()
subprocess.run(['dot', '-Tpdf', f'-o{f.name}.pdf', f.name])
subprocess.run(['open', f'{f.name}.pdf'])
Loading

0 comments on commit 0941c12

Please sign in to comment.