Skip to content

Commit

Permalink
Going from NFA to DFA...
Browse files Browse the repository at this point in the history
  • Loading branch information
robvanderleek committed Mar 29, 2024
1 parent 9e698b8 commit ff80c98
Show file tree
Hide file tree
Showing 14 changed files with 216 additions and 72 deletions.
6 changes: 3 additions & 3 deletions codelimit/common/gsm/Atom.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

Expand All @@ -7,8 +7,8 @@ class Atom(Operator):
def __init__(self, item: str):
self.item = item

def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
start = State()
accepting = State()
start.transition = (self.item, accepting)
stack.append(NFA(start, accepting))
stack.append(Automata(start, accepting))
16 changes: 16 additions & 0 deletions codelimit/common/gsm/Automata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from codelimit.common.gsm.State import State


class Automata:
    """A finite automaton described by its start state and accepting state(s).

    ``accepting`` is either a single ``State`` or a collection of states.
    """

    def __init__(self, start: State, accepting: State | list[State]):
        """Store the start state and the accepting state(s) unchanged."""
        self.start = start
        self.accepting = accepting

    def is_accepting(self, state: State) -> bool:
        """Return True if *state* is (one of) the accepting state(s).

        Any built-in collection of states is treated as "several accepting
        states"; previously only ``list`` was recognised, so a tuple or set
        of accepting states would never match.
        """
        if isinstance(self.accepting, (list, tuple, set, frozenset)):
            return state in self.accepting
        return state == self.accepting

    def __str__(self):
        return f'Automata(start={self.start}, accepting={self.accepting})'
6 changes: 3 additions & 3 deletions codelimit/common/gsm/Concat.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator


class Concat(Operator):
def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
if len(stack) < 2:
return
nfa1 = stack.pop()
nfa2 = stack.pop()
nfa2.accepting.assign(nfa1.start)
nfa = NFA(nfa2.start, nfa1.accepting)
nfa = Automata(nfa2.start, nfa1.accepting)
stack.append(nfa)
75 changes: 70 additions & 5 deletions codelimit/common/gsm/Expression.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,79 @@
from codelimit.common.gsm.Atom import Atom
from codelimit.common.gsm.Concat import Concat
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Operator import Operator
from typing import Iterable

from codelimit.common.gsm.Atom import Atom
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Concat import Concat
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

def expression_to_nfa(expression: list[Operator | str]) -> NFA:

def expression_to_nfa(expression: list[Operator | str]) -> Automata:
    """Build a single automaton from a sequence of operators and literals.

    Plain strings are wrapped in ``Atom``. Each element pushes its fragment
    onto a stack, after which ``Concat`` is applied so that consecutive
    fragments are joined as soon as two of them are available.
    """
    fragments = []
    for element in expression:
        operator = Atom(element) if isinstance(element, str) else element
        operator.apply(fragments)
        # Concat is a no-op while fewer than two fragments are on the stack.
        Concat().apply(fragments)
    return fragments.pop()


def epsilon_closure(states: State | Iterable[State]) -> set[State]:
    """Return every state reachable from *states* via epsilon transitions only.

    *states* may be a single ``State`` or an iterable of states; the given
    states themselves are always part of the result.

    The traversal is iterative and skips states it has already collected, so
    epsilon cycles (which nested repetition operators can produce) terminate
    instead of recursing without bound.
    """
    pending = [states] if isinstance(states, State) else list(states)
    closure: set[State] = set()
    while pending:
        state = pending.pop()
        if state in closure:
            continue
        closure.add(state)
        if state.epsilon_transitions:
            pending.extend(state.epsilon_transitions)
    return closure


def move(states: set[State], symbol: str) -> set[State]:
    """Return the states reached from *states* by consuming *symbol*.

    Epsilon transitions are not followed here. A state's ``transition`` may
    be ``None``, a single ``(symbol, target)`` pair, or a list of such pairs;
    all three forms are handled.
    """
    targets: set[State] = set()
    for state in states:
        transition = state.transition
        if not transition:
            continue
        # Normalise the single-pair form to a one-element list of edges.
        edges = transition if isinstance(transition, list) else [transition]
        targets.update(edge[1] for edge in edges if edge[0] == symbol)
    return targets


def state_set_transitions(states: set[State]) -> set[str]:
    """Return the symbols on the outgoing non-epsilon transitions of *states*.

    A state's ``transition`` may be ``None``, a single ``(symbol, target)``
    pair, or a list of such pairs; all three forms are handled.
    """
    symbols: set[str] = set()
    for state in states:
        transition = state.transition
        if not transition:
            continue
        # Normalise the single-pair form to a one-element list of edges.
        edges = transition if isinstance(transition, list) else [transition]
        symbols.update(edge[0] for edge in edges)
    return symbols


def state_set_id(states: set[State]) -> str:
    """Return a canonical string key for a set of states.

    The key is the states' ``id`` values in ascending order, joined by
    ``", "``, so the same set always yields the same key regardless of
    iteration order. An empty set yields an empty string.
    """
    ordered_ids = sorted(state.id for state in states)
    return ", ".join(str(state_id) for state_id in ordered_ids)


def nfa_to_dfa(nfa: Automata) -> Automata:
    """Convert *nfa* into a DFA using the subset construction.

    Each DFA state stands for a set of NFA states (keyed by
    ``state_set_id``). The returned automaton starts at a fresh state that
    represents the epsilon closure of the NFA start state; its ``accepting``
    attribute is the list of DFA states whose NFA-state set contains an
    accepting NFA state. DFA states store their outgoing edges as a list of
    ``(symbol, target)`` pairs in ``transition``.
    """
    start = State()
    start_set = epsilon_closure(nfa.start)
    # Register the start subset too, so an edge leading back to it reuses
    # `start` instead of creating a duplicate state that is never expanded.
    dfa_states = {state_set_id(start_set): start}
    worklist = [(start, start_set)]
    accepting_states = []
    processed = set()
    while worklist:
        state, nfa_states = worklist.pop()
        set_id = state_set_id(nfa_states)
        if set_id in processed:
            continue
        processed.add(set_id)
        # Ask the automaton itself, so both the single-state and the
        # list-of-states forms of `accepting` are supported.
        if any(nfa.is_accepting(nfa_state) for nfa_state in nfa_states):
            accepting_states.append(state)
        # Sorted so that DFA states are created in a reproducible order.
        for atom in sorted(state_set_transitions(nfa_states)):
            target_set = epsilon_closure(move(nfa_states, atom))
            target_id = state_set_id(target_set)
            target = dfa_states.get(target_id)
            if target is None:
                target = State()
                dfa_states[target_id] = target
            if state.transition is None:
                state.transition = []
            state.transition.append((atom, target))
            worklist.append((target, target_set))
    return Automata(start, accepting_states)
9 changes: 0 additions & 9 deletions codelimit/common/gsm/NFA.py

This file was deleted.

6 changes: 3 additions & 3 deletions codelimit/common/gsm/OneOrMore.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

Expand All @@ -8,10 +8,10 @@ class OneOrMore(Operator):
def __init__(self, expression: Operator | str | list[Operator | str]):
self.expression = expression if isinstance(expression, list) else [expression]

def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
start = State()
nfa = expression_to_nfa(self.expression)
accepting = State()
start.epsilon_transitions = [nfa.start]
nfa.accepting.epsilon_transitions = [nfa.start, accepting]
stack.append(NFA(start, accepting))
stack.append(Automata(start, accepting))
4 changes: 2 additions & 2 deletions codelimit/common/gsm/Operator.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod

from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata


class Operator(ABC):
@abstractmethod
def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
pass
6 changes: 3 additions & 3 deletions codelimit/common/gsm/Optional.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

Expand All @@ -8,10 +8,10 @@ class Optional(Operator):
def __init__(self, expression: Operator | str | list[Operator | str]):
self.expression = expression if isinstance(expression, list) else [expression]

def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
start = State()
nfa = expression_to_nfa(self.expression)
accepting = State()
start.epsilon_transitions = [nfa.start, accepting]
nfa.accepting.epsilon_transitions = [accepting]
stack.append(NFA(start, accepting))
stack.append(Automata(start, accepting))
27 changes: 14 additions & 13 deletions codelimit/common/gsm/State.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,29 @@ class State:
def __init__(self):
self.id = State._id
State._id += 1
self.transition: tuple[str, State] | None = None
self.transition: tuple[str, State] | list[tuple[str, State]] | None = None
self.epsilon_transitions: list[State] | None = None

def assign(self, state: State):
self.id = state.id
self.transition = state.transition
self.epsilon_transitions = state.epsilon_transitions

def is_accepting(self):
return self.transition is None and self.epsilon_transitions is None
def __str__(self):
return f'State({self.id})'

def __repr__(self):
result = f'State({self.id}, '
if self.is_accepting():
result += 'F'
else:
parts = []
if self.transition:
result = 'State('
parts = [f'{self.id}']
if self.transition:
if isinstance(self.transition, list):
for t in self.transition:
parts.append(f'{t[0]} -> {t[1]}')
else:
parts.append(f'{self.transition[0]} -> {self.transition[1]}')
if self.epsilon_transitions:
for t in self.epsilon_transitions:
parts.append(f'epsilon -> {t}')
result += ', '.join(parts)
if self.epsilon_transitions:
for t in self.epsilon_transitions:
parts.append(f'epsilon -> {t}')
result += ', '.join(parts)
result += ')'
return result
6 changes: 3 additions & 3 deletions codelimit/common/gsm/Union.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

Expand All @@ -9,12 +9,12 @@ def __init__(self, left: Operator | str | list[Operator | str], right: Operator
self.left = left if isinstance(left, list) else [left]
self.right = right if isinstance(right, list) else [right]

def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
start = State()
nfa1 = expression_to_nfa(self.left)
nfa2 = expression_to_nfa(self.right)
start.epsilon_transitions = [nfa1.start, nfa2.start]
accepting = State()
nfa1.accepting.epsilon_transitions = [accepting]
nfa2.accepting.epsilon_transitions = [accepting]
stack.append(NFA(start, accepting))
stack.append(Automata(start, accepting))
6 changes: 3 additions & 3 deletions codelimit/common/gsm/ZeroOrMore.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.NFA import NFA
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State

Expand All @@ -8,10 +8,10 @@ class ZeroOrMore(Operator):
def __init__(self, expression: Operator | str | list[Operator | str]):
self.expression = expression if isinstance(expression, list) else [expression]

def apply(self, stack: list[NFA]):
def apply(self, stack: list[Automata]):
start = State()
nfa = expression_to_nfa(self.expression)
accepting = State()
start.epsilon_transitions = [nfa.start, accepting]
nfa.accepting.epsilon_transitions = [nfa.start, accepting]
stack.append(NFA(start, accepting))
stack.append(Automata(start, accepting))
21 changes: 8 additions & 13 deletions codelimit/common/gsm/matcher.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,20 @@
import subprocess
import tempfile

from codelimit.common.gsm.Expression import expression_to_nfa
from codelimit.common.gsm.Automata import Automata
from codelimit.common.gsm.Expression import expression_to_nfa, epsilon_closure
from codelimit.common.gsm.Operator import Operator
from codelimit.common.gsm.State import State
from codelimit.common.gsm.utils import to_dot


def _follow_epsilon_transitions(state: State) -> set[State]:
result = {state}
if state.epsilon_transitions:
for s in state.epsilon_transitions:
result.update(_follow_epsilon_transitions(s))
return result


def match(expression: list[Operator | str], text: list):
nfa = expression_to_nfa(expression)
active_states = _follow_epsilon_transitions(nfa.start)
active_states = epsilon_closure(nfa.start)
next_states = set()
for char in text:
for active_state in active_states:
if active_state.transition and char == active_state.transition[0]:
next_states.update(_follow_epsilon_transitions(active_state.transition[1]))
next_states.update(epsilon_closure(active_state.transition[1]))
if not next_states:
return False
active_states = next_states
Expand All @@ -31,7 +23,10 @@ def match(expression: list[Operator | str], text: list):


def render(expression: list[Operator | str]):
nfa = expression_to_nfa(expression)
render_nfa(expression_to_nfa(expression))


def render_nfa(nfa: Automata):
dot = to_dot(nfa)
with tempfile.NamedTemporaryFile(mode='w') as f:
f.write(dot)
Expand Down
Loading

0 comments on commit ff80c98

Please sign in to comment.