Skip to content

Commit

Permalink
Regex performance improvement - avoiding iterating over all alphabet every state
Browse files: browse the repository at this point in the history
  • Loading branch information
noamgat committed Nov 5, 2023
1 parent 32ccda0 commit b703411
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions lmformatenforcer/regexparser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Hashable, Optional, Union
from typing import Dict, Hashable, Optional, Union, List
import interegular
from interegular.fsm import anything_else

Expand Down Expand Up
@@ -59,8 +59,9 @@ def get_allowed_characters(self) -> str:
if self.current_state not in self.context.state_character_cache:
allowed_characters = []
state_map = self.context.pattern.map[self.current_state]
for symbol, symbol_idx in self.context.pattern.alphabet.items():
if symbol_idx in state_map:
for symbol_idx in state_map:
symbols: List[str] = self.context.pattern.alphabet.by_transition[symbol_idx]
for symbol in symbols:
if symbol == anything_else:
allowed_characters.append(self.context.anything_else_characters)
else:
Expand Down

0 comments on commit b703411

Please sign in to comment.