diff --git a/allennlp/semparse/contexts/sql_context_utils.py b/allennlp/semparse/contexts/sql_context_utils.py index 5ccd5846594..5887615ee6c 100644 --- a/allennlp/semparse/contexts/sql_context_utils.py +++ b/allennlp/semparse/contexts/sql_context_utils.py @@ -8,6 +8,8 @@ from parsimonious.nodes import Node, NodeVisitor from parsimonious.grammar import Grammar +WHITESPACE_REGEX = re.compile(" wsp |wsp | wsp| ws |ws | ws") + def format_grammar_string(grammar_dictionary: Dict[str, List[str]]) -> str: """ Formats a dictionary of production rules into the string format expected @@ -98,7 +100,7 @@ def format_action(nonterminal: str, else: right_hand_side = right_hand_side.lstrip("(").rstrip(")") - child_strings = [token for token in re.split(" wsp |wsp | wsp| ws |ws | ws|", right_hand_side) if token] + child_strings = [token for token in WHITESPACE_REGEX.split(right_hand_side) if token] child_strings = [tok.upper() if tok.upper() in keywords_to_uppercase else tok for tok in child_strings] return f"{nonterminal} -> [{', '.join(child_strings)}]" @@ -175,8 +177,7 @@ def add_action(self, node: Node) -> None: else: child_right_side_string = child.expr._as_rhs().lstrip("(").rstrip(")") # pylint: disable=protected-access child_right_side_list = [tok for tok in - re.split(" wsp |wsp | wsp| ws |ws | ws|", - child_right_side_string) if tok] + WHITESPACE_REGEX.split(child_right_side_string) if tok] child_right_side_list = [tok.upper() if tok.upper() in self.keywords_to_uppercase else tok for tok in child_right_side_list]