diff --git a/lmformatenforcer/jsonschemaparser.py b/lmformatenforcer/jsonschemaparser.py index f426369..1dedfe4 100644 --- a/lmformatenforcer/jsonschemaparser.py +++ b/lmformatenforcer/jsonschemaparser.py @@ -73,6 +73,7 @@ def add_character(self, new_character: str) -> CharacterLevelParser: updated_parser.object_stack[receiving_idx] = updated_parser.object_stack[receiving_idx].add_character(new_character) if new_character in WHITESPACE_CHARACTERS: updated_parser.num_consecutive_whitespaces += 1 + updated_parser.last_non_whitespace_character = self.last_non_whitespace_character else: updated_parser.num_consecutive_whitespaces = 0 updated_parser.last_non_whitespace_character = new_character diff --git a/tests/test_jsonschemaparser.py b/tests/test_jsonschemaparser.py index 3d8c6f5..ac5b027 100644 --- a/tests/test_jsonschemaparser.py +++ b/tests/test_jsonschemaparser.py @@ -354,3 +354,20 @@ class FlightRoute(BaseModel): _test_json_schema_parsing_with_string(output_ok, FlightRoute.model_json_schema(), True) _test_json_schema_parsing_with_string(output_notok, FlightRoute.model_json_schema(), False) + + +def test_comma_cannot_start_list_2(): + # This also stresses the whitespace handling + max consecutive whitespace concept, + # bug reported in https://github.com/noamgat/lm-format-enforcer/issues/80 + output_notok = """ + { + "airports": [ + ,"Hamad", + ",Doha", + ",Bahrain", + ",Dammam" + ] + }""" + class FlightRoute(BaseModel): + airports: List[str] + _test_json_schema_parsing_with_string(output_notok, FlightRoute.model_json_schema(), False) \ No newline at end of file