Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run corpus of TLA+ syntax tests in simple pass/fail manner #159

Merged
merged 28 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6de51d5
Basic round-trip parse unit test
ahelwer Jul 30, 2024
52c8a4a
Syntax corpus files copied by dune build
ahelwer Oct 16, 2024
76b0b24
Basic parsing of test file format
ahelwer Oct 17, 2024
88ff8ed
More structured parsing of all test files
ahelwer Oct 19, 2024
5fc1074
Show syntax_test datastructure using ppx_deriving
ahelwer Oct 21, 2024
8789325
Parse test attributes
ahelwer Oct 21, 2024
e44b1c2
Split corpus file parsing logic into separate file
ahelwer Oct 21, 2024
cb43c6a
Added documentation to syntax corpus file parser
ahelwer Oct 21, 2024
d6a7a84
Parse expected test output using sexplib
ahelwer Oct 21, 2024
e0ea1c4
Send corpus tests into parser
ahelwer Oct 21, 2024
5be306b
Update test corpus, print failures
ahelwer Oct 22, 2024
3b47b05
Accumulate test run summary
ahelwer Oct 22, 2024
5a9a76d
Isolate failing syntax test cases
ahelwer Oct 22, 2024
a80c1e4
Encode failing tests; assert on test failure
ahelwer Oct 22, 2024
4476a32
Added documentation to syntax corpus test file
ahelwer Oct 22, 2024
7af9882
Simplified test control logic
ahelwer Oct 23, 2024
0bde78d
Accumulate test summaries in place
ahelwer Oct 23, 2024
93c084a
Use List.partition for parsing test header
ahelwer Oct 23, 2024
ee66bed
Add test deps to dune-project
ahelwer Oct 23, 2024
a8b5f6a
Disambiguated parser tests filename
ahelwer Oct 23, 2024
9a8d99d
Formalized test filter predicate
ahelwer Oct 23, 2024
894bd9c
Added syntax tests for \forall and \exists
ahelwer Oct 24, 2024
7fd6b9b
Tag test failures with github issues
ahelwer Oct 24, 2024
7ab6f54
Added failing test for parameterized refinement in subscript
ahelwer Oct 24, 2024
1da7b5b
Added failing tests for implicit proof names
ahelwer Oct 24, 2024
0cc1d87
Added test for proof level changes terminating jlists
ahelwer Oct 24, 2024
86a046e
Added failing parse tests from SANY
ahelwer Oct 29, 2024
8a9533b
Added syntax tests for tuples
ahelwer Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
camlzip
re2
ppx_inline_test
ppx_assert)
ppx_assert
ppx_deriving
ounit2)
(depopts
lsp ; https://github.com/ocaml/ocaml-lsp
eio_main)) ; https://github.com/ocaml-multicore/eio, only available on OCaml version >= 5.
7 changes: 7 additions & 0 deletions test/parser/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
(test
(name parser_tests)
(modes exe)
(libraries tlapm_lib ounit2 sexplib)
(deps (glob_files_rec syntax_corpus/*))
(preprocess (pps ppx_deriving.show))
)
177 changes: 177 additions & 0 deletions test/parser/parser_tests.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
(** This test runs a battery of TLA+ syntax fragments against TLAPM's syntax
parser. In the future it will check the actual parse tree emitted by
TLAPM, but for now it just checks whether TLAPM parses without error all
the syntax it is expected to parse. Tests sourced from:
https://github.com/tlaplus/tlaplus-standard/tree/main/tests/tlaplus_syntax
*)

open Syntax_corpus_file_parser;;
open OUnit2;;

(** Calls TLAPM's parser with the given input. Catches all exceptions and
treats them as parse failures.
@param input The TLA+ fragment to parse.
@return None if parse failure, syntax tree root if successful.
*)
let parse (input : string) : Tlapm_lib__M_t.mule option =
try Tlapm_lib.module_of_string input
with _ -> None

(** Datatype summarizing a run of all the syntax tests. *)
type test_run_summary = {
total : int;
succeeded : int;
failed : int;
skipped : int;
failures : syntax_test_info list;
} [@@deriving show]

(** A blank test summary. *)
let test_summary_init = {
total = 0;
succeeded = 0;
failed = 0;
skipped = 0;
failures = [];
}

(** Runs a given syntax test by determining its type then sending the input
into the TLAPM parser.
@param expect_failure Whether this test should fail due to a TLAPM bug.
@param test Information about the test itself.
@return Whether the test succeeded.
*)
let run_test (test : syntax_test) : bool =
match test.test with
| Error_test input -> parse input |> Option.is_none
| Expected_test (input, _) -> parse input |> Option.is_some

(** Controls run of a given syntax test. Checks whether test should be
skipped and whether it is expected to fail, then runs test and returns
summary.
@param expect_failure Whether this test should fail due to a TLAPM bug.
@param acc Accumulation variable for test summarization.
@param test Information about the test itself.
@return Test run summary.
*)
let control_test_run
(expect_failure : syntax_test -> bool)
(acc : test_run_summary)
(test : syntax_test)
: test_run_summary =
let acc = {acc with total = acc.total + 1} in
if test.skip then {acc with skipped = acc.skipped + 1} else
if run_test test = expect_failure test
then {acc with failed = acc.failed + 1; failures = test.info :: acc.failures}
else {acc with succeeded = acc.succeeded + 1}

(** Given a path to a directory containing a corpus of syntax tests, get all
the tests encoded in those files, filter them as appropriate, then run
them all and collect the results.
@param path Path to the directory containing the corpus of syntax tests.
@param expect_failure Whether a test should fail due to a TLAPM bug.
@param filter_predicate Whether to actually execute a test.
@return Accumulated summary of all test executions.
*)
let run_test_corpus
(path : string)
(expect_failure : syntax_test -> bool)
(filter_pred : syntax_test -> bool)
: test_run_summary =
path
|> get_all_tests_under
|> List.filter filter_pred
|> List.fold_left (control_test_run expect_failure) test_summary_init

(** Names of tests that are known to fail due to TLAPM parser bugs.
@param test Information about the test.
@return Whether the test is expected to fail.
*)
let expect_failure (test : syntax_test) : bool =
List.mem test.info.name [
(* https://github.com/tlaplus/tlapm/issues/11 *)
"Bounded Quantification With Tuples";
"Mixed Bounded Quantification With Tuples";
"Bounded CHOOSE With Tuple";
"Unbounded CHOOSE With Tuple";
"Set Filter with Tuple";

(* https://github.com/tlaplus/tlapm/issues/54#issuecomment-2435515180 *)
"RECURSIVE inside LET/IN";
"Conjlist with RECURSIVE in LET/IN";
"Disjlist with RECURSIVE in LET/IN";

(* https://github.com/tlaplus/tlapm/issues/160 *)
"Verbose Bounded Quantification";

(* https://github.com/tlaplus/tlapm/issues/161 *)
"Infix Minus as Parameter";
"Prefix Operator References";

(* https://github.com/tlaplus/tlapm/issues/162 *)
"Cartesian Product Infix Op Definition";
"Cartesian Product Declaration as Parameter";

(* https://github.com/tlaplus/tlapm/issues/163 *)
"Bitfield Number Formats";

(* https://github.com/tlaplus/tlapm/issues/164 *)
"Mistaken Set Filter Test";

(* https://github.com/tlaplus/tlapm/issues/165 *)
"Proof by QED with implicit step level";

(* https://github.com/tlaplus/tlapm/issues/166 *)
"Use & Hide Modules";
"Proof by Module References";

(* https://github.com/tlaplus/tlapm/issues/167 *)
"Proof with INSTANCE step type";

(* https://github.com/tlaplus/tlapm/issues/168 *)
"Invalid parentheses use in jlist";

(* https://github.com/tlaplus/tlapm/issues/169 *)
"Label interfering with precedence";

(* https://github.com/tlaplus/tlapm/issues/156 *)
"Step Expression With Parameterized Subscript";

(* https://github.com/tlaplus/tlapm/issues/170 *)
"Implicit Proof Steps With Names";
"Plus Proof Step With Name";

(* https://github.com/tlaplus/tlapm/issues/172 *)
"Invalid LOCAL Declaration of THEOREM";
"Invalid LOCAL Declaration of ASSUME";
"Invalid LOCAL Declaration of USE";

(* https://github.com/tlaplus/tlapm/issues/173 *)
"Decimal No Leading Zero (GH tlaplus/tlaplus #596)";

(* https://github.com/tlaplus/tlapm/issues/173 *)
"Nonfix Submodule Excl (GH tlaplus/tlaplus #GH884)";
"Nonfix Double Exclamation Operator (GH TSTLA #GH97, GH tlaplus/tlaplus #884)";
]

(** Filter predicate to control which tests to run.
@param name Optional; a test name to filter on.
@return Predicate matching all tests or tests with given name.
*)
let should_run ?name test =
match name with
| Some name -> String.equal test.info.name name
| None -> true

(** The top-level test; runs all syntax tests, prints summary, then fails
with an assertion if any tests failed.
*)
let () =
let test_results =
run_test_corpus
"syntax_corpus"
expect_failure
(should_run (*~name:"Proof Containing Jlist"*))
in
print_endline (show_test_run_summary test_results);
assert_equal 0 test_results.failed;
174 changes: 174 additions & 0 deletions test/parser/syntax_corpus/assume-prove.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
===============================|||
Basic Assume/Prove
===============================|||

---- MODULE Test ----
THEOREM thm ==
ASSUME P, Q
PROVE C
OBVIOUS
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem
name: (identifier) (def_eq)
statement: (assume_prove
assumption: (identifier_ref)
assumption: (identifier_ref)
conclusion: (identifier_ref)
)
proof: (terminal_proof)
)
(double_line)))

===============================|||
Nested Assume/Prove With Label
===============================|||

---- MODULE Test ----
THEOREM thm ==
ASSUME asm ::
ASSUME P
PROVE Q
PROVE C
OMITTED
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem
name: (identifier) (def_eq)
statement: (assume_prove
assumption: (inner_assume_prove
(identifier) (label_as)
(assume_prove
assumption: (identifier_ref)
conclusion: (identifier_ref)
)
)
conclusion: (identifier_ref)
)
proof: (terminal_proof)
)
(double_line)))

===============================|||
Assume/Prove With New Constant
===============================|||

---- MODULE Test ----
THEOREM thm ==
ASSUME NEW CONSTANT x \in S
PROVE C
OBVIOUS
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem
name: (identifier) (def_eq)
statement: (assume_prove
assumption: (new (statement_level) (identifier) (set_in) (identifier_ref))
conclusion: (identifier_ref)
)
proof: (terminal_proof)
)
(double_line)))

===============================|||
Assume/Prove With New Operator
===============================|||

---- MODULE Test ----
THEOREM thm ==
ASSUME NEW TEMPORAL f(_, _)
PROVE C
OMITTED
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem
name: (identifier) (def_eq)
statement: (assume_prove
assumption: (new (statement_level) (operator_declaration
name: (identifier) parameter: (placeholder) parameter: (placeholder)
))
conclusion: (identifier_ref)
)
proof: (terminal_proof)
)
(double_line)))

===============================|||
Assume/Prove in Suffices Step
===============================|||

---- MODULE Test ----
THEOREM TRUE
<1>a. SUFFICES
ASSUME CONSTANT x \in S
PROVE TRUE
<1>b. QED
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem statement: (boolean)
proof: (non_terminal_proof
(proof_step (proof_step_id (level) (name))
(suffices_proof_step (assume_prove
assumption: (new (statement_level) (identifier) (set_in) (identifier_ref))
conclusion: (boolean)
))
)
(qed_step (proof_step_id (level) (name)))
)
)
(double_line)))

===============================|||
Assume/Prove with Mixed Assumptions
===============================|||

---- MODULE Test ----
THEOREM thm ==
ASSUME
NEW f,
1 + 2,
/\ 1
/\ 2,
lbl :: ASSUME P PROVE Q
PROVE C
OMITTED
====

-------------------------------|||

(source_file (module (header_line) name: (identifier) (header_line)
(theorem
name: (identifier) (def_eq)
statement: (assume_prove
assumption: (new (identifier))
assumption: (bound_infix_op lhs: (nat_number) symbol: (plus) rhs: (nat_number))
assumption: (conj_list (conj_item (bullet_conj) (nat_number)) (conj_item (bullet_conj) (nat_number)))
assumption: (inner_assume_prove
(identifier)
(label_as)
(assume_prove
assumption: (identifier_ref)
conclusion: (identifier_ref)
)
)
conclusion: (identifier_ref)
)
proof: (terminal_proof)
)
(double_line)))

Loading