-
Notifications
You must be signed in to change notification settings - Fork 5
/
pytest_stochastics.json
140 lines (140 loc) · 15.3 KB
/
pytest_stochastics.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
{
"test_plan_list": [
{
"plan": "initial",
"policy_tests": [
{
"policy": "mostly",
"tests": [
"tests/core/engines/alpha/test_baseline_scenarios.py::test_a_single_message_event_is_emitted_for_a_session_with_a_few_messages",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_a_single_message_event_is_emitted_for_a_session_with_a_user_message",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_add_and_multiply_tools_called_multiple_times_each",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_add_and_multiply_tools_called_once_each",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_add_tool_called_twice",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_adherence_to_guidelines_without_fabricating_responses",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_drinks_and_toppings_tools_called_from_different_guidelines",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_drinks_and_toppings_tools_called_from_same_guideline",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_failure_to_process_a_message_emits_an_error_status",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_message_generation_is_cancelled",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_no_message_is_emitted_for_an_empty_session",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_preference_for_user_request_over_guideline_account_related_questions",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_prioritizing_freezing_transactions_over_processing_refunds",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_prioritizing_transferring_the_upset_customer_to_the_manager_over_offering_pizza",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_relevant_guidelines_are_not_refreshed_based_on_tool_results_if_no_second_iteration_of_proposing_a_new_guideline_is_made",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_relevant_guidelines_are_refreshed_based_on_tool_results",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_single_tool_is_being_called_multiple_times",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_single_tool_is_being_called_once",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_single_tool_is_being_called_once[check_drinks_in_stock-get_available_drinks-Sprite and Coca Cola as available drinks]",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_single_tool_is_being_called_once[check_toppings_in_stock-get_available_toppings-Mushrooms and Olives as available toppings]",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_continues_a_conversation_that_was_started_on_its_behalf",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_distinguishes_between_tools_from_different_services",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_does_not_repeat_responses",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_does_not_start_a_conversation_if_no_proactive_guidelines_exist",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_doesnt_give_false_information_upon_user_request",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_doesnt_initiate_conversation_unprompted",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_explains_an_ambiguous_term",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_finds_and_follows_relevant_guidelines_like_a_needle_in_a_haystack",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_follows_a_guideline_that_is_entailed_by_another_guideline",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_follows_a_guideline_that_mentions_a_term_by_name",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_follows_a_guideline_that_refers_to_a_terms_definition",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_greets_the_user",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_ignores_deleted_messages_when_responding",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_initiates_conversation_when_instructed",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_offers_a_thirsty_user_a_drink",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_replies_to_farewell_messages",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_responds_to_a_censored_harassment_message",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_responds_with_a_term_retrieved_from_guideline_content",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_responds_with_a_term_retrieved_from_tool_content",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_sells_pizza_in_accordance_with_its_defined_description",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_starts_a_conversation_based_on_context_values",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_stops_replying_when_asked_explicitly",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_tool_call_is_correlated_with_the_message_with_which_it_was_generated",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_tool_call_takes_context_variables_into_consideration",
"tests/core/engines/alpha/test_guideline_proposer.py::test_that_guidelines_with_the_same_conditions_are_scored_identically",
"tests/core/engines/alpha/test_guideline_proposer.py::test_that_irrelevant_guidelines_are_not_proposed",
"tests/core/engines/alpha/test_guideline_proposer.py::test_that_relevant_guidelines_are_proposed",
"tests/core/services/indexing/test_coherence.py::test_that_a_terminology_based_incoherency_is_detected",
"tests/core/services/indexing/test_coherence.py::test_that_an_agent_description_based_incoherency_is_detected",
"tests/core/services/indexing/test_coherence.py::test_that_contextual_contradictions_are_detected_as_contingent_incoherence",
"tests/core/services/indexing/test_coherence.py::test_that_contingent_incoherencies_are_detected",
"tests/core/services/indexing/test_coherence.py::test_that_contradicting_actions_that_are_contextualized_by_their_conditions_are_detected",
"tests/core/services/indexing/test_coherence.py::test_that_contradicting_actions_with_hierarchical_conditions_are_detected",
"tests/core/services/indexing/test_coherence.py::test_that_contradictory_next_message_commands_are_detected_as_incoherencies",
"tests/core/services/indexing/test_coherence.py::test_that_entailing_conditions_with_unrelated_actions_arent_false_positives",
"tests/core/services/indexing/test_coherence.py::test_that_existing_guidelines_are_not_checked_against_each_other",
"tests/core/services/indexing/test_coherence.py::test_that_guidelines_with_many_incoherencies_are_detected",
"tests/core/services/indexing/test_coherence.py::test_that_logically_contradicting_response_actions_are_detected_as_incoherencies",
"tests/core/services/indexing/test_coherence.py::test_that_many_coherent_guidelines_arent_detected_as_false_positive",
"tests/core/services/indexing/test_coherence.py::test_that_many_guidelines_which_are_all_contradictory_are_detected",
"tests/core/services/indexing/test_coherence.py::test_that_misspelled_contradicting_actions_are_detected_as_incoherencies",
"tests/core/services/indexing/test_coherence.py::test_that_non_contradicting_guidelines_arent_false_positives",
"tests/core/services/indexing/test_coherence.py::test_that_seemingly_contradictory_but_actually_complementary_actions_are_not_false_positives",
"tests/core/services/indexing/test_coherence.py::test_that_suggestive_conditions_with_contradicting_actions_are_detected_as_contingent_incoherencies",
"tests/core/services/indexing/test_coherence.py::test_that_temporal_contradictions_are_detected_as_incoherencies",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_a_connection_is_proposed_based_on_given_glossary",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_a_connection_is_proposed_based_on_multiple_glossary_terms",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_a_suggestion_connection_is_proposed_for_two_guidelines_where_the_content_of_one_suggests_a_follow_up_to_the_condition_of_the_other",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_a_suggestive_guideline_which_entails_another_guideline_are_connected_as_suggestive",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_agent_based_connection_is_detected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_an_entailment_connection_is_proposed_for_two_guidelines_where_the_content_of_one_entails_the_condition_of_the_other",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_circular_connection_is_proposed_for_three_guidelines_where_each_action_entails_the_following_condition",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_connection_is_proposed_for_a_sequence_where_each_guideline_entails_the_next_one_using_pronouns_from_then_to_when",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_connection_is_proposed_for_a_sequence_where_each_guideline_entails_the_next_one",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_connection_is_proposed_for_a_sequence_where_each_guideline_suggests_the_next_one",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_entailing_thens_are_not_connected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_entailing_whens_are_not_connected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_guidelines_with_similar_thens_arent_connected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_identical_actions_arent_connected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_many_guidelines_with_agent_description_and_glossary_arent_detected_as_false_positives",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_misspelled_entailing_guidelines_are_connected",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_multiple_connections_are_detected_and_proposed_at_the_same_time",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_no_connection_is_made_for_a_guideline_which_implies_but_not_causes_another_guideline",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_no_connection_is_made_for_a_guidelines_whose_condition_entails_another_guidelines_condition",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_one_guideline_can_entail_multiple_guidelines",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_possible_connections_between_existing_guidelines_are_not_proposed",
"tests/core/services/indexing/test_guideline_connection_proposer.py::test_that_try_actions_are_connected_but_not_suggestive",
"tests/core/services/indexing/test_guideline_proposer.py::test_that_guidelines_with_the_same_conditions_are_scored_identically",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_tool_called_again_by_context_after_user_response",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_correctly_calls_tools_from_an_entailed_guideline",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_uses_tools_based_on_the_agents_description",
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_uses_tools_correctly_when_many_are_available"
]
},
{
"policy": "best_effort",
"tests": [
"tests/core/engines/alpha/test_baseline_scenarios.py::test_the_agent_does_not_get_stuck_in_apology_loop_when_facing_frustrated_user",
"tests/e2e/test_client_cli_via_api.py::test_that_guidelines_can_be_entailed"
]
},
{
"policy": "disable",
"tests": [
"tests/e2e/test_client_cli_via_api.py::test_that_view_a_guideline_with_connections_displays_indirect_and_direct_connections",
"tests/e2e/test_server_cli.py::test_that_the_server_recovery_restarts_all_active_evaluation_tasks"
]
}
]
}
],
"policy_list": [
{
"policy": "disable",
"at_least": 0,
"out_of": 0
},
{
"policy": "best_effort",
"at_least": 0,
"out_of": 3
},
{
"policy": "mostly",
"at_least": 2,
"out_of": 3
}
],
"plan_fallback_list": []
}