-
Notifications
You must be signed in to change notification settings - Fork 287
/
Copy pathresult_evaluation.py
47 lines (41 loc) · 1.74 KB
/
result_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Evaluate a result from an agent execution.
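The embedding-distance comparison to an expected output (a reference) is kept
below as commented-out code; the active code compares two candidate texts
against custom criteria with a pairwise string evaluator.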
"""
from config import set_environment
from langchain.evaluation import EvaluatorType, PairwiseStringEvalChain, load_evaluator
# Project-local helper imported from config; presumably it sets the API keys /
# environment variables the evaluator's LLM needs - TODO confirm in config.py.
set_environment()
# evaluator = load_evaluator("embedding_distance")
#
# print(evaluator.evaluate_strings(prediction="I shall go", reference="I shan't go"))
#
#
# evaluator = load_evaluator("labeled_pairwise_string")
#
# print(evaluator.evaluate_string_pairs(
# prediction="there are three dogs",
# prediction_b="4",
# input="how many dogs are in the park?",
# reference="four",
# ))
# Criteria handed to the pairwise evaluator: each key names a criterion and
# each value is the question used to judge a text against it.
custom_criteria = {
    "simplicity": "Is the language straightforward and unpretentious?",
    "clarity": "Are the sentences clear and easy to understand?",
    "precision": "Is the writing precise, with no unnecessary words or details?",
    "truthfulness": "Does the writing feel honest and sincere?",
    "subtext": "Does the writing suggest deeper meanings or themes?",
}
# Load the pairwise string evaluator, configured with the custom criteria.
evaluator = load_evaluator(
    EvaluatorType.PAIRWISE_STRING,
    criteria=custom_criteria,
)
# Narrow the value returned by load_evaluator to the pairwise chain type.
assert isinstance(evaluator, PairwiseStringEvalChain)

# Two candidate texts written for the same prompt.
first_candidate = (
    "Every cheerful household shares a similar rhythm of joy; but sorrow, "
    "in each household, plays a unique, haunting melody."
)
second_candidate = (
    "Where one finds a symphony of joy, every domicile of happiness "
    "resounds in harmonious,"
    " identical notes; yet, every abode of despair conducts a dissonant orchestra, each"
    " playing an elegy of grief that is peculiar and profound to its own existence."
)
writing_prompt = "Write some prose about families."

# Compare the two candidates and print whatever the evaluator returns.
comparison = evaluator.evaluate_string_pairs(
    prediction=first_candidate,
    prediction_b=second_candidate,
    input=writing_prompt,
)
print(comparison)

# NOTE(review): the evaluation above runs at import time; this guard is
# currently a no-op placeholder.
if __name__ == "__main__":
    pass