Skip to content

Commit

Permalink
Merge pull request #562 from confident-ai/feature/datasetsave
Browse files Browse the repository at this point in the history
Feature/datasetsave
  • Loading branch information
penguine-ip authored Mar 4, 2024
2 parents 6b63b74 + c65ddab commit 32d3a09
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 3 deletions.
58 changes: 58 additions & 0 deletions deepeval/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
from dataclasses import dataclass
from rich.console import Console
import json
import csv
import webbrowser
import os
import datetime

from deepeval.metrics import BaseMetric
from deepeval.api import Api, Endpoints
Expand All @@ -20,6 +23,8 @@
from deepeval.utils import is_confident
from deepeval.synthesizer.base_synthesizer import BaseSynthesizer

# File-type names accepted by `save_as`; the chosen name is also used as the
# extension of the file that gets written.
valid_file_types = ["csv", "json"]


@dataclass
class EvaluationDataset:
Expand Down Expand Up @@ -335,3 +340,56 @@ def generate_goldens(
self.goldens.extend(
synthesizer.generate_goldens(contexts, max_goldens_per_context)
)

# TODO: also support saving test cases (currently only goldens are saved)
def save_as(self, file_type: str, directory: str):
    """Write this dataset's goldens to a timestamped file inside `directory`.

    The file is named ``<YYYYmmdd_HHMMSS>.<file_type>`` using the current
    local time. Each golden contributes its ``input``, ``actual_output``,
    ``expected_output`` and ``context``. In JSON the context is written
    as-is; in CSV the context items are joined with ``|`` into one cell.

    Args:
        file_type: Output format; must be one of `valid_file_types`.
        directory: Destination directory. It is created (including any
            missing parents) when it does not exist yet.

    Raises:
        ValueError: If `file_type` is not supported, or if the dataset
            holds no goldens.
    """
    if file_type not in valid_file_types:
        raise ValueError(
            f"Invalid file type. Available file types to save as: {', '.join(valid_file_types)}"
        )

    if not self.goldens:
        raise ValueError(
            f"No synthetic goldens found. Please generate goldens before attempting to save data as {file_type}"
        )

    new_filename = (
        datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + f".{file_type}"
    )

    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test when another process creates the directory.
    os.makedirs(directory, exist_ok=True)

    full_file_path = os.path.join(directory, new_filename)

    if file_type == "json":
        # Build the payload before opening the file so that a failure while
        # reading a golden does not leave an empty file behind.
        json_data = [
            {
                "input": golden.input,
                "actual_output": golden.actual_output,
                "expected_output": golden.expected_output,
                "context": golden.context,
            }
            for golden in self.goldens
        ]
        with open(full_file_path, "w") as file:
            json.dump(json_data, file, indent=4)

    elif file_type == "csv":
        # newline="" lets the csv module control line endings itself.
        with open(full_file_path, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(
                ["input", "actual_output", "expected_output", "context"]
            )
            for golden in self.goldens:
                # A golden without context (None or empty) gets an empty
                # cell rather than crashing str.join with a TypeError.
                context_str = (
                    "|".join(golden.context) if golden.context else ""
                )
                writer.writerow(
                    [
                        golden.input,
                        golden.actual_output,
                        golden.expected_output,
                        context_str,
                    ]
                )

    print(f"Evaluation dataset saved at {full_file_path}!")
13 changes: 10 additions & 3 deletions deepeval/synthesizer/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from deepeval.utils import trimAndLoadJson
from deepeval.dataset.golden import Golden


valid_file_types = ["csv", "json"]


Expand Down Expand Up @@ -174,6 +173,7 @@ def save_as(self, file_type: str, directory: str):
json_data = [
{
"input": golden.input,
"actual_output": golden.actual_output,
"expected_output": golden.expected_output,
"context": golden.context,
}
Expand All @@ -184,11 +184,18 @@ def save_as(self, file_type: str, directory: str):
elif file_type == "csv":
with open(full_file_path, "w", newline="") as file:
writer = csv.writer(file)
writer.writerow(["input", "expected_output", "context"])
writer.writerow(
["input", "actual_output", "expected_output", "context"]
)
for golden in self.synthetic_goldens:
context_str = "|".join(golden.context)
writer.writerow(
[golden.input, golden.expected_output, context_str]
[
golden.input,
golden.actual_output,
golden.expected_output,
context_str,
]
)

print(f"Synthetic goldens saved at {full_file_path}!")

0 comments on commit 32d3a09

Please sign in to comment.