Skip to content

Commit

Permalink
Removed unneeded aio fix
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewcoole committed Nov 29, 2024
1 parent 23343d7 commit b5c49cb
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions scripts/generate_synthetic_testset.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
import json
import logging
from argparse import ArgumentParser
from typing import List

import nest_asyncio
from langchain.docstore.document import Document
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from ragas.run_config import RunConfig
from ragas.testset.evolutions import multi_context, reasoning, simple
from ragas.testset.generator import TestsetGenerator
from langchain.docstore.document import Document
import json
from argparse import ArgumentParser

logger = logging.getLogger(__name__)
logging.basicConfig(
level=logging.INFO,
handlers=[logging.StreamHandler()],
)


def load_metadata(metadata_file):
def load_metadata(metadata_file: str) -> List[Document]:
with open(metadata_file) as f:
json_data = json.load(f)
return [
Expand All @@ -21,16 +30,16 @@ def load_metadata(metadata_file):
]


def main(metadata_file, testset_output_file, testset_size=5):
nest_asyncio.apply()
def main(metadata_file: str, testset_output_file: str, testset_size: int = 5) -> None:
docs = load_metadata(metadata_file)
logger.info(f"Loaded {len(docs)} documents from {metadata_file}")
llm = ChatOllama(model="mistral-nemo", num_ctx=16384)
embeddings = OllamaEmbeddings(model="mistral-nemo", num_ctx=16384)
gen = TestsetGenerator.from_langchain(
llm, llm, embeddings, run_config=RunConfig(max_workers=1, max_retries=1)
llm, llm, embeddings, run_config=RunConfig(max_workers=1, max_retries=2)
)
dist = {simple: 0.6, multi_context: 0.2, reasoning: 0.2}
testset = gen.generate_with_langchain_docs(docs, testset_size, dist, is_async=False)
testset = gen.generate_with_langchain_docs(docs, testset_size, dist)
df = testset.to_pandas()
df.to_csv(testset_output_file, index=False)

Expand Down

1 comment on commit b5c49cb

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

answer_correctness: 0.46513286457053726
answer_relevancy: 0.498770215655982
context_recall: 0.4881271183104263
context_precision: 0.47066042100264194

Please sign in to comment.