From cebdb8b7a2f8e064bfc282231cd7ad1ed273770d Mon Sep 17 00:00:00 2001 From: Traun Leyden Date: Fri, 19 Jan 2024 10:37:55 +0100 Subject: [PATCH] Add data format to readme --- dalm/pipelines/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/dalm/pipelines/README.md b/dalm/pipelines/README.md index f43ab39..cc4da60 100644 --- a/dalm/pipelines/README.md +++ b/dalm/pipelines/README.md @@ -13,4 +13,24 @@ python dalm/pipelines/reading_comprehension_pipeline.py --model_name HuggingFace --llm_synth_model_name meta-llama/Llama-2-13b-chat-hf \ --llm_synth_model_context_length 4096 +``` + +### Data format + +``` +{"messages":[ + [{"role":"...", "content": "..."}, {"role":"...", "content": "..."}, ...], + [{"role":"...", "content": "..."}, {"role":"...", "content": "..."}, ...], + [{"role":"...", "content": "..."}, {"role":"...", "content": "..."}, ...], + .... + ] +} +``` + +taken from this snippet: + +``` +import datasets +a = datasets.load_dataset("arcee-ai/azure-reading-comprehension-dataset") +print(a["train"]["messages"][0]) ``` \ No newline at end of file