From 2d50d71a1602cddf46aef45286bfaf2468a1347a Mon Sep 17 00:00:00 2001 From: moritzlaurer Date: Sun, 17 Nov 2024 15:18:55 +0100 Subject: [PATCH] test: introduced doctest and doc examples --- .github/workflows/test.yml | 2 +- hf_hub_prompts/hub_api.py | 24 ++-- hf_hub_prompts/populated_prompt.py | 50 ++++++-- hf_hub_prompts/prompt_templates.py | 189 +++++++++++++++++++++++++++-- pyproject.toml | 4 +- tests/conftest.py | 21 ++++ tests/test_data/code_teacher.yaml | 17 +++ tests/test_data/sync_test_data.py | 39 ++++++ tests/test_data/translate.yaml | 15 +++ 9 files changed, 322 insertions(+), 39 deletions(-) create mode 100644 tests/test_data/code_teacher.yaml create mode 100644 tests/test_data/sync_test_data.py create mode 100644 tests/test_data/translate.yaml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 35e6c04..38332c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -54,7 +54,7 @@ jobs: - name: Run tests with pytest continue-on-error: true # Won't fail the PR - run: poetry run pytest --cov=hf_hub_prompts --cov-report=xml + run: poetry run pytest --doctest-modules --cov=hf_hub_prompts --cov-report=xml - name: Upload results to Codecov uses: codecov/codecov-action@v4 diff --git a/hf_hub_prompts/hub_api.py b/hf_hub_prompts/hub_api.py index feedbf8..997a454 100644 --- a/hf_hub_prompts/hub_api.py +++ b/hf_hub_prompts/hub_api.py @@ -26,26 +26,24 @@ def download_prompt( >>> # Inspect the template >>> template.template 'Translate the following text to {language}:\\n{text}' - >>> # Populate the template - >>> prompt = template.populate_template( - ... language="French", - ... text="Hello world!" - ... ) + >>> template.input_variables + ['language', 'text'] + >>> template.metadata['name'] + 'Simple Translator' Download and use a chat prompt template: >>> # Download code teaching prompt - >>> chat_template = download_prompt( + >>> template = download_prompt( ... repo_id="MoritzLaurer/example_prompts", ... 
filename="code_teacher.yaml" ... ) >>> # Inspect the template - >>> chat_template.messages[1]["content"] - 'Explain what {concept} is in {programming_language}.' - >>> # Populate the template - >>> chat_prompt = chat_template.populate_template( - ... concept="list comprehension", - ... programming_language="Python" - ... ) + >>> template.messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what {concept} is in {programming_language}.'}] + >>> template.input_variables + ['concept', 'programming_language'] + >>> template.metadata['version'] + '0.0.1' Args: repo_id (str): The repository ID on Hugging Face Hub (e.g., 'username/repo_name'). diff --git a/hf_hub_prompts/populated_prompt.py b/hf_hub_prompts/populated_prompt.py index 83a7998..79f5660 100644 --- a/hf_hub_prompts/populated_prompt.py +++ b/hf_hub_prompts/populated_prompt.py @@ -7,16 +7,18 @@ @dataclass class PopulatedPrompt: - """A class representing a populated prompt. - - Examples: - >>> # For standard prompts - >>> prompt = template.populate_template(name="Alice") - >>> text = prompt.content - >>> - >>> # For chat prompts - >>> prompt = chat_template.populate_template(name="Alice") - >>> messages = prompt.format_for_client(client="anthropic") + """A class representing a populated prompt that can be formatted to be compatible with different LLM clients. + + This class serves two main purposes: + 1. Store populated prompts (both text and chat formats) + 2. 
Convert chat prompts between different LLM client formats (e.g., OpenAI, Anthropic) + + The class handles two types of content: + + * **Text prompts**: Simple strings that can be used directly with any LLM + * **Chat prompts**: Lists or Dicts of messages that are compatible with the format expected by different LLM clients + + For examples of converting between client formats, see the [`format_for_client()`][hf_hub_prompts.populated_prompt.PopulatedPrompt.format_for_client] method. """ content: Union[str, List[Dict[str, Any]]] @@ -24,6 +26,32 @@ class PopulatedPrompt: def format_for_client(self, client: str = "openai") -> Union[List[Dict[str, Any]], Dict[str, Any]]: """Format the prompt content for a specific client. + Examples: + Format chat messages for different clients: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # By default, the populated prompt.content is in the OpenAI messages format + >>> messages_openai = prompt.format_for_client("openai") + >>> messages_openai == prompt.content + True + + >>> # We can also convert the populated prompt to other formats + >>> messages_anthropic = prompt.format_for_client("anthropic") + >>> messages_anthropic == prompt.content + False + >>> messages_anthropic + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + Args: client (str): The client format to use ('openai', 'anthropic'). Defaults to 'openai'. 
@@ -31,7 +59,7 @@ def format_for_client(self, client: str = "openai") -> Union[List[Dict[str, Any] Union[List[Dict[str, Any]], Dict[str, Any]]: Formatted prompt content suitable for the specified client. Raises: - ValueError: If an unsupported client format is specified. + ValueError: If an unsupported client format is specified or if trying to format a text prompt. """ if isinstance(self.content, str): # For standard prompts, format_for_client does not add value diff --git a/hf_hub_prompts/prompt_templates.py b/hf_hub_prompts/prompt_templates.py index 10ad8c7..4da397f 100644 --- a/hf_hub_prompts/prompt_templates.py +++ b/hf_hub_prompts/prompt_templates.py @@ -21,7 +21,12 @@ class BasePromptTemplate(ABC): - """An abstract base class for prompt templates.""" + """An abstract base class for prompt templates. + + This class defines the common interface and shared functionality for all prompt templates. + Users should not instantiate this class directly, but instead use TextPromptTemplate + or ChatPromptTemplate, which are subclasses of BasePromptTemplate. + """ # Type hints for optional standard attributes shared across all template types metadata: Optional[Dict[str, Any]] @@ -33,8 +38,8 @@ def __init__(self, prompt_data: Dict[str, Any], prompt_url: Optional[str] = None self._set_required_attributes_for_template_type(prompt_data) # Set optional standard attributes that are the same across all templates - self.metadata = prompt_data.get("metadata") self.input_variables = prompt_data.get("input_variables") + self.metadata = prompt_data.get("metadata") # Store any additional optional data that might be present in the prompt data self.other_data = { @@ -69,14 +74,38 @@ def populate_template(self, **input_variables: Any) -> PopulatedPrompt: pass def display(self, format: Literal["json", "yaml"] = "json") -> None: - """Display the prompt configuration in the specified format.""" + """Display the prompt configuration in the specified format. 
+ + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> template.display(format="yaml") # doctest: +NORMALIZE_WHITESPACE + template: 'Translate the following text to {language}: + {text}' + input_variables: + - language + - text + metadata: + name: Simple Translator + description: A simple translation prompt for illustrating the standard prompt YAML + format + tags: + - translation + - multilinguality + version: 0.0.1 + author: Some Person + """ # Create a dict of all attributes except other_data - display_dict = {k: v for k, v in self.__dict__.items() if k not in ["other_data"] or v} + display_dict = self.__dict__.copy() + display_dict.pop("other_data", None) if format == "json": - print(json.dumps(display_dict, indent=2)) + print(json.dumps(display_dict, indent=2), end="") elif format == "yaml": - print(yaml.dump(display_dict, default_flow_style=False, sort_keys=False)) + print(yaml.dump(display_dict, default_flow_style=False, sort_keys=False), end="") def to_dict(self) -> Dict[str, Any]: return self.__dict__ @@ -153,7 +182,32 @@ def _validate_input_variables(self, input_variables: Dict[str, Any]) -> None: class TextPromptTemplate(BasePromptTemplate): - """A class representing a standard prompt template.""" + """A class representing a standard text prompt template. + + Examples: + Download and use a text prompt template: + >>> from hf_hub_prompts import download_prompt + >>> # Download example translation prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> # Inspect template attributes + >>> template.template + 'Translate the following text to {language}:\\n{text}' + >>> template.input_variables + ['language', 'text'] + >>> template.metadata['name'] + 'Simple Translator' + + >>> # Use the template + >>> prompt = template.populate_template( + ... 
language="French", + ... text="Hello world!" + ... ) + >>> prompt.content + 'Translate the following text to French:\\nHello world!' + """ # Type hints for template-specific attributes template: str @@ -169,6 +223,21 @@ def _set_required_attributes_for_template_type(self, prompt_data: Dict[str, Any] def populate_template(self, **input_variables: Any) -> PopulatedPrompt: """Populate the prompt by replacing placeholders with provided values. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> template.template + 'Translate the following text to {language}:\\n{text}' + >>> prompt = template.populate_template( + ... language="French", + ... text="Hello world!" + ... ) + >>> prompt.content + 'Translate the following text to French:\\nHello world!' + Args: **input_variables: The values to fill placeholders in the template. @@ -182,6 +251,18 @@ def populate_template(self, **input_variables: Any) -> PopulatedPrompt: def to_langchain_template(self) -> "LC_PromptTemplate": """Convert the TextPromptTemplate to a LangChain PromptTemplate. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> lc_template = template.to_langchain_template() + >>> # test equivalence + >>> from langchain_core.prompts import PromptTemplate as LC_PromptTemplate + >>> isinstance(lc_template, LC_PromptTemplate) + True + Returns: PromptTemplate: A LangChain PromptTemplate object. @@ -189,7 +270,7 @@ def to_langchain_template(self) -> "LC_PromptTemplate": ImportError: If LangChain is not installed. """ try: - from langchain.prompts import PromptTemplate as LC_PromptTemplate + from langchain_core.prompts import PromptTemplate as LC_PromptTemplate except ImportError as e: raise ImportError("LangChain is not installed. 
Please install it with 'pip install langchain'") from e @@ -201,7 +282,45 @@ class ChatPromptTemplate(BasePromptTemplate): -    """A class representing a chat prompt template that can be formatted and used with various LLM clients.""" +    """A class representing a chat prompt template that can be formatted for and used with various LLM clients. + + Examples: + Download and use a chat prompt template: + >>> from hf_hub_prompts import download_prompt + >>> # Download example code teaching prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> # Inspect template attributes + >>> template.messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what {concept} is in {programming_language}.'}] + >>> template.input_variables + ['concept', 'programming_language'] + + >>> # Populate the template + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # By default, the populated prompt is in the OpenAI messages format, as it is adopted by many open-source libraries + >>> # You can convert to formats used by other LLM clients like Anthropic like this: + >>> messages_anthropic = prompt.format_for_client("anthropic") + >>> messages_anthropic + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + + >>> # Convenience method to populate and format in one step + >>> messages = template.create_messages( + ... 
client="anthropic", + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> messages + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + """ # Type hints for template-specific attributes messages: List[Dict[str, Any]] @@ -217,6 +336,19 @@ def _set_required_attributes_for_template_type(self, prompt_data: Dict[str, Any] def populate_template(self, **input_variables: Any) -> PopulatedPrompt: """Populate the prompt messages by replacing placeholders with provided values. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + Args: **input_variables: The values to fill placeholders in the messages. @@ -235,13 +367,34 @@ def create_messages( ) -> Union[List[Dict[str, Any]], Dict[str, Any]]: """Convenience method to populate template and format for client in one step. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> # Format for OpenAI (default) + >>> messages = template.create_messages( + ... concept="list comprehension", + ... programming_language="Python" + ... 
) + >>> messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # Format for Anthropic + >>> messages = template.create_messages( + ... client="anthropic", + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> messages + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + Args: client (str): The client format to use ('openai', 'anthropic'). Defaults to 'openai'. **input_variables: The variables to fill into the template. For example, if your template expects variables like 'name' and 'age', pass them as keyword arguments: - >>> messages = template.create_messages(client="openai", name="Alice", age=30) - Returns: Union[List[Dict[str, Any]], Dict[str, Any]]: Populated and formatted messages. """ @@ -258,6 +411,18 @@ def create_messages( def to_langchain_template(self) -> "LC_ChatPromptTemplate": """Convert the ChatPromptTemplate to a LangChain ChatPromptTemplate. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> lc_template = template.to_langchain_template() + >>> # test equivalence + >>> from langchain_core.prompts import ChatPromptTemplate as LC_ChatPromptTemplate + >>> isinstance(lc_template, LC_ChatPromptTemplate) + True + Returns: ChatPromptTemplate: A LangChain ChatPromptTemplate object. @@ -265,7 +430,7 @@ def to_langchain_template(self) -> "LC_ChatPromptTemplate": ImportError: If LangChain is not installed. 
""" try: - from langchain.prompts import ChatPromptTemplate as LC_ChatPromptTemplate + from langchain_core.prompts import ChatPromptTemplate as LC_ChatPromptTemplate except ImportError as e: raise ImportError("LangChain is not installed. Please install it with 'pip install langchain'") from e diff --git a/pyproject.toml b/pyproject.toml index 879ea0d..e44d16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,8 +54,8 @@ pre-commit = "^4.0.1" [tool.pytest.ini_options] minversion = "7.0" -testpaths = ["tests"] -python_files = "test_*.py" +testpaths = ["tests", "hf_hub_prompts"] +python_files = ["test_*.py", "*.py"] pythonpath = [".","src"] addopts = [ "--verbose", diff --git a/tests/conftest.py b/tests/conftest.py index e69de29..414952b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -0,0 +1,21 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture(autouse=True) +def mock_hf_hub(monkeypatch): + """Mock Hugging Face Hub API calls for doctests.""" + + def mock_download(*args, **kwargs): + # Map Hub files to local test files + test_data_dir = Path(__file__).parent / "test_data" + + if "translate.yaml" in str(args): + return str(test_data_dir / "translate.yaml") + elif "code_teacher.yaml" in str(args): + return str(test_data_dir / "code_teacher.yaml") + + raise ValueError(f"Unknown test file requested: {args}") + + monkeypatch.setattr("huggingface_hub.hf_hub_download", mock_download) diff --git a/tests/test_data/code_teacher.yaml b/tests/test_data/code_teacher.yaml new file mode 100644 index 0000000..ce674c0 --- /dev/null +++ b/tests/test_data/code_teacher.yaml @@ -0,0 +1,17 @@ +prompt: + messages: + - role: "system" + content: "You are a coding assistant who explains concepts clearly and provides short examples." + - role: "user" + content: "Explain what {concept} is in {programming_language}." 
+ input_variables: + - concept + - programming_language + metadata: + name: "Code Teacher" + description: "A simple chat prompt for explaining programming concepts with examples" + tags: + - programming + - education + version: "0.0.1" + author: "My Awesome Company" \ No newline at end of file diff --git a/tests/test_data/sync_test_data.py b/tests/test_data/sync_test_data.py new file mode 100644 index 0000000..44db500 --- /dev/null +++ b/tests/test_data/sync_test_data.py @@ -0,0 +1,39 @@ +""" +Script to sync example prompts from the Hub to local test data. +Usage: +poetry run python tests/test_data/sync_test_data.py +""" + +from pathlib import Path + +from huggingface_hub import HfApi, hf_hub_download + + +def sync_test_files(): + """Download all YAML/JSON example prompts from Hub and save to test_data directory.""" + test_data_dir = Path(__file__).parent.parent / "test_data" + test_data_dir.mkdir(exist_ok=True, parents=True) + + # Get list of all files in the repo + api = HfApi() + repo_id = "MoritzLaurer/example_prompts" + all_files = api.list_repo_files(repo_id) + + # Filter for YAML and JSON files + prompt_files = [file for file in all_files if file.endswith((".yaml", ".yml", ".json"))] + + # Download each file + for file in prompt_files: + print(f"Downloading {file}...") + hub_file = hf_hub_download(repo_id=repo_id, filename=file) + local_file = test_data_dir / Path(file).name + # Copy content to local test file + with open(hub_file, "r") as src, open(local_file, "w") as dst: + dst.write(src.read()) + print(f"Saved to {local_file}") + + print(f"\nSynced {len(prompt_files)} files to {test_data_dir}") + + +if __name__ == "__main__": + sync_test_files() diff --git a/tests/test_data/translate.yaml b/tests/test_data/translate.yaml new file mode 100644 index 0000000..9b54315 --- /dev/null +++ b/tests/test_data/translate.yaml @@ -0,0 +1,15 @@ +prompt: + template: |- # The "|-" makes the following string behave similar to a string in """...""" in Python to render 
linebreaks correctly. For YAML syntax explanations see e.g. https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started. + Translate the following text to {language}: + {text} + input_variables: + - language + - text + metadata: + name: "Simple Translator" + description: "A simple translation prompt for illustrating the standard prompt YAML format" + tags: + - translation + - multilinguality + version: "0.0.1" + author: "Some Person" \ No newline at end of file