From 2d50d71a1602cddf46aef45286bfaf2468a1347a Mon Sep 17 00:00:00 2001 From: moritzlaurer Date: Sun, 17 Nov 2024 15:18:55 +0100 Subject: [PATCH] test: introduced doctest and doc examples --- .github/workflows/test.yml | 2 +- hf_hub_prompts/hub_api.py | 24 ++-- hf_hub_prompts/populated_prompt.py | 50 ++++++-- hf_hub_prompts/prompt_templates.py | 189 +++++++++++++++++++++++++++-- pyproject.toml | 4 +- tests/conftest.py | 21 ++++ tests/test_data/code_teacher.yaml | 17 +++ tests/test_data/sync_test_data.py | 39 ++++++ tests/test_data/translate.yaml | 15 +++ 9 files changed, 322 insertions(+), 39 deletions(-) create mode 100644 tests/test_data/code_teacher.yaml create mode 100644 tests/test_data/sync_test_data.py create mode 100644 tests/test_data/translate.yaml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 35e6c04..38332c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -54,7 +54,7 @@ jobs: - name: Run tests with pytest continue-on-error: true # Won't fail the PR - run: poetry run pytest --cov=hf_hub_prompts --cov-report=xml + run: poetry run pytest --doctest-modules --cov=hf_hub_prompts --cov-report=xml - name: Upload results to Codecov uses: codecov/codecov-action@v4 diff --git a/hf_hub_prompts/hub_api.py b/hf_hub_prompts/hub_api.py index feedbf8..997a454 100644 --- a/hf_hub_prompts/hub_api.py +++ b/hf_hub_prompts/hub_api.py @@ -26,26 +26,24 @@ def download_prompt( >>> # Inspect the template >>> template.template 'Translate the following text to {language}:\\n{text}' - >>> # Populate the template - >>> prompt = template.populate_template( - ... language="French", - ... text="Hello world!" - ... ) + >>> template.input_variables + ['language', 'text'] + >>> template.metadata['name'] + 'Simple Translator' Download and use a chat prompt template: >>> # Download code teaching prompt - >>> chat_template = download_prompt( + >>> template = download_prompt( ... repo_id="MoritzLaurer/example_prompts", ... 
filename="code_teacher.yaml" ... ) >>> # Inspect the template - >>> chat_template.messages[1]["content"] - 'Explain what {concept} is in {programming_language}.' - >>> # Populate the template - >>> chat_prompt = chat_template.populate_template( - ... concept="list comprehension", - ... programming_language="Python" - ... ) + >>> template.messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what {concept} is in {programming_language}.'}] + >>> template.input_variables + ['concept', 'programming_language'] + >>> template.metadata['version'] + '0.0.1' Args: repo_id (str): The repository ID on Hugging Face Hub (e.g., 'username/repo_name'). diff --git a/hf_hub_prompts/populated_prompt.py b/hf_hub_prompts/populated_prompt.py index 83a7998..79f5660 100644 --- a/hf_hub_prompts/populated_prompt.py +++ b/hf_hub_prompts/populated_prompt.py @@ -7,16 +7,18 @@ @dataclass class PopulatedPrompt: - """A class representing a populated prompt. - - Examples: - >>> # For standard prompts - >>> prompt = template.populate_template(name="Alice") - >>> text = prompt.content - >>> - >>> # For chat prompts - >>> prompt = chat_template.populate_template(name="Alice") - >>> messages = prompt.format_for_client(client="anthropic") + """A class representing a populated prompt that can be formatted to be compatible with different LLM clients. + + This class serves two main purposes: + 1. Store populated prompts (both text and chat formats) + 2. 
Convert chat prompts between different LLM client formats (e.g., OpenAI, Anthropic) + + The class handles two types of content: + + * **Text prompts**: Simple strings that can be used directly with any LLM + * **Chat prompts**: Lists or Dicts of messages that are compatible with the format expected by different LLM clients + + For examples of converting between client formats, see the [`format_for_client()`][hf_hub_prompts.populated_prompt.PopulatedPrompt.format_for_client] method. """ content: Union[str, List[Dict[str, Any]]] @@ -24,6 +26,32 @@ class PopulatedPrompt: def format_for_client(self, client: str = "openai") -> Union[List[Dict[str, Any]], Dict[str, Any]]: """Format the prompt content for a specific client. + Examples: + Format chat messages for different clients: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # By default, the populated prompt.content is in the OpenAI messages format + >>> messages_openai = prompt.format_for_client("openai") + >>> messages_openai == prompt.content + True + + >>> # We can also convert the populated prompt to other formats + >>> messages_anthropic = prompt.format_for_client("anthropic") + >>> messages_anthropic == prompt.content + False + >>> messages_anthropic + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + Args: client (str): The client format to use ('openai', 'anthropic'). Defaults to 'openai'. 
@@ -31,7 +59,7 @@ def format_for_client(self, client: str = "openai") -> Union[List[Dict[str, Any] Union[List[Dict[str, Any]], Dict[str, Any]]: Formatted prompt content suitable for the specified client. Raises: - ValueError: If an unsupported client format is specified. + ValueError: If an unsupported client format is specified or if trying to format a text prompt. """ if isinstance(self.content, str): # For standard prompts, format_for_client does not add value diff --git a/hf_hub_prompts/prompt_templates.py b/hf_hub_prompts/prompt_templates.py index 10ad8c7..4da397f 100644 --- a/hf_hub_prompts/prompt_templates.py +++ b/hf_hub_prompts/prompt_templates.py @@ -21,7 +21,12 @@ class BasePromptTemplate(ABC): - """An abstract base class for prompt templates.""" + """An abstract base class for prompt templates. + + This class defines the common interface and shared functionality for all prompt templates. + Users should not instantiate this class directly, but instead use TextPromptTemplate + or ChatPromptTemplate, which are subclasses of BasePromptTemplate. + """ # Type hints for optional standard attributes shared across all template types metadata: Optional[Dict[str, Any]] @@ -33,8 +38,8 @@ def __init__(self, prompt_data: Dict[str, Any], prompt_url: Optional[str] = None self._set_required_attributes_for_template_type(prompt_data) # Set optional standard attributes that are the same across all templates - self.metadata = prompt_data.get("metadata") self.input_variables = prompt_data.get("input_variables") + self.metadata = prompt_data.get("metadata") # Store any additional optional data that might be present in the prompt data self.other_data = { @@ -69,14 +74,38 @@ def populate_template(self, **input_variables: Any) -> PopulatedPrompt: pass def display(self, format: Literal["json", "yaml"] = "json") -> None: - """Display the prompt configuration in the specified format.""" + """Display the prompt configuration in the specified format. 
+ + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> template.display(format="yaml") # doctest: +NORMALIZE_WHITESPACE + template: 'Translate the following text to {language}: + {text}' + input_variables: + - language + - text + metadata: + name: Simple Translator + description: A simple translation prompt for illustrating the standard prompt YAML + format + tags: + - translation + - multilinguality + version: 0.0.1 + author: Some Person + """ # Create a dict of all attributes except other_data - display_dict = {k: v for k, v in self.__dict__.items() if k not in ["other_data"] or v} + display_dict = self.__dict__.copy() + display_dict.pop("other_data", None) if format == "json": - print(json.dumps(display_dict, indent=2)) + print(json.dumps(display_dict, indent=2), end="") elif format == "yaml": - print(yaml.dump(display_dict, default_flow_style=False, sort_keys=False)) + print(yaml.dump(display_dict, default_flow_style=False, sort_keys=False), end="") def to_dict(self) -> Dict[str, Any]: return self.__dict__ @@ -153,7 +182,32 @@ def _validate_input_variables(self, input_variables: Dict[str, Any]) -> None: class TextPromptTemplate(BasePromptTemplate): - """A class representing a standard prompt template.""" + """A class representing a standard text prompt template. + + Examples: + Download and use a text prompt template: + >>> from hf_hub_prompts import download_prompt + >>> # Download example translation prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> # Inspect template attributes + >>> template.template + 'Translate the following text to {language}:\\n{text}' + >>> template.input_variables + ['language', 'text'] + >>> template.metadata['name'] + 'Simple Translator' + + >>> # Use the template + >>> prompt = template.populate_template( + ... 
language="French", + ... text="Hello world!" + ... ) + >>> prompt.content + 'Translate the following text to French:\\nHello world!' + """ # Type hints for template-specific attributes template: str @@ -169,6 +223,21 @@ def _set_required_attributes_for_template_type(self, prompt_data: Dict[str, Any] def populate_template(self, **input_variables: Any) -> PopulatedPrompt: """Populate the prompt by replacing placeholders with provided values. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> template.template + 'Translate the following text to {language}:\\n{text}' + >>> prompt = template.populate_template( + ... language="French", + ... text="Hello world!" + ... ) + >>> prompt.content + 'Translate the following text to French:\\nHello world!' + Args: **input_variables: The values to fill placeholders in the template. @@ -182,6 +251,18 @@ def populate_template(self, **input_variables: Any) -> PopulatedPrompt: def to_langchain_template(self) -> "LC_PromptTemplate": """Convert the TextPromptTemplate to a LangChain PromptTemplate. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="translate.yaml" + ... ) + >>> lc_template = template.to_langchain_template() + >>> # test equivalence + >>> from langchain_core.prompts import PromptTemplate as LC_PromptTemplate + >>> isinstance(lc_template, LC_PromptTemplate) + True + Returns: PromptTemplate: A LangChain PromptTemplate object. @@ -189,7 +270,7 @@ def to_langchain_template(self) -> "LC_PromptTemplate": ImportError: If LangChain is not installed. """ try: - from langchain.prompts import PromptTemplate as LC_PromptTemplate + from langchain_core.prompts import PromptTemplate as LC_PromptTemplate except ImportError as e: raise ImportError("LangChain is not installed. 
Please install it with 'pip install langchain'") from e @@ -201,7 +282,45 @@ class ChatPromptTemplate(BasePromptTemplate): -    """A class representing a chat prompt template that can be formatted and used with various LLM clients.""" +    """A class representing a chat prompt template that can be formatted for and used with various LLM clients. + + Examples: + Download and use a chat prompt template: + >>> from hf_hub_prompts import download_prompt + >>> # Download example code teaching prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> # Inspect template attributes + >>> template.messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what {concept} is in {programming_language}.'}] + >>> template.input_variables + ['concept', 'programming_language'] + + >>> # Populate the template + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # By default, the populated prompt is in the OpenAI messages format, as it is adopted by many open-source libraries + >>> # You can convert to formats used by other LLM clients like Anthropic like this: + >>> messages_anthropic = prompt.format_for_client("anthropic") + >>> messages_anthropic + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + + >>> # Convenience method to populate and format in one step + >>> messages = template.create_messages( + ... 
client="anthropic", + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> messages + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + """ # Type hints for template-specific attributes messages: List[Dict[str, Any]] @@ -217,6 +336,19 @@ def _set_required_attributes_for_template_type(self, prompt_data: Dict[str, Any] def populate_template(self, **input_variables: Any) -> PopulatedPrompt: """Populate the prompt messages by replacing placeholders with provided values. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> prompt = template.populate_template( + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> prompt.content + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + Args: **input_variables: The values to fill placeholders in the messages. @@ -235,13 +367,34 @@ def create_messages( ) -> Union[List[Dict[str, Any]], Dict[str, Any]]: """Convenience method to populate template and format for client in one step. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> # Format for OpenAI (default) + >>> messages = template.create_messages( + ... concept="list comprehension", + ... programming_language="Python" + ... 
) + >>> messages + [{'role': 'system', 'content': 'You are a coding assistant who explains concepts clearly and provides short examples.'}, {'role': 'user', 'content': 'Explain what list comprehension is in Python.'}] + + >>> # Format for Anthropic + >>> messages = template.create_messages( + ... client="anthropic", + ... concept="list comprehension", + ... programming_language="Python" + ... ) + >>> messages + {'system': 'You are a coding assistant who explains concepts clearly and provides short examples.', 'messages': [{'role': 'user', 'content': 'Explain what list comprehension is in Python.'}]} + Args: client (str): The client format to use ('openai', 'anthropic'). Defaults to 'openai'. **input_variables: The variables to fill into the template. For example, if your template expects variables like 'name' and 'age', pass them as keyword arguments: - >>> messages = template.create_messages(client="openai", name="Alice", age=30) - Returns: Union[List[Dict[str, Any]], Dict[str, Any]]: Populated and formatted messages. """ @@ -258,6 +411,18 @@ def create_messages( def to_langchain_template(self) -> "LC_ChatPromptTemplate": """Convert the ChatPromptTemplate to a LangChain ChatPromptTemplate. + Examples: + >>> from hf_hub_prompts import download_prompt + >>> template = download_prompt( + ... repo_id="MoritzLaurer/example_prompts", + ... filename="code_teacher.yaml" + ... ) + >>> lc_template = template.to_langchain_template() + >>> # test equivalence + >>> from langchain_core.prompts import ChatPromptTemplate as LC_ChatPromptTemplate + >>> isinstance(lc_template, LC_ChatPromptTemplate) + True + Returns: ChatPromptTemplate: A LangChain ChatPromptTemplate object. @@ -265,7 +430,7 @@ def to_langchain_template(self) -> "LC_ChatPromptTemplate": ImportError: If LangChain is not installed. 
""" try: - from langchain.prompts import ChatPromptTemplate as LC_ChatPromptTemplate + from langchain_core.prompts import ChatPromptTemplate as LC_ChatPromptTemplate except ImportError as e: raise ImportError("LangChain is not installed. Please install it with 'pip install langchain'") from e diff --git a/pyproject.toml b/pyproject.toml index 879ea0d..e44d16d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,8 +54,8 @@ pre-commit = "^4.0.1" [tool.pytest.ini_options] minversion = "7.0" -testpaths = ["tests"] -python_files = "test_*.py" +testpaths = ["tests", "hf_hub_prompts"] +python_files = ["test_*.py", "*.py"] pythonpath = [".","src"] addopts = [ "--verbose", diff --git a/tests/conftest.py b/tests/conftest.py index e69de29..414952b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -0,0 +1,21 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture(autouse=True) +def mock_hf_hub(monkeypatch): + """Mock Hugging Face Hub API calls for doctests.""" + + def mock_download(*args, **kwargs): + # Map Hub files to local test files + test_data_dir = Path(__file__).parent / "test_data" + + if "translate.yaml" in str(args): + return str(test_data_dir / "translate.yaml") + elif "code_teacher.yaml" in str(args): + return str(test_data_dir / "code_teacher.yaml") + + raise ValueError(f"Unknown test file requested: {args}") + + monkeypatch.setattr("huggingface_hub.hf_hub_download", mock_download) diff --git a/tests/test_data/code_teacher.yaml b/tests/test_data/code_teacher.yaml new file mode 100644 index 0000000..ce674c0 --- /dev/null +++ b/tests/test_data/code_teacher.yaml @@ -0,0 +1,17 @@ +prompt: + messages: + - role: "system" + content: "You are a coding assistant who explains concepts clearly and provides short examples." + - role: "user" + content: "Explain what {concept} is in {programming_language}." 
+ input_variables: + - concept + - programming_language + metadata: + name: "Code Teacher" + description: "A simple chat prompt for explaining programming concepts with examples" + tags: + - programming + - education + version: "0.0.1" + author: "My Awesome Company" \ No newline at end of file diff --git a/tests/test_data/sync_test_data.py b/tests/test_data/sync_test_data.py new file mode 100644 index 0000000..44db500 --- /dev/null +++ b/tests/test_data/sync_test_data.py @@ -0,0 +1,39 @@ +""" +Script to sync example prompts from the Hub to local test data. +Usage: +poetry run python tests/test_data/sync_test_data.py +""" + +from pathlib import Path + +from huggingface_hub import HfApi, hf_hub_download + + +def sync_test_files(): + """Download all YAML/JSON example prompts from Hub and save to test_data directory.""" + test_data_dir = Path(__file__).parent.parent / "test_data" + test_data_dir.mkdir(exist_ok=True, parents=True) + + # Get list of all files in the repo + api = HfApi() + repo_id = "MoritzLaurer/example_prompts" + all_files = api.list_repo_files(repo_id) + + # Filter for YAML and JSON files + prompt_files = [file for file in all_files if file.endswith((".yaml", ".yml", ".json"))] + + # Download each file + for file in prompt_files: + print(f"Downloading {file}...") + hub_file = hf_hub_download(repo_id=repo_id, filename=file) + local_file = test_data_dir / Path(file).name + # Copy content to local test file + with open(hub_file, "r") as src, open(local_file, "w") as dst: + dst.write(src.read()) + print(f"Saved to {local_file}") + + print(f"\nSynced {len(prompt_files)} files to {test_data_dir}") + + +if __name__ == "__main__": + sync_test_files() diff --git a/tests/test_data/translate.yaml b/tests/test_data/translate.yaml new file mode 100644 index 0000000..9b54315 --- /dev/null +++ b/tests/test_data/translate.yaml @@ -0,0 +1,15 @@ +prompt: + template: |- # The "|-" makes the following string behave similar to a string in """...""" in Python to render 
linebreaks correctly. For YAML syntax explanations see e.g. https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started. + Translate the following text to {language}: + {text} + input_variables: + - language + - text + metadata: + name: "Simple Translator" + description: "A simple translation prompt for illustrating the standard prompt YAML format" + tags: + - translation + - multilinguality + version: "0.0.1" + author: "Some Person" \ No newline at end of file