Skip to content

Commit

Permalink
feat: ✨ Allow customization of LLM question (#252)
Browse files Browse the repository at this point in the history
* feat: ✨ Allow customization of LLM question

* fix env variable name

* Add an option to get the LLM question from a file

* Add a prefix in the ENVs to disambiguate them

* fix property access
  • Loading branch information
chadell authored Dec 14, 2023
1 parent c663a9b commit 57237f0
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 11 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
#### LLM-powered Parsers

The library supports an optional parser option leveraging Large Language Model (LLM) to provide best-effort parsing when the specific parsers have not been successful.
The library supports an optional parser option leveraging Large Language Models (LLM) to provide best-effort parsing when the specific parsers have not been successful.

> Warning: Some of these integrations, such as OpenAI, require extra installation parameters. Check the [extras section](#extras)
Expand All @@ -98,9 +98,12 @@ When the appropriate environment variable(s) are set (see below), these LLM pars
These are the currently supported LLM integrations:

- `PARSER_LLM_QUESTION_STR` (Optional), question to overwrite the default one. Change it carefully. It has precedence over `PARSER_LLM_QUESTION_FILEPATH`
- `PARSER_LLM_QUESTION_FILEPATH` (Optional), a path to a file that contains a question to overwrite the default one.

- [OpenAI](https://openai.com/product), these are the supported ENVs:
- `OPENAI_API_KEY` (Required): OpenAI API Key.
- `OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".
- `PARSER_OPENAI_API_KEY` (Required): OpenAI API Key.
- `PARSER_OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".

### Metadata

Expand Down
18 changes: 18 additions & 0 deletions circuit_maintenance_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Definition of Mainentance Notification base classes."""
import logging
import os
import base64
import calendar
import datetime
Expand Down Expand Up @@ -346,6 +347,23 @@ def get_key_with_string(dictionary: dict, string: str):
return key
return None

@property
def llm_question(self):
    """Return the question to send to the LLM.

    Resolution order:
    1. ``PARSER_LLM_QUESTION_STR`` environment variable, if set.
    2. Contents of the file named by ``PARSER_LLM_QUESTION_FILEPATH``,
       if set and readable (a read failure is logged and ignored).
    3. The parser's built-in default question.
    """
    override = os.getenv("PARSER_LLM_QUESTION_STR")
    if override:
        return override

    question_path = os.getenv("PARSER_LLM_QUESTION_FILEPATH")
    if not question_path:
        return self._llm_question

    try:
        with open(question_path, mode="r", encoding="utf-8") as handle:
            return handle.read()
    except OSError as err:
        # Best-effort: fall back to the default question on any read error.
        logger.warning("The file %s can't be read: %s", question_path, err)
        return self._llm_question

def get_llm_response(self, content):
"""Method to retrieve the response from the LLM for some content."""
raise NotImplementedError
Expand Down
6 changes: 3 additions & 3 deletions circuit_maintenance_parser/parsers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ def get_llm_response(self, content) -> Optional[List]:
if not _HAS_OPENAI:
raise ImportError("openai extra is required to use OpenAIParser.")

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
client = OpenAI(api_key=os.getenv("PARSER_OPENAI_API_KEY"))
model = os.getenv("PARSER_OPENAI_MODEL", "gpt-3.5-turbo")
try:
response = client.chat.completions.create(
model=model,
messages=[
{ # type: ignore
"role": "system",
"content": self._llm_question,
"content": self.llm_question,
},
{ # type: ignore
"role": "user",
Expand Down
2 changes: 1 addition & 1 deletion circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
return []

if os.getenv("OPENAI_API_KEY"):
if os.getenv("PARSER_OPENAI_API_KEY"):
self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))

for processor in self._processors:
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
import os


token_openai = os.getenv("OPENAI_API_KEY")
token_openai = os.getenv("PARSER_OPENAI_API_KEY")


def pytest_configure(config): # pylint: disable=unused-argument
"""Clean environment for tests."""
if token_openai:
del os.environ["OPENAI_API_KEY"]
del os.environ["PARSER_OPENAI_API_KEY"]


def pytest_sessionfinish(session, exitstatus): # pylint: disable=unused-argument
"""Recove environment after tests."""
if token_openai:
os.environ["OPENAI_API_KEY"] = token_openai
os.environ["PARSER_OPENAI_API_KEY"] = token_openai
2 changes: 1 addition & 1 deletion tests/unit/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class ProviderWithIncludeFilter(GenericProvider):
)
def test_provider_gets_mlparser(provider_class):
"""Test to check the any provider gets a default ML parser when ENV is activated."""
os.environ["OPENAI_API_KEY"] = "some_api_key"
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
data = NotificationData.init_from_raw("text/plain", b"fake data")
data.add_data_part("text/html", b"other data")

Expand Down

0 comments on commit 57237f0

Please sign in to comment.