diff --git a/README.md b/README.md
index deef876a..e11debf1 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using

 #### LLM-powered Parsers

-The library supports an optional parser option leveraging Large Language Model (LLM) to provide best-effort parsing when the specific parsers have not been successful.
+The library supports an optional parser leveraging Large Language Models (LLMs) to provide best-effort parsing when the specific parsers have not been successful.

 > Warning: Some of these integrations, such as OpenAI, require of extras installations parameters. Check the [extras section](#extras)

@@ -98,9 +98,12 @@ When the appropriate environment variable(s) are set (see below), these LLM pars

 These are the currently supported LLM integrations:

+- `PARSER_LLM_QUESTION_STR` (Optional): a question that overrides the default one. Change it carefully. It takes precedence over `PARSER_LLM_QUESTION_FILEPATH`.
+- `PARSER_LLM_QUESTION_FILEPATH` (Optional): a path to a file containing a question that overrides the default one.
+
 - [OpenAI](https://openai.com/product), these are the supported ENVs:
-  - `OPENAI_API_KEY` (Required): OpenAI API Key.
-  - `OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".
+  - `PARSER_OPENAI_API_KEY` (Required): OpenAI API Key.
+  - `PARSER_OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".

 ### Metadata

diff --git a/circuit_maintenance_parser/parser.py b/circuit_maintenance_parser/parser.py
index 6e7c8660..e220e363 100644
--- a/circuit_maintenance_parser/parser.py
+++ b/circuit_maintenance_parser/parser.py
@@ -1,5 +1,6 @@
 """Definition of Mainentance Notification base classes."""
 import logging
+import os
 import base64
 import calendar
 import datetime
@@ -346,6 +347,23 @@ def get_key_with_string(dictionary: dict, string: str):
                 return key
         return None

+    @property
+    def llm_question(self):
+        """Return the LLM question."""
+        custom_llm_question = os.getenv("PARSER_LLM_QUESTION_STR")
+        if custom_llm_question:
+            return custom_llm_question
+
+        custom_llm_question_path = os.getenv("PARSER_LLM_QUESTION_FILEPATH")
+        if custom_llm_question_path:
+            try:
+                with open(custom_llm_question_path, mode="r", encoding="utf-8") as llm_question_file:
+                    return llm_question_file.read()
+            except OSError as err:
+                logger.warning("The file %s can't be read: %s", custom_llm_question_path, err)
+
+        return self._llm_question
+
     def get_llm_response(self, content):
         """Method to retrieve the response from the LLM for some content."""
         raise NotImplementedError
diff --git a/circuit_maintenance_parser/parsers/openai.py b/circuit_maintenance_parser/parsers/openai.py
index 3a62a22b..d2dd1d24 100644
--- a/circuit_maintenance_parser/parsers/openai.py
+++ b/circuit_maintenance_parser/parsers/openai.py
@@ -24,15 +24,15 @@ def get_llm_response(self, content) -> Optional[List]:
         if not _HAS_OPENAI:
             raise ImportError("openai extra is required to use OpenAIParser.")

-        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-        model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
+        client = OpenAI(api_key=os.getenv("PARSER_OPENAI_API_KEY"))
+        model = os.getenv("PARSER_OPENAI_MODEL", "gpt-3.5-turbo")
         try:
             response = client.chat.completions.create(
                 model=model,
                 messages=[
                     {  # type: ignore
                         "role": "system",
-                        "content": self._llm_question,
+                        "content": self.llm_question,
                     },
                     {  # type: ignore
                         "role": "user",
diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py
index 6f1cd31b..507bacb3 100644
--- a/circuit_maintenance_parser/provider.py
+++ b/circuit_maintenance_parser/provider.py
@@ -123,7 +123,7 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
             logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
             return []

-        if os.getenv("OPENAI_API_KEY"):
+        if os.getenv("PARSER_OPENAI_API_KEY"):
             self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))

         for processor in self._processors:
diff --git a/tests/conftest.py b/tests/conftest.py
index 0cb59276..eaad7253 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,16 +2,16 @@

 import os

-token_openai = os.getenv("OPENAI_API_KEY")
+token_openai = os.getenv("PARSER_OPENAI_API_KEY")


 def pytest_configure(config):  # pylint: disable=unused-argument
     """Clean environment for tests."""
     if token_openai:
-        del os.environ["OPENAI_API_KEY"]
+        del os.environ["PARSER_OPENAI_API_KEY"]


 def pytest_sessionfinish(session, exitstatus):  # pylint: disable=unused-argument
     """Recove environment after tests."""
     if token_openai:
-        os.environ["OPENAI_API_KEY"] = token_openai
+        os.environ["PARSER_OPENAI_API_KEY"] = token_openai
diff --git a/tests/unit/test_providers.py b/tests/unit/test_providers.py
index ed5a9afe..b25c6810 100644
--- a/tests/unit/test_providers.py
+++ b/tests/unit/test_providers.py
@@ -117,7 +117,7 @@ class ProviderWithIncludeFilter(GenericProvider):
 )
 def test_provider_gets_mlparser(provider_class):
     """Test to check the any provider gets a default ML parser when ENV is activated."""
-    os.environ["OPENAI_API_KEY"] = "some_api_key"
+    os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
     data = NotificationData.init_from_raw("text/plain", b"fake data")
     data.add_data_part("text/html", b"other data")
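
Reviewer note: a minimal usage sketch of the renamed and newly added environment variables. Only `NotificationData.init_from_raw`, `GenericProvider.get_maintenances`, and the variable names themselves appear in the diff; the import paths, API key, model name, and notification body below are assumptions for illustration.

```python
# Sketch: enable the optional OpenAI fallback parser via the new env vars.
import os

from circuit_maintenance_parser import NotificationData  # assumed import path
from circuit_maintenance_parser.provider import GenericProvider  # assumed import path

# get_maintenances() only appends the CombinedProcessor([EmailDateParser,
# OpenAIParser]) fallback when this key is present.
os.environ["PARSER_OPENAI_API_KEY"] = "sk-placeholder"  # not a real key

# Optional model override; the parser defaults to "gpt-3.5-turbo".
os.environ["PARSER_OPENAI_MODEL"] = "gpt-4"

# Optional question override, sent as the system prompt; the _STR variant
# takes precedence over PARSER_LLM_QUESTION_FILEPATH when both are set.
os.environ["PARSER_LLM_QUESTION_STR"] = "Summarize this maintenance notification."

data = NotificationData.init_from_raw("text/plain", b"Planned maintenance window ...")
maintenances = GenericProvider().get_maintenances(data)
```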
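And a sketch of the file-based override handled by the new `llm_question` property; the file and question text here are hypothetical. Note the property's failure mode: an unreadable file only logs a warning, and the parser falls back to the built-in `_llm_question` default.

```python
# Sketch: point PARSER_LLM_QUESTION_FILEPATH at a file holding a custom question.
import os
import tempfile

with tempfile.NamedTemporaryFile(
    mode="w", suffix=".txt", delete=False, encoding="utf-8"
) as question_file:
    question_file.write("List the affected circuits and the maintenance window.")

# The llm_question property reads this file on access; if
# PARSER_LLM_QUESTION_STR is also set, the string variant wins.
os.environ["PARSER_LLM_QUESTION_FILEPATH"] = question_file.name
```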