Skip to content

Commit

Permalink
feat: ✨ Allow customization of LLM question (#252)
Browse files Browse the repository at this point in the history
* feat: ✨ Allow customization of LLM question

* fix env variable name

* Add an option to get the LLM question from a file

* Add a prefix in the ENVs to disambiguate them

* fix property access
  • Loading branch information
chadell authored Dec 14, 2023
1 parent c663a9b commit 57237f0
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 11 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
#### LLM-powered Parsers

The library supports an optional parser option leveraging Large Language Model (LLM) to provide best-effort parsing when the specific parsers have not been successful.
The library supports an optional parser option leveraging Large Language Models (LLM) to provide best-effort parsing when the specific parsers have not been successful.

> Warning: Some of these integrations, such as OpenAI, require extra installation parameters. Check the [extras section](#extras)
Expand All @@ -98,9 +98,12 @@ When the appropriate environment variable(s) are set (see below), these LLM pars
These are the currently supported LLM integrations:

- `PARSER_LLM_QUESTION_STR` (Optional), question to overwrite the default one. Change it carefully. It has precedence over `PARSER_LLM_QUESTION_FILEPATH`
- `PARSER_LLM_QUESTION_FILEPATH` (Optional), a path to a file that contains a question to overwrite the default one.

- [OpenAI](https://openai.com/product), these are the supported ENVs:
- `OPENAI_API_KEY` (Required): OpenAI API Key.
- `OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".
- `PARSER_OPENAI_API_KEY` (Required): OpenAI API Key.
- `PARSER_OPENAI_MODEL` (Optional): The LLM model to use, defaults to "gpt-3.5-turbo".

### Metadata

Expand Down
18 changes: 18 additions & 0 deletions circuit_maintenance_parser/parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Definition of Mainentance Notification base classes."""
import logging
import os
import base64
import calendar
import datetime
Expand Down Expand Up @@ -346,6 +347,23 @@ def get_key_with_string(dictionary: dict, string: str):
return key
return None

@property
def llm_question(self):
    """Return the question to send to the LLM.

    Resolution order:
    1. ``PARSER_LLM_QUESTION_STR`` environment variable, if set.
    2. Contents of the file named by ``PARSER_LLM_QUESTION_FILEPATH``,
       if set and readable (a read failure is logged and ignored).
    3. The parser's built-in default question.
    """
    override = os.getenv("PARSER_LLM_QUESTION_STR")
    if override:
        return override

    question_path = os.getenv("PARSER_LLM_QUESTION_FILEPATH")
    if not question_path:
        return self._llm_question

    try:
        with open(question_path, mode="r", encoding="utf-8") as handle:
            return handle.read()
    except OSError as err:
        # Best-effort: fall back to the default question on any read error.
        logger.warning("The file %s can't be read: %s", question_path, err)
        return self._llm_question

def get_llm_response(self, content):
"""Method to retrieve the response from the LLM for some content."""
raise NotImplementedError
Expand Down
6 changes: 3 additions & 3 deletions circuit_maintenance_parser/parsers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ def get_llm_response(self, content) -> Optional[List]:
if not _HAS_OPENAI:
raise ImportError("openai extra is required to use OpenAIParser.")

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
client = OpenAI(api_key=os.getenv("PARSER_OPENAI_API_KEY"))
model = os.getenv("PARSER_OPENAI_MODEL", "gpt-3.5-turbo")
try:
response = client.chat.completions.create(
model=model,
messages=[
{ # type: ignore
"role": "system",
"content": self._llm_question,
"content": self.llm_question,
},
{ # type: ignore
"role": "user",
Expand Down
2 changes: 1 addition & 1 deletion circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
return []

if os.getenv("OPENAI_API_KEY"):
if os.getenv("PARSER_OPENAI_API_KEY"):
self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))

for processor in self._processors:
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
import os


token_openai = os.getenv("OPENAI_API_KEY")
token_openai = os.getenv("PARSER_OPENAI_API_KEY")


def pytest_configure(config): # pylint: disable=unused-argument
"""Clean environment for tests."""
if token_openai:
del os.environ["OPENAI_API_KEY"]
del os.environ["PARSER_OPENAI_API_KEY"]


def pytest_sessionfinish(session, exitstatus): # pylint: disable=unused-argument
"""Recove environment after tests."""
if token_openai:
os.environ["OPENAI_API_KEY"] = token_openai
os.environ["PARSER_OPENAI_API_KEY"] = token_openai
2 changes: 1 addition & 1 deletion tests/unit/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class ProviderWithIncludeFilter(GenericProvider):
)
def test_provider_gets_mlparser(provider_class):
"""Test to check the any provider gets a default ML parser when ENV is activated."""
os.environ["OPENAI_API_KEY"] = "some_api_key"
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
data = NotificationData.init_from_raw("text/plain", b"fake data")
data.add_data_part("text/html", b"other data")

Expand Down

0 comments on commit 57237f0

Please sign in to comment.