Jacques/evaluate prompt #1023

Merged
28 commits merged on Jan 14, 2025

Changes from 1 commit

Commits
60986c5
WIP
jverre Jan 8, 2025
911c01b
WIP
jverre Jan 10, 2025
e7c3cc2
WIP
jverre Jan 10, 2025
13626dc
Update evaluation
jverre Jan 12, 2025
8cafa1c
Update for linters
jverre Jan 12, 2025
79b5f90
Update testing of code blocks
jverre Jan 12, 2025
49f27a3
Update testing of code blocks
jverre Jan 12, 2025
a3d7279
Update testing of code blocks
jverre Jan 12, 2025
e9ded5b
Update github actions
jverre Jan 12, 2025
1e251f7
Fix codeblocks
jverre Jan 12, 2025
b507c0d
Fix codeblocks
jverre Jan 12, 2025
05c2fbd
Fix codeblocks
jverre Jan 12, 2025
b985e5e
Fix codeblocks
jverre Jan 12, 2025
ff3399f
Update github actions
jverre Jan 12, 2025
fe154cd
Update github actions
jverre Jan 12, 2025
367fdba
Update github actions
jverre Jan 12, 2025
b99eb62
Fix codeblocks
jverre Jan 12, 2025
def794b
Updated following review
jverre Jan 13, 2025
8028c65
Updated following review
jverre Jan 13, 2025
131c151
Updated following review
jverre Jan 13, 2025
83c44bd
Move litellm opik monitoring logic to a separate module, add project …
alexkuzmik Jan 14, 2025
f383c09
Fix error_callback -> failure_callback
alexkuzmik Jan 14, 2025
5d217bd
Reorganize imports
alexkuzmik Jan 14, 2025
1b8d875
Make it possible to disable litellm tracking, dont track if litellm a…
alexkuzmik Jan 14, 2025
22ed1a2
Disable litellm monitoring via the callback in tests
alexkuzmik Jan 14, 2025
7a89c69
Merge branch 'main' into jacques/evaluate_prompt
alexkuzmik Jan 14, 2025
c279051
Explicitly disable litellm monitoring in every integration test workflow
alexkuzmik Jan 14, 2025
9dedf64
Fix lint errors
alexkuzmik Jan 14, 2025
Make it possible to disable litellm tracking, dont track if litellm already decorated
alexkuzmik committed Jan 14, 2025
commit 1b8d87538b8572afea29d4ea36c199564638ba1a
7 changes: 7 additions & 0 deletions sdks/python/src/opik/config.py
@@ -166,6 +166,13 @@ def settings_customise_sources(
it might lead to unexpected results for the features that rely on spans/traces created.
"""

disable_litellm_models_monitoring: bool = False
"""
If set to True - Opik will not create llm spans for LiteLLMChatModel calls.
It is mainly to be used in tests since litellm uses external Opik callback
which makes HTTP requests not via the opik package.
"""

@property
def config_file_fullpath(self) -> pathlib.Path:
config_file_path = os.getenv("OPIK_CONFIG_PATH", CONFIG_FILE_PATH_DEFAULT)
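The new flag is read through `OpikConfig`. Assuming the usual pydantic-settings convention of `OPIK_`-prefixed environment variables that this config module follows elsewhere, a test environment could plausibly disable LiteLLM span creation as sketched below (the exact variable name is an assumption, not shown in this diff):

```python
import os

# Assumed variable name, following the OPIK_ prefix convention; it must be set
# before the config object is constructed.
os.environ["OPIK_DISABLE_LITELLM_MODELS_MONITORING"] = "true"

from opik import config

cfg = config.OpikConfig()
assert cfg.disable_litellm_models_monitoring is True
```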
Original file line number Diff line number Diff line change
@@ -155,7 +155,8 @@ def generate_provider_response(
valid_litellm_params = self._filter_supported_params(kwargs)
all_kwargs = {**self._completion_kwargs, **valid_litellm_params}

all_kwargs = opik_monitor.add_opik_monitoring_to_params(all_kwargs)
if not opik_monitor.disabled_in_config():
all_kwargs = opik_monitor.add_opik_monitoring_to_params(all_kwargs)

response = self._engine.completion(
model=self.model_name, messages=messages, **all_kwargs
@@ -209,7 +210,8 @@ async def agenerate_provider_response(self, **kwargs: Any) -> ModelResponse:
valid_litellm_params = self._filter_supported_params(kwargs)
all_kwargs = {**self._completion_kwargs, **valid_litellm_params}

all_kwargs = opik_monitor.add_opik_monitoring_to_params(all_kwargs)
if not opik_monitor.disabled_in_config():
all_kwargs = opik_monitor.add_opik_monitoring_to_params(all_kwargs)

response = await self._engine.completion(
model=self.model_name, messages=messages, **all_kwargs
15 changes: 13 additions & 2 deletions sdks/python/src/opik/evaluation/models/litellm/opik_monitor.py
@@ -5,14 +5,25 @@
from litellm.integrations.opik import opik as litellm_opik_logger

from opik import opik_context
from opik import config


def add_opik_monitoring_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
already_decorated = hasattr(litellm.completion, "opik_tracked")
if already_decorated:
return params

params = _add_span_metadata_to_params(params)
params = _add_callback_to_params(params)
params = _ensure_params_have_callback(params)
return params


@functools.lru_cache
def disabled_in_config() -> bool:
config_ = config.OpikConfig()
return config_.disable_litellm_models_monitoring


def _add_span_metadata_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
current_span = opik_context.get_current_span_data()

@@ -35,7 +46,7 @@ def _add_span_metadata_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
}


def _add_callback_to_params(params: Dict[str, Any]) -> Dict[str, Any]:
def _ensure_params_have_callback(params: Dict[str, Any]) -> Dict[str, Any]:
has_global_opik_logger = any(
isinstance(callback, litellm_opik_logger.OpikLogger)
for callback in litellm.callbacks
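Since `disabled_in_config` is wrapped in `functools.lru_cache`, the `OpikConfig` object is built and consulted only on the first call; every later call returns the cached boolean. A minimal standalone sketch of that behaviour (the counter and the hard-coded return value are illustrative, not part of the diff):

```python
import functools

_reads = 0

@functools.lru_cache
def disabled_in_config() -> bool:
    # Stand-in for constructing OpikConfig and reading
    # disable_litellm_models_monitoring.
    global _reads
    _reads += 1
    return False

disabled_in_config()
disabled_in_config()
assert _reads == 1  # the config was only read once

# A test that flips the setting at runtime would also need to drop the cache:
disabled_in_config.cache_clear()
```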
11 changes: 6 additions & 5 deletions sdks/python/tests/unit/evaluation/test_evaluate.py
@@ -8,6 +8,7 @@
from opik.api_objects import opik_client
from opik.api_objects.dataset import dataset_item
from opik.evaluation import metrics
from opik.evaluation.models import models_factory
from ...testlib import ANY_BUT_NONE, ANY_STRING, SpanModel, assert_equal
from ...testlib.models import FeedbackScoreModel, TraceModel

@@ -606,13 +607,13 @@ def test_evaluate_prompt_happyflow(fake_backend):
mock_get_experiment_url = mock.Mock()
mock_get_experiment_url.return_value = "any_url"

mock_LiteLLMChatModel = mock.Mock()
mock_models_factory_get = mock.Mock()
mock_model = mock.Mock()
mock_model.model_name = MODEL_NAME
mock_model.generate_provider_response.return_value = mock.Mock(
choices=[mock.Mock(message=mock.Mock(content="Hello, world!"))]
)
mock_LiteLLMChatModel.return_value = mock_model
mock_models_factory_get.return_value = mock_model

with mock.patch.object(
opik_client.Opik, "create_experiment", mock_create_experiment
@@ -621,9 +622,9 @@ def test_evaluate_prompt_happyflow(fake_backend):
url_helpers, "get_experiment_url", mock_get_experiment_url
):
with mock.patch.object(
evaluation.models.litellm_chat_model,
"LiteLLMChatModel",
mock_LiteLLMChatModel,
models_factory,
"get",
mock_models_factory_get,
):
evaluation.evaluate_prompt(
dataset=mock_dataset,
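The test now patches `models_factory.get` rather than the `LiteLLMChatModel` class, so the mocked model is returned no matter which concrete model class the factory would otherwise choose. A reduced sketch of that patching pattern, using the names that appear in the diff plus an illustrative model name:

```python
from unittest import mock

from opik.evaluation.models import models_factory

mock_model = mock.Mock()
mock_model.model_name = "some-model"  # illustrative value
mock_model.generate_provider_response.return_value = mock.Mock(
    choices=[mock.Mock(message=mock.Mock(content="Hello, world!"))]
)

# Code that resolves its model via models_factory.get(...) now receives
# mock_model instead of a real LiteLLM-backed model.
with mock.patch.object(models_factory, "get", mock.Mock(return_value=mock_model)):
    model = models_factory.get("some-model")
    response = model.generate_provider_response(messages=[])
    assert response.choices[0].message.content == "Hello, world!"
```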