-
Notifications
You must be signed in to change notification settings - Fork 345
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
feat: Experiment and dataset improvements #6163
Changes from 10 commits
313adc4
5603d1e
ecdf7df
196414d
c900e76
c9cd5d8
07b2612
6d59100
16ae667
a7ac664
aea412c
2c1a658
d71e707
ddceee8
1b1f5d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7,6 +7,7 @@ | |||||
OpenInferenceMimeTypeValues, | ||||||
OpenInferenceSpanKindValues, | ||||||
SpanAttributes, | ||||||
ToolAttributes, | ||||||
ToolCallAttributes, | ||||||
) | ||||||
|
||||||
|
@@ -27,12 +28,18 @@ def get_dataset_example_input(span: Span) -> dict[str, Any]: | |||||
input_mime_type = get_attribute_value(attributes, INPUT_MIME_TYPE) | ||||||
prompt_template_variables = get_attribute_value(attributes, LLM_PROMPT_TEMPLATE_VARIABLES) | ||||||
input_messages = get_attribute_value(attributes, LLM_INPUT_MESSAGES) | ||||||
tool_definitions = [] | ||||||
if tools := get_attribute_value(attributes, LLM_TOOLS): | ||||||
for tool in tools: | ||||||
if definition := get_attribute_value(tool, TOOL_DEFINITION): | ||||||
tool_definitions.append(definition) | ||||||
if span_kind == LLM: | ||||||
return _get_llm_span_input( | ||||||
input_messages=input_messages, | ||||||
input_value=input_value, | ||||||
input_mime_type=input_mime_type, | ||||||
prompt_template_variables=prompt_template_variables, | ||||||
tool_definitions=tool_definitions, | ||||||
) | ||||||
return _get_generic_io_value(io_value=input_value, mime_type=input_mime_type, kind="input") | ||||||
|
||||||
|
@@ -71,6 +78,7 @@ def _get_llm_span_input( | |||||
input_value: Any, | ||||||
input_mime_type: Optional[str], | ||||||
prompt_template_variables: Any, | ||||||
tool_definitions: Any, | ||||||
) -> dict[str, Any]: | ||||||
""" | ||||||
Extracts the input value from an LLM span and returns it as a dictionary. | ||||||
|
@@ -84,6 +92,10 @@ def _get_llm_span_input( | |||||
input = _get_generic_io_value(io_value=input_value, mime_type=input_mime_type, kind="input") | ||||||
if prompt_template_variables_data := _safely_json_decode(prompt_template_variables): | ||||||
input["prompt_template_variables"] = prompt_template_variables_data | ||||||
if tool_definitions_data := [ | ||||||
_safely_json_decode(tool_definition) for tool_definition in tool_definitions | ||||||
]: | ||||||
input["tool_definitions"] = tool_definitions_data | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Kinda leaning this direction. Does this work with the OpenAI fine-tuning / evals format? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like the tool definitions are a key alongside the input: https://platform.openai.com/docs/guides/fine-tuning#preparing-your-dataset-for-dpo |
||||||
return input | ||||||
|
||||||
|
||||||
|
@@ -215,3 +227,7 @@ def _safely_json_decode(value: Any) -> Any: | |||||
# ToolCallAttributes | ||||||
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON | ||||||
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME | ||||||
|
||||||
# ToolAttributes | ||||||
LLM_TOOLS = SpanAttributes.LLM_TOOLS | ||||||
TOOL_DEFINITION = ToolAttributes.TOOL_JSON_SCHEMA | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we can rename to |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -795,7 +795,7 @@ async def chat_completion_create( | |
elif isinstance(event, anthropic_streaming.InputJsonEvent): | ||
raise NotImplementedError | ||
else: | ||
assert_never(event) | ||
assert_never(event) # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's a fix for this that @RogerHYang put out. It's the anthropic citations event. Can you cherry pick that and remove this ignore? |
||
|
||
def _build_anthropic_messages( | ||
self, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we just double-check that this works with the OpenAI fine-tuning format? Also, it might be simpler if the key was `tools`.