Skip to content

Commit

Permalink
clean up distil example
Browse files Browse the repository at this point in the history
  • Loading branch information
jxnl committed Oct 14, 2023
1 parent 79d872d commit e98f777
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 86 deletions.
10 changes: 10 additions & 0 deletions examples/distilations/math_finetunes.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(396, b=414, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":396,\"b\":414,\"result\":810}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(812, b=108, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":812,\"b\":108,\"result\":920}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(923, b=561, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":923,\"b\":561,\"result\":1484}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(933, b=756, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":933,\"b\":756,\"result\":1689}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(807, b=389, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":807,\"b\":389,\"result\":1196}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(509, b=776, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":509,\"b\":776,\"result\":1285}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(165, b=442, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":165,\"b\":442,\"result\":607}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(843, b=304, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":843,\"b\":304,\"result\":1147}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(512, b=791, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":512,\"b\":791,\"result\":1303}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
{"messages": [{"role": "system", "content": "Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n\"\"\"\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n\"\"\""}, {"role": "user", "content": "Return fn(133, b=539, c=\"hello\")"}, {"role": "assistant", "function_call": {"name": "Response", "arguments": "{\"a\":133,\"b\":539,\"result\":672}"}}], "functions": [{"name": "Response", "description": "Correctly extracted `Response` with all the required parameters with correct types", "parameters": {"properties": {"a": {"type": "integer"}, "b": {"type": "integer"}, "result": {"type": "integer"}}, "required": ["a", "b", "result"], "type": "object"}}]}
79 changes: 79 additions & 0 deletions examples/distilations/three_digit_mul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging

from pydantic import BaseModel
from instructor.distil import Instructions

# Configure the root logger at INFO. The distil logger records each tracked
# call with `logger.info(...)`, so INFO records must not be filtered out.
logging.basicConfig(level=logging.INFO)

# Usage
# Every function decorated with `instructions.distil` has its calls logged
# through the handlers given here; this example sends them to a JSONL file in
# the current working directory.
# NOTE(review): `finetune_format` is annotated as `FinetuneFormat` in
# `Instructions.__init__`; passing the plain string "messages" presumably
# relies on the enum comparing equal to its string value - confirm.
instructions = Instructions(
    log_handlers=[logging.FileHandler(filename="math_finetunes.jsonl")],
    finetune_format="messages",
    name="three_digit_multiply",
)


# Structured result returned by `fn`: both operands plus the computed value.
# NOTE(review): documented with comments rather than a class docstring on
# purpose - a docstring would presumably replace the generic schema
# description that appears in the logged "functions" entry; confirm before
# adding one.
class Response(BaseModel):
    a: int  # first operand, echoed back
    b: int  # second operand, echoed back
    result: int  # value computed from `a` and `b`


# Example function whose calls are captured as fine-tuning records by the
# `instructions.distil` decorator.
# NOTE(review): the Instructions instance is named "three_digit_multiply", but
# this function returns the SUM of `a` and `b` (the committed JSONL samples are
# sums as well) - confirm which operation is intended.
# `c` is accepted but never used in the computation; it only shows up in the
# logged call string.
# The docstring below is left as the original placeholder text because it is
# embedded verbatim in the logged system prompt.
@instructions.distil
def fn(a: int, b: int, c: str) -> Response:
    """_summary_
    Args:
    a (int): _description_
    b (int): _description_
    c (str): _description_
    Returns:
    Response: _description_
    """
    return Response(a=a, b=b, result=a + b)


if __name__ == "__main__":
    import random

    # A log will look like this:
    # (`log_line` is never read below; it only documents the shape of one
    # record written in the "messages" fine-tune format.)
    system_message = {
        "role": "system",
        "content": 'Predict the results of this function:\n\ndef fn(a: int, b: int, c: str) -> __main__.Response\n"""\n_summary_\n\nArgs:\n a (int): _description_\n b (int): _description_\n c (str): _description_\n\nReturns:\n Response: _description_\n"""',
    }
    user_message = {"role": "user", "content": 'Return fn(133, b=539, c="hello")'}
    assistant_message = {
        "role": "assistant",
        "function_call": {
            "name": "Response",
            "arguments": '{"a":133,"b":539,"result":672}',
        },
    }
    response_schema = {
        "name": "Response",
        "description": "Correctly extracted `Response` with all the required parameters with correct types",
        "parameters": {
            "properties": {
                "a": {"type": "integer"},
                "b": {"type": "integer"},
                "result": {"type": "integer"},
            },
            "required": ["a", "b", "result"],
            "type": "object",
        },
    }
    log_line = {
        "messages": [system_message, user_message, assistant_message],
        "functions": [response_schema],
    }

    # Call the tracked function ten times with random three-digit operands;
    # each call is presumably logged as one record by the distil decorator.
    for _ in range(10):
        lhs = random.randint(100, 999)
        rhs = random.randint(100, 999)
        print("returning", fn(lhs, b=rhs, c="hello"))
70 changes: 0 additions & 70 deletions examples/distilations/user_code_global.py

This file was deleted.

42 changes: 26 additions & 16 deletions instructor/distil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import inspect
import json
import logging
import os

from typing import Any, Callable, List, Optional
import uuid
Expand Down Expand Up @@ -79,10 +78,12 @@ def __init__(
name: str = None,
id: str = None,
log_handlers: List[logging.Handler] = None,
finetune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
):
self.name = name
self.id = id or str(uuid.uuid4())
self.unique_id = str(uuid.uuid4())
self.finetune_format = finetune_format

self.logger = logging.getLogger(self.name)
for handler in log_handlers or []:
Expand All @@ -93,7 +94,7 @@ def distil(
*args,
name: str = None,
mode: str = "distil",
fine_tune_format: FinetuneFormat = FinetuneFormat.MESSAGES,
fine_tune_format: FinetuneFormat = None,
):
"""
Decorator to track the function call and response, supports distillation and dispatch modes.
Expand All @@ -118,6 +119,9 @@ def distil(
assert mode in allowed_modes, f"Must be in {allowed_modes}"
assert mode == "distil", "Only distil mode is supported at the moment."

if fine_tune_format is None:
fine_tune_format = self.finetune_format

def _wrap_distil(fn):
msg = f"Return type hint for {fn} must subclass `pydantic.BaseModel'"
assert is_return_type_base_model_or_instance(fn), msg
Expand Down Expand Up @@ -161,30 +165,25 @@ def track(
name = name if name else fn.__name__
base_model: BaseModel = type(resp)

if finetune_format == FinetuneFormat.RAW:
function_body = dict(
fn_name=name,
fn_repr=format_function(fn),
args=args,
kwargs=kwargs,
resp=resp.model_dump(),
schema=base_model.model_json_schema(),
)
self.logger.info(json.dumps(function_body))

if finetune_format == FinetuneFormat.MESSAGES:
# This is the format that OpenAI's API expects for a finetune call
openai_function_call = openai_schema(base_model).openai_schema
function_definition = get_signature_from_fn(fn).replace(fn.__name__, name)

str_args = ", ".join(map(str, args))
str_kwargs = (
", ".join(f"{k}={json.dumps(v)}" for k, v in kwargs.items()) or None
)
call_args = ", ".join(filter(None, [str_args, str_kwargs]))

function_body = {
"messages": [
{
"role": "system",
"content": f"Return the response from the function call.\n\n {function_definition}",
"content": f"Predict the results of this function:\n\n{function_definition}",
},
{
"role": "user",
"content": f"Return the results of the function with the following arguments:\n\n {name}(*{args}, **{kwargs})",
"content": f"Return {name}({call_args})",
},
{
"role": "assistant",
Expand All @@ -197,3 +196,14 @@ def track(
"functions": [openai_function_call],
}
self.logger.info(json.dumps(function_body))

if finetune_format == FinetuneFormat.RAW:
function_body = dict(
fn_name=name,
fn_repr=format_function(fn),
args=args,
kwargs=kwargs,
resp=resp.model_dump(),
schema=base_model.model_json_schema(),
)
self.logger.info(json.dumps(function_body))

0 comments on commit e98f777

Please sign in to comment.