Skip to content

Commit

Permalink
Merge branch 'x/retryable-errors' of https://github.com/julep-ai/julep
Browse files Browse the repository at this point in the history
…into x/retryable-errors
  • Loading branch information
HamadaSalhab committed Oct 29, 2024
2 parents 7302bc0 + 643d7df commit 408b927
Show file tree
Hide file tree
Showing 14 changed files with 355 additions and 121 deletions.
7 changes: 6 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,9 @@ LITELLM_REDIS_PASSWORD=<your_litellm_redis_password>

# S3_ENDPOINT=http://seaweedfs:8333
# S3_ACCESS_KEY=<your_s3_access_key>
# S3_SECRET_KEY=<your_s3_secret_key>
# S3_SECRET_KEY=<your_s3_secret_key>

# Integrations Service
# ------------

# INTEGRATIONS_SERVICE_PORT=8000
47 changes: 39 additions & 8 deletions agents-api/agents_api/activities/task_steps/base_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,33 @@
from beartype import beartype
from box import Box
from openai import BaseModel
from simpleeval import NameNotDefined
from temporalio import activity
from thefuzz import fuzz

from ...common.storage_handler import auto_blob_store
from ...env import testing
from ..utils import get_evaluator


class EvaluateError(Exception):
    """Wrap an expression-evaluation failure with user-facing hints.

    The exception message starts with the original error's message and may be
    followed by extra lines suggesting a likely cause:

    * a jinja template (``{{ ... }}``) used where a python expression was
      expected;
    * an undefined name that closely matches one of the available variables.

    Args:
        error: The exception raised while evaluating the expression.
        expression: The expression being evaluated when ``error`` was raised.
            Only inspected when it is a string.
        values: Mapping of the variable names that were available to the
            expression. May be empty or ``None``.
    """

    # Minimum ``fuzz.ratio`` score for a known variable name to be offered as
    # the intended spelling of an undefined name.
    _SIMILARITY_THRESHOLD = 90

    def __init__(self, error, expression, values):
        # Prefer the error's own ``message`` attribute when it has one; always
        # coerce to ``str`` so the substring checks below cannot fail.
        error_message = str(getattr(error, "message", error))
        lines = [error_message]

        # Catch a possible jinja template error: evaluated as python,
        # ``{{ x }}`` is a set nested inside a set, which fails as unhashable.
        # Guard on ``str`` so a non-string expression cannot raise here and
        # hide the original failure.
        if (
            isinstance(expression, str)
            and "unhashable" in error_message
            and "{{" in expression
        ):
            lines.append(
                "Suggestion: It seems like you used a jinja template, did you mean to use a python expression?"
            )

        # Catch a possible misspelling of a variable name.
        if isinstance(error, NameNotDefined):
            misspelled_name = self._undefined_name(error, error_message)
            if misspelled_name:
                for variable_name in values or {}:
                    if not isinstance(variable_name, str):
                        continue
                    if (
                        fuzz.ratio(variable_name, misspelled_name)
                        >= self._SIMILARITY_THRESHOLD
                    ):
                        lines.append(
                            f"Did you mean '{variable_name}' instead of '{misspelled_name}'?"
                        )

        super().__init__("\n".join(lines))

    @staticmethod
    def _undefined_name(error, error_message):
        """Return the undefined identifier reported by ``error``, or ``None``."""
        name = getattr(error, "name", None)
        if isinstance(name, str) and name:
            return name
        # Fall back to the first single-quoted token of the message, but never
        # raise if the message does not have the expected shape.
        parts = error_message.split("'")
        return parts[1] if len(parts) > 1 else None


@auto_blob_store
@beartype
async def base_evaluate(
Expand Down Expand Up @@ -47,22 +67,32 @@ async def base_evaluate(

evaluator = get_evaluator(names=values, extra_functions=extra_lambdas)

chosen_expression = ""

try:
result = None
match exprs:
case str():
chosen_expression = exprs
result = evaluator.eval(exprs)
case list():
result = [evaluator.eval(expr) for expr in exprs]
result = []
for expr in exprs:
chosen_expression = expr
result.append(evaluator.eval(expr))
case dict() as d if all(
isinstance(v, dict) or isinstance(v, str) for v in d.values()
):
result = {
k: {ik: evaluator.eval(iv) for ik, iv in v.items()}
if isinstance(v, dict)
else evaluator.eval(v)
for k, v in d.items()
}
result = {}
for k, v in d.items():
if isinstance(v, str):
chosen_expression = v
result[k] = evaluator.eval(v)
else:
result[k] = {}
for k1, v1 in v.items():
chosen_expression = v1
result[k][k1] = evaluator.eval(v1)
case _:
raise ValueError(f"Invalid expression: {exprs}")

Expand All @@ -71,7 +101,8 @@ async def base_evaluate(
except BaseException as e:
if activity.in_activity():
activity.logger.error(f"Error in base_evaluate: {e}")
raise
newException = EvaluateError(e, chosen_expression, values)
raise newException from e


# Note: This is here just for clarity. We could have just imported base_evaluate directly
Expand Down
81 changes: 56 additions & 25 deletions agents-api/agents_api/common/exceptions/tasks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""
This module defines non-retryable error types and provides a function to check
if a given error is non-retryable. These are used in conjunction with custom
Temporal interceptors to prevent unnecessary retries of certain error types.
🎯 Error Handling: The Art of Knowing When to Try Again
This module is like a bouncer at an error club - it decides which errors get a
second chance and which ones are permanently banned. Some errors are just having
a bad day (like network timeouts), while others are fundamentally problematic
(like trying to divide by zero... seriously, who does that?).
Remember: To err is human, to retry divine... but only if it makes sense!
"""

import asyncio
Expand All @@ -19,19 +24,20 @@
import requests
import temporalio.exceptions

# List of error types that should not be retried
# 🚫 The "No Second Chances" Club - errors that we won't retry
# Because sometimes, no means no!
NON_RETRYABLE_ERROR_TYPES = (
# Temporal-specific errors
# Temporal-specific errors (when time itself says no)
temporalio.exceptions.WorkflowAlreadyStartedError,
temporalio.exceptions.TerminatedError,
temporalio.exceptions.CancelledError,
#
# Built-in Python exceptions
# Built-in Python exceptions (the classics that never go out of style)
TypeError,
AssertionError,
SyntaxError,
ValueError,
ZeroDivisionError,
ZeroDivisionError, # Because dividing by zero is still not cool
IndexError,
AttributeError,
LookupError,
Expand All @@ -40,41 +46,41 @@
KeyError,
NameError,
NotImplementedError,
RecursionError,
RecursionError, # When your code goes down the rabbit hole too deep
RuntimeError,
StopIteration,
StopAsyncIteration,
IndentationError,
IndentationError, # Spaces vs tabs: the eternal debate
TabError,
#
# Unicode-related errors
# Unicode-related errors (when characters misbehave)
UnicodeError,
UnicodeEncodeError,
UnicodeDecodeError,
UnicodeTranslateError,
#
# HTTP and API-related errors
# HTTP and API-related errors (when the web says "nope")
fastapi.exceptions.RequestValidationError,
#
# Asynchronous programming errors
# Asynchronous programming errors (async/await gone wrong)
asyncio.CancelledError,
asyncio.InvalidStateError,
GeneratorExit,
#
# Third-party library exceptions
# Third-party library exceptions (when other people's code says no)
jinja2.exceptions.TemplateSyntaxError,
jinja2.exceptions.TemplateNotFound,
jsonschema.exceptions.ValidationError,
pydantic.ValidationError,
requests.exceptions.InvalidURL,
requests.exceptions.MissingSchema,
#
# Box exceptions
# Box exceptions (when your box is broken)
box.exceptions.BoxKeyError,
box.exceptions.BoxTypeError,
box.exceptions.BoxValueError,
#
# Beartype exceptions
# Beartype exceptions (when your types are unbearable)
beartype.roar.BeartypeException,
beartype.roar.BeartypeDecorException,
beartype.roar.BeartypeDecorHintException,
Expand All @@ -89,20 +95,22 @@
beartype.roar.BeartypeDecorHintParamDefaultViolation,
beartype.roar.BeartypeDoorHintViolation,
#
# LiteLLM exceptions
# LiteLLM exceptions (when AI has a bad day)
litellm.exceptions.NotFoundError,
litellm.exceptions.InvalidRequestError,
litellm.exceptions.AuthenticationError,
litellm.exceptions.ServiceUnavailableError,
litellm.exceptions.OpenAIError,
)

# 🔄 The "Try Again" Club - errors that deserve another shot
# Because everyone deserves a second chance... or third... or fourth...
RETRYABLE_ERROR_TYPES = (
# LiteLLM exceptions
# LiteLLM exceptions (when AI needs a coffee break)
litellm.exceptions.RateLimitError,
litellm.exceptions.APIError, # Added to retry on "APIError: OpenAIException - Connection error"
#
# HTTP/Network related errors
# HTTP/Network related errors (internet having a bad hair day)
requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
requests.exceptions.ConnectTimeout,
Expand All @@ -113,36 +121,59 @@
httpx.WriteTimeout,
httpx.PoolTimeout,
#
# Standard library errors that are typically transient
# Standard library errors that are typically transient (like a bad mood)
ConnectionError,
TimeoutError,
OSError, # Covers many IO-related errors that may be transient
IOError,
#
# Database/storage related
# Database/storage related (when the database needs a nap)
asyncio.TimeoutError,
)

RETRYABLE_HTTP_STATUS_CODES = (408, 429, 503, 504)
# HTTP status codes that say "maybe try again later?"
# is_retryable_error() treats an error as retryable when the status code of a
# fastapi HTTPException, or of the response attached to an
# httpx.HTTPStatusError, is one of these values.
RETRYABLE_HTTP_STATUS_CODES = (
408, # Request Timeout (server needs a coffee break)
429, # Too Many Requests (slow down, speedster!)
503, # Service Unavailable (server is having a moment)
504, # Gateway Timeout (the internet took a detour)
)


def is_retryable_error(error: BaseException) -> bool:
"""
Determines if the given error should be retried or not.
The Great Error Judge: Decides if an error deserves another chance at life.
Think of this function as a very understanding but firm teacher - some mistakes
get a do-over, others are learning opportunities (aka failures).
Args:
error (Exception): The error to check.
error (Exception): The error that's pleading its case
Returns:
bool: True if the error is retryable, False otherwise.
bool: True if the error gets another shot, False if it's game over
"""

# First, check if it's in the "permanently banned" list
if isinstance(error, NON_RETRYABLE_ERROR_TYPES):
return False

# Check if it's in the "VIP retry club"
if isinstance(error, RETRYABLE_ERROR_TYPES):
return True

# Special handling for HTTP errors (because they're special snowflakes)
if isinstance(error, fastapi.exceptions.HTTPException):
if error.status_code in RETRYABLE_HTTP_STATUS_CODES:
return True

if isinstance(error, httpx.HTTPStatusError):
if error.response.status_code in RETRYABLE_HTTP_STATUS_CODES:
return True

# If we don't know this error, we play it safe and don't retry
# (stranger danger!)
return False

# Check for specific HTTP errors that should be retried
if isinstance(error, fastapi.exceptions.HTTPException):
if error.status_code in RETRYABLE_HTTP_STATUS_CODES:
Expand Down
Loading

0 comments on commit 408b927

Please sign in to comment.