Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Translation adapter support #1809

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dspy/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from dspy.adapters.base import Adapter
from dspy.adapters.chat_adapter import ChatAdapter
from dspy.adapters.json_adapter import JSONAdapter
from dspy.adapters.json_adapter import JSONAdapter
from dspy.adapters.translation_adapter import TranslationAdapter
178 changes: 178 additions & 0 deletions dspy/adapters/chained_translator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
from typing import Any, Dict, List, Optional
from dspy.signatures.signature import Signature, SignatureMeta
from dspy.adapters.base import Adapter
from dspy.adapters.chat_adapter import ChatAdapter, format_fields, FieldInfoWithName
import logging
from pydantic import TypeAdapter

class ChainedTranslationAdapter(Adapter):
    """Adapter that wraps a ``ChatAdapter`` in a three-step translation chain.

    1. String values of demos and inputs are translated from ``source_lang``
       to English.
    2. ``base_adapter`` formats the prompt and parses the completion.
    3. Parsed text outputs are translated from English to ``target_lang``.

    Every translation is a separate call to ``lm``.
    """

    def __init__(
        self,
        lm,  # The language model being used
        source_lang: str,
        target_lang: str,
        base_adapter: ChatAdapter,
        logger: Optional[logging.Logger] = None
    ):
        """Store the collaborators after validating them.

        Args:
            lm: Callable invoked as ``lm(messages=[...])`` for each translation.
            source_lang: Language of the incoming demos and inputs.
            target_lang: Language the parsed outputs are translated into.
            base_adapter: ``ChatAdapter`` that does the actual formatting and
                parsing.
            logger: Logger to use; defaults to this module's logger.

        Raises:
            ValueError: If ``source_lang`` or ``target_lang`` is not a
                non-blank string.
            TypeError: If ``base_adapter`` is not a ``ChatAdapter``.
        """
        super().__init__()

        if not isinstance(source_lang, str) or not source_lang.strip():
            raise ValueError("source_lang must be a non-empty string")
        if not isinstance(target_lang, str) or not target_lang.strip():
            raise ValueError("target_lang must be a non-empty string")
        if not isinstance(base_adapter, ChatAdapter):
            raise TypeError("base_adapter must be an instance of ChatAdapter")

        self.lm = lm
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.base_adapter = base_adapter
        self.logger = logger or logging.getLogger(__name__)

    def _translate(self, text: str, source: str, target: str) -> str:
        """Ask ``self.lm`` to translate ``text`` from ``source`` to ``target``.

        Returns the first element when the LM answers with a list, otherwise
        the response itself. An empty list yields the untranslated ``text``
        (with a warning) instead of an ``IndexError``.
        """
        messages = [
            {
                "role": "system",
                "content": (
                    f"You are a professional translator from {source} to {target}.\n"
                    f"Translate the following text to {target}, "
                    "preserving all meaning and context.\n"
                    "Maintain any special formatting, numbers, or technical terms.\n"
                    "Provide ONLY the translation, no explanations or notes."
                ),
            },
            {"role": "user", "content": text},
        ]

        response = self.lm(messages=messages)
        if isinstance(response, list):
            if not response:
                self.logger.warning(
                    "Language model returned no completions while translating "
                    "from %s to %s; keeping the original text",
                    source,
                    target,
                )
                return text
            return response[0]
        return response

    def _translate_to_english(self, text: str) -> str:
        """Translate input text from source language to English."""
        return self._translate(text, self.source_lang, "English")

    def _translate_to_target(self, text: str) -> str:
        """Translate processed English text to target language."""
        return self._translate(text, "English", self.target_lang)

    @staticmethod
    def _translate_value(value: Any, translate) -> Any:
        """Apply ``translate`` to non-blank strings; return anything else as is."""
        if isinstance(value, str) and value.strip():
            return translate(value)
        return value

    def _translate_dict_to_english(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Translate all string values in a dictionary from source language to English."""
        return {
            key: self._translate_value(value, self._translate_to_english)
            for key, value in data.items()
        }

    def _translate_dict_to_target(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Translate all string values in a dictionary from English to target language."""
        return {
            key: self._translate_value(value, self._translate_to_target)
            for key, value in data.items()
        }

    def format(self, signature: Signature, demos: List[Dict[str, Any]],
               inputs: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Translate demos and inputs to English, then format via the base adapter.

        Args:
            signature: Signature forwarded to ``base_adapter.format``.
            demos: Demo mappings; their non-blank string values are translated.
            inputs: Input mapping; its non-blank string values are translated.

        Returns:
            The base adapter's messages. If the first one has role "system",
            a note naming ``target_lang`` is appended to its content.

        Raises:
            Exception: Any error from translation or formatting is logged and
                re-raised unchanged.
        """
        try:
            english_demos = [self._translate_dict_to_english(demo) for demo in demos]
            english_inputs = self._translate_dict_to_english(inputs)

            messages = self.base_adapter.format(signature, english_demos, english_inputs)

            # NOTE(review): this asks the model to answer in target_lang, yet
            # parse() later translates the answer from English to target_lang.
            # Confirm which of the two is intended.
            if messages and messages[0]["role"] == "system":
                messages[0]["content"] = (
                    messages[0]["content"]
                    + f"\nNote: Final output should be in {self.target_lang}."
                )

            return messages

        except Exception as e:
            self.logger.exception("Error in format step: %s", e)
            raise

    def _parse_output_field(self, name: str, field: Any, raw: Any) -> Any:
        """Turn one raw output value into its final, target-language form.

        ``str`` fields are translated directly. Other annotations are
        validated against the untranslated text first, because translating
        booleans, literals or structured values before validation corrupts
        them. If validation yields a plain string, it is translated and the
        translation is kept only when it still satisfies the annotation.
        A value that fails validation is returned raw after logging.
        """
        annotation = field.annotation
        if annotation is str:
            return self._translate_value(raw, self._translate_to_target)

        try:
            type_adapter = TypeAdapter(annotation)
            value = type_adapter.validate_python(raw)
        except Exception as e:
            self.logger.error("Error parsing field %s: %s", name, e)
            return raw

        # Exact type check on purpose: str subclasses such as str-based enum
        # members are fixed vocabulary and must not be sent for translation.
        if type(value) is str and value.strip():
            translated = self._translate_to_target(value)
            try:
                return type_adapter.validate_python(translated)
            except Exception:
                # e.g. a Literal whose translated text is no longer allowed.
                return value
        return value

    def parse(self, signature: Signature, completion: str,
              _parse_values: bool = True) -> Dict[str, Any]:
        """Parse the completion with the base adapter and translate the outputs.

        Args:
            signature: Signature whose ``output_fields`` drive value parsing.
            completion: Raw completion text handed to ``base_adapter.parse``.
            _parse_values: When true, only fields present in
                ``signature.output_fields`` are returned, each converted
                according to its annotation. When false, every parsed string
                value is translated and returned without conversion.

        Returns:
            Mapping of field name to value. If anything in the parse step
            raises, the error is logged and every output field maps to "".
        """
        try:
            # Always fetch unconverted values; conversion happens per field below.
            raw_fields = self.base_adapter.parse(
                signature, completion, _parse_values=False
            )

            if not _parse_values:
                return self._translate_dict_to_target(raw_fields)

            output_fields = signature.output_fields
            return {
                name: self._parse_output_field(name, output_fields[name], raw)
                for name, raw in raw_fields.items()
                if name in output_fields
            }

        except Exception as e:
            self.logger.exception("Error in parse step: %s", e)
            return {field: "" for field in signature.output_fields}

    def format_finetune_data(self, signature: Signature, demos: List[Dict[str, Any]],
                             inputs: Dict[str, Any], outputs: Dict[str, Any]) -> Dict[str, Any]:
        """Translate demos, inputs and outputs to English and delegate to the base adapter.

        NOTE(review): ``outputs`` goes through the source_lang -> English
        prompt; if outputs are written in target_lang this may need a
        dedicated translation direction. Confirm with callers.
        """
        english_demos = [self._translate_dict_to_english(demo) for demo in demos]
        english_inputs = self._translate_dict_to_english(inputs)
        english_outputs = self._translate_dict_to_english(outputs)

        return self.base_adapter.format_finetune_data(
            signature, english_demos, english_inputs, english_outputs
        )
170 changes: 170 additions & 0 deletions dspy/adapters/translation_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@


from typing import Any, Dict, List, Optional
from dspy.signatures.signature import Signature, SignatureMeta
from dspy.adapters.base import *
from dspy.adapters.chat_adapter import *
import logging

class TranslationAdapter(Adapter):
    """Adapter that swaps the base ``ChatAdapter`` system prompt for a translation prompt.

    Demo/input formatting, completion parsing and fine-tune formatting are all
    delegated to ``base_adapter``; only the system message is produced here.
    """

    def __init__(
        self,
        target_lang: str,
        base_adapter: ChatAdapter,
        logger: Optional[logging.Logger] = None
    ):
        """Validate and store the adapter's collaborators.

        Args:
            target_lang: Language named throughout the system prompt.
            base_adapter: ``ChatAdapter`` that all real work is delegated to.
            logger: Logger to use; defaults to this module's logger.

        Raises:
            ValueError: If ``target_lang`` is not a non-blank string.
            TypeError: If ``base_adapter`` is not a ``ChatAdapter``.
        """
        super().__init__()

        lang_is_valid = isinstance(target_lang, str) and bool(target_lang.strip())
        if not lang_is_valid:
            raise ValueError("target_lang must be a non-empty string")

        if not isinstance(base_adapter, ChatAdapter):
            raise TypeError("base_adapter must be an instance of ChatAdapter")

        self.target_lang = target_lang
        self.base_adapter = base_adapter
        self.logger = logger if logger else logging.getLogger(__name__)

    def _get_translation_prompt(self, signature: SignatureMeta) -> str:
        """Build the system prompt: fixed guidelines plus signature-specific parts.

        The prompt names ``self.target_lang``, lists the keys of
        ``signature.output_fields``, and appends ``signature.instructions``
        when that attribute is truthy. Sections are joined with newlines.
        """
        lang = self.target_lang

        intro = (
            f"You are a professional translation assistant specializing in translations to {lang}.\n"
            "Your task is to provide accurate, culturally-appropriate translations that preserve the original meaning\n"
            f"while adapting naturally to {lang} conventions."
        )

        core_principles = [
            "\n### Core Translation Principles ###",
            "1. Accuracy and Fidelity:",
            " - Preserve the complete meaning and intent of the original content",
            " - Maintain all key information, context, and nuances",
            " - Neither add nor remove information during translation",
            " - Keep the original tone and style (formal, casual, technical, etc.)",
            "2. Natural Language Use:",
            f" - Use natural, idiomatic expressions in {lang}",
            " - Avoid literal translations that sound unnatural",
            " - Maintain consistent style and terminology throughout",
            " - Follow target language grammar and punctuation rules",
            "3. Cultural Adaptation:",
            " - Adapt cultural references to resonate with the target audience",
            " - Provide cultural context notes when necessary",
            " - Use culturally appropriate expressions and metaphors",
            " - Consider target region-specific language variations",
        ]

        technical_guidelines = [
            "\n### Technical Guidelines ###",
            "1. Names and Identifiers:",
            " - Keep person names in their original form",
            " - Maintain brand names unless they have official translations",
            " - Include original terms in parentheses where helpful",
            " - Translate titles only when official translations exist",
            "2. Numbers and Formatting:",
            " - Adapt number formats to target language conventions",
            " - Convert units of measurement if culturally appropriate",
            " - Maintain date/time formats per target language standards",
            " - Preserve all formatting markers (bold, italic, lists, etc.)",
            "3. Technical Terms:",
            " - Use standardized translations for technical terminology",
            " - Maintain consistency in technical term usage",
            " - Keep industry-specific jargon appropriate to the field",
            " - Include original terms in parentheses for clarity if needed",
        ]

        output_structure = [
            "\n### Output Structure Requirements ###",
            "1. Field Formatting:",
            " - Begin each field with: [[ ## fieldname ## ]]",
            " - Keep field names in English - DO NOT translate them",
            " - Include one blank line between fields",
            " - Ensure proper field marker formatting",
            "2. Content Organization:",
            " - Place translated content directly after each field marker",
            " - Preserve original paragraph structure and formatting",
            " - Maintain document hierarchy and organization",
            " - Complete each field's content before starting the next",
        ]

        field_names = ", ".join(signature.output_fields)
        required_fields = [
            "\n### Required Output Fields ###",
            f"Include ALL of these fields in order:\n{field_names}",
        ]

        quality_assurance = [
            "\n### Quality Assurance ###",
            "1. Pre-submission Checklist:",
            " - Verify all required fields are present and complete",
            " - Check for consistent terminology throughout",
            " - Confirm formatting and structure are preserved",
            " - Ensure translations maintain original meaning and context",
            "2. Common Error Prevention:",
            " - Double-check numbers and dates for accuracy",
            " - Verify proper handling of technical terms",
            " - Confirm cultural references are appropriately adapted",
            " - Check for natural flow in target language",
        ]

        language_enforcement = [
            "### Language Enforcement ###",
            f"- ALL outputs MUST be in {lang}",
            "- Do not switch languages between responses",
            "- If you detect yourself using any other language, stop and restart in the target language",
        ]

        sections = [
            intro,
            *core_principles,
            *technical_guidelines,
            *output_structure,
            *required_fields,
            *quality_assurance,
            *language_enforcement,
        ]

        task_instructions = signature.instructions
        if task_instructions:
            sections += [
                "\n### Task-Specific Instructions ###",
                task_instructions,
                "\nFollow these additional instructions while maintaining all general translation requirements.",
            ]

        return "\n".join(sections)

    def format(self, signature: Signature, demos: List[Dict[str, Any]],
               inputs: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the translation system message followed by the base adapter's messages.

        If the base adapter's first message has role "system", it is dropped
        so that the translation prompt is the only system message.
        """
        system_message = {
            "role": "system",
            "content": self._get_translation_prompt(signature),
        }

        base_messages = self.base_adapter.format(signature, demos, inputs)

        # The translation prompt replaces the base adapter's own system prompt.
        has_system_head = bool(base_messages) and base_messages[0]["role"] == "system"
        if has_system_head:
            base_messages = base_messages[1:]

        return [system_message, *base_messages]

    def parse(self, signature: Signature, completion: str,
              _parse_values: bool = True) -> Dict[str, Any]:
        """Parse ``completion`` through the base adapter.

        Returns the base adapter's result. If it raises, the error is logged
        and every field in ``signature.output_fields`` is mapped to "".
        """
        try:
            return self.base_adapter.parse(signature, completion, _parse_values)
        except Exception as err:
            self.logger.error(f"Error parsing translation: {err!s}")
            return dict.fromkeys(signature.output_fields, "")

    def format_finetune_data(self, signature: Signature, demos: List[Dict[str, Any]],
                             inputs: Dict[str, Any], outputs: Dict[str, Any]) -> Dict[str, Any]:
        """Hand fine-tune formatting to the base adapter without modification."""
        delegate = self.base_adapter.format_finetune_data
        return delegate(signature, demos, inputs, outputs)
Loading