import json
import os
from typing import Any, Dict, List, Optional

from anthropic import Anthropic

# Defaults used when the caller does not override them in the constructor.
# NOTE(review): the default model id is a dated snapshot; confirm it is still
# served by the API before relying on it.
DEFAULT_MODEL = "claude-3-sonnet-20240229"
DEFAULT_MAX_TOKENS = 1000

# Fixed tail of the prompt: what we ask for and the JSON shape we expect back.
_RESPONSE_INSTRUCTIONS = """

Please provide:
1. An improved translation that:
   - Maintains the meaning of the Tibetan text
   - Maintains the style and tone of the current translation
   - Uses appropriate technical terms from the glossary
   - Preserves any correct parts of the current translation
2. Your reasoning for the changes

Respond in JSON format:
{
    "edited_translation": "your improved translation",
    "reasoning": "explanation of changes and decisions",
    "modified": true/false
}"""


class LLMTranslationEditor:
    """Post-edit translations with Claude, guided by validation results."""

    def __init__(
        self,
        validation_results: dict,
        anthropic_api_key: str,
        *,
        model: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
    ):
        """Initialize with validation results and Anthropic API key.

        Args:
            validation_results (dict): Results from TranslationValidator.
                Must contain a ``'lines'`` list (read by
                ``post_edit_translations``).
            anthropic_api_key (str): Anthropic API key for Claude access.
            model (str): Model id passed to ``messages.create``.
            max_tokens (int): Response token limit passed to
                ``messages.create``.
        """
        self.results = validation_results
        self.client = Anthropic(api_key=anthropic_api_key)
        self.model = model
        self.max_tokens = max_tokens

    @staticmethod
    def _unchanged(current_translation: str, reasoning: str) -> Dict[str, Any]:
        """Build the result returned when the translation is left as it is."""
        return {
            'edited_translation': current_translation,
            'modified': False,
            'reasoning': reasoning,
        }

    @staticmethod
    def _collect_terms_context(
        terms_info: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Select the terms that are counted but not translated correctly."""
        terms_context = []
        for term in terms_info:
            analysis = term['analysis']
            assessment = analysis['translation_assessment']
            if not assessment['should_be_counted']:
                continue
            if assessment['translated_correctly']:
                continue

            # Only categories the analysis marked as matching are shown to
            # the model.
            matching = analysis['matching_categories']
            categories = {
                cat_name: {
                    'translations': cat_data.get('translations', []),
                    'definitions': cat_data.get('definitions', []),
                }
                for cat_name, cat_data in term['categories'].items()
                if cat_name in matching
            }
            terms_context.append({
                'term': term['source_term'],
                'current': analysis['translated_as'],
                'suggested': analysis['glossary_translation'],
                'categories': categories,
            })
        return terms_context

    @staticmethod
    def _build_prompt(
        source_text: str,
        current_translation: str,
        terms_context: List[Dict[str, Any]],
    ) -> str:
        """Assemble the prompt from the texts and the flagged terms."""
        parts = [
            "You are an expert Tibetan translator. \n"
            "Review and improve this translation, focusing on accuracy and "
            "natural English:\n\n"
            f"Tibetan text: {source_text}\n"
            f"Current translation: {current_translation}\n\n"
            "The following terms need attention:"
        ]
        for term in terms_context:
            parts.append(f"\n\nTibetan term: {term['term']}")
            parts.append(f"\nCurrently translated as: {term['current']}")
            parts.append(f"\nGlossary suggestion: {term['suggested']}")
            for cat_name, cat_data in term['categories'].items():
                parts.append(f"\n{cat_name}:")
                # str() guards against non-string glossary entries, which
                # would make str.join raise TypeError.
                if cat_data['definitions']:
                    joined = ', '.join(map(str, cat_data['definitions']))
                    parts.append(f"\n- Definitions: {joined}")
                if cat_data['translations']:
                    joined = ', '.join(map(str, cat_data['translations']))
                    parts.append(f"\n- Translations: {joined}")
        parts.append(_RESPONSE_INSTRUCTIONS)
        return ''.join(parts)

    @staticmethod
    def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
        """Return the first JSON object that can be decoded from ``text``.

        Decoding is attempted at each ``{`` in turn, so braces in prose
        before or after the JSON payload do not break extraction.
        """
        decoder = json.JSONDecoder()
        start = text.find('{')
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(candidate, dict):
                    return candidate
            start = text.find('{', start + 1)
        return None

    @classmethod
    def _normalize_result(
        cls,
        parsed: Optional[Dict[str, Any]],
        current_translation: str,
    ) -> Dict[str, Any]:
        """Guarantee the keys callers index into are present and well typed."""
        if parsed is None:
            return cls._unchanged(
                current_translation, 'Failed to parse LLM response'
            )

        edited = parsed.get('edited_translation')
        if not isinstance(edited, str) or not edited.strip():
            return cls._unchanged(
                current_translation, 'Failed to parse LLM response'
            )

        modified = parsed.get('modified')
        if not isinstance(modified, bool):
            # The model did not give a usable flag; derive it from the text.
            modified = edited != current_translation

        # Extra keys the model returned are kept, as before.
        return {
            **parsed,
            'edited_translation': edited,
            'modified': modified,
            'reasoning': str(parsed.get('reasoning', '')),
        }

    def edit_translation(
        self,
        source_text: str,
        current_translation: str,
        terms_info: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Use Claude to edit the translation considering validation results.

        Args:
            source_text (str): Original Tibetan text.
            current_translation (str): Current English translation.
            terms_info (list): Terms information from validation results.

        Returns:
            Dict[str, Any]: Always contains ``'edited_translation'``,
            ``'modified'`` and ``'reasoning'``. When no term needs editing,
            the API call fails, or the reply cannot be parsed, the current
            translation is returned with ``'modified': False``.

        Raises:
            KeyError: If a term entry lacks one of the keys read from the
                validation results.
        """
        terms_context = self._collect_terms_context(terms_info)
        if not terms_context:
            return self._unchanged(
                current_translation, 'No terms requiring editing'
            )

        prompt = self._build_prompt(
            source_text, current_translation, terms_context
        )

        try:
            message = self.client.messages.create(
                model=self.model,
                max_tokens=self.max_tokens,
                temperature=0,
                messages=[{"role": "user", "content": prompt}],
            )
            response_text = message.content[0].text
        except Exception as e:
            # Best-effort boundary: one failed request must not abort the
            # whole batch, so report it and keep the current translation.
            print(f"Error during LLM editing: {e}")
            return self._unchanged(
                current_translation, f'LLM editing failed: {str(e)}'
            )

        parsed = self._extract_json_object(response_text)
        return self._normalize_result(parsed, current_translation)

    def post_edit_translations(self) -> List[Dict[str, Any]]:
        """Process all lines and post-edit translations using LLM.

        Returns:
            List[Dict[str, Any]]: One entry per line in
            ``self.results['lines']`` with the keys ``'line_number'``,
            ``'source'``, ``'original'``, ``'edited'``, ``'modified'`` and
            ``'reasoning'``.
        """
        edited_translations = []
        for line in self.results['lines']:
            source = line['source']
            target = line['target']
            terms = line['terms']

            if not terms:
                # Nothing to check against, so skip the API call.
                edited_translations.append({
                    'line_number': line['line_number'],
                    'source': source,
                    'original': target,
                    'edited': target,
                    'modified': False,
                    'reasoning': 'No terms to edit',
                })
                continue

            edit_result = self.edit_translation(source, target, terms)
            edited_translations.append({
                'line_number': line['line_number'],
                'source': source,
                'original': target,
                'edited': edit_result['edited_translation'],
                'modified': edit_result['modified'],
                'reasoning': edit_result['reasoning'],
            })
        return edited_translations

    def save_edits(
        self,
        edited_translations: List[Dict[str, Any]],
        output_path: str,
    ) -> None:
        """Save the post-edited translations with analysis to a file.

        Args:
            edited_translations (List[Dict[str, Any]]): Edited translations
                with analysis.
            output_path (str): Path to save results.
        """
        modified_lines = sum(1 for t in edited_translations if t['modified'])
        payload = {
            'summary': {
                'total_lines': len(edited_translations),
                'modified_lines': modified_lines,
            },
            'translations': edited_translations,
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)


def main() -> None:
    """Example usage: post-edit the validated translations and print a summary."""
    api_key = os.getenv('ANTHROPIC_API_KEY')
    if not api_key:
        # Fail once, up front, instead of once per translated line.
        raise SystemExit("ANTHROPIC_API_KEY environment variable is not set")

    # Load validation results
    with open('data/validation_results.json', 'r', encoding='utf-8') as f:
        validation_results = json.load(f)

    # Create editor and process translations
    editor = LLMTranslationEditor(validation_results, api_key)
    edited_translations = editor.post_edit_translations()

    # Save results
    editor.save_edits(edited_translations, 'llm_post_edited_translations.json')

    # Print summary and examples
    modified = [t for t in edited_translations if t['modified']]
    print("Post-editing completed:")
    print(f"Total lines: {len(edited_translations)}")
    print(f"Modified lines: {len(modified)}")

    print("\nExample modifications:")
    for trans in modified:
        print(f"\nLine {trans['line_number']}:")
        print(f"Source  : {trans['source']}")
        print(f"Original: {trans['original']}")
        print(f"Edited  : {trans['edited']}")
        print(f"Reasoning: {trans['reasoning']}")


if __name__ == "__main__":
    main()