test committed on
Commit 2ba7d76 · 1 Parent(s): 2290099

add llm based post editor

Files changed (2)
  1. app.py +141 -47
  2. llm_post_editor.py +211 -0
app.py CHANGED
@@ -1,69 +1,104 @@
 import json
 import os
+import time
 from pathlib import Path

 import gradio as gr

 from glossary_checker import GlossaryChecker
+from llm_post_editor import LLMTranslationEditor
 from trans_validator import TranslationValidator

-# Configure glossary paths
+# Configure paths
 GLOSSARIES = {
     "84000 Glossary": "data/84000_glossary.json",
 }

-def load_and_validate(file_obj, selected_glossary, api_key):
+
+def load_validate_and_edit(file_obj, selected_glossary, api_key, progress=gr.Progress()):
+    """Process translations with progress updates."""
     if not api_key or not api_key.startswith("sk-"):
         return "Please provide a valid Anthropic API key (starts with 'sk-')"

     try:
+        # Initialize progress tracking
+        progress(0, desc="Starting processing...")
+
         # Read content from the file
         content = file_obj.decode('utf-8')
+        progress(0.1, desc="File loaded")

         # Save content to temporary file
         temp_path = "temp_aligned.txt"
         with open(temp_path, "w", encoding='utf-8') as f:
             f.write(content)

-        # Initialize checker and validator
+        # Count total lines for progress tracking
+        total_lines = len([line for line in content.split('\n') if line.strip()])
+        progress(0.15, desc=f"Found {total_lines} lines to process")
+
+        # Initialize components
+        progress(0.2, desc="Initializing validation...")
         glossary_path = GLOSSARIES[selected_glossary]
         checker = GlossaryChecker(glossary_path)
         validator = TranslationValidator(checker, api_key)

         # Run validation
-        results = validator.validate_translation(temp_path)
+        progress(0.3, desc="Running validation...")
+        validation_results = validator.validate_translation(temp_path)
+        progress(0.6, desc="Validation complete")
+
+        # Initialize editor and get edited translations
+        progress(0.7, desc="Starting post-editing...")
+        editor = LLMTranslationEditor({"lines": validation_results}, api_key)
+        edited_translations = editor.post_edit_translations()
+        progress(0.9, desc="Post-editing complete")

         # Create result display
+        progress(0.95, desc="Generating report...")
         markdown_output = []

         # Add summary
-        total_score = sum(r['score'] for r in results) / len(results)
+        total_score = sum(r['score'] for r in validation_results) / len(validation_results)
         markdown_output.append(f"# Validation Results\n")
         markdown_output.append(f"**Overall Score**: {total_score:.2f}%\n")
         markdown_output.append("*(Score based on terms counted in scoring)*\n\n")
-        markdown_output.append(f"**Total Lines**: {len(results)}\n\n")
+        markdown_output.append(f"**Total Lines**: {len(validation_results)}\n\n")

-        # Add detailed results for each line
-        for result in results:
-            markdown_output.append(f"## Line {result['line_number']}\n")
-            markdown_output.append(f"**Score**: {result['score']:.2f}%\n")
-            markdown_output.append(f"**Source**: {result['source']}\n")
-            markdown_output.append(f"**Target**: {result['target']}\n")
-
-            if result['terms']:
+        # Add processing statistics
+        modified_lines = sum(1 for t in edited_translations if t['modified'])
+        markdown_output.append("## Processing Statistics\n")
+        markdown_output.append(f"- Lines Modified: {modified_lines}/{len(validation_results)}\n")
+        markdown_output.append(f"- Processed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
+
+        # Add detailed results for each line
+        for idx, (validation, editing) in enumerate(zip(validation_results, edited_translations)):
+            markdown_output.append(f"## Line {validation['line_number']}\n")
+            markdown_output.append(f"**Score**: {validation['score']:.2f}%\n")
+            markdown_output.append(f"**Source**: {validation['source']}\n")
+            markdown_output.append(f"**Current Translation**: {validation['target']}\n")
+
+            # Add edited translation if available and modified
+            if editing['modified']:
+                markdown_output.append(f"\n**Post-Edited Translation**: {editing['edited']}\n")
+                markdown_output.append(f"\n**Editing Notes**: {editing['reasoning']}\n")
+
+            if validation['terms']:
                 # Separate terms into counted and not counted
                 counted_terms = []
                 other_terms = []

-                for term in result['terms']:
+                for term in validation['terms']:
                     if term['analysis']['translation_assessment']['should_be_counted']:
                         counted_terms.append(term)
                     else:
                         other_terms.append(term)

-                # Display counted terms first with clear scoring implications
+                # Display counted terms in collapsible section
                 if counted_terms:
-                    markdown_output.append("\n### 📊 Terms Counted in Scoring\n")
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>📊 Terms Counted in Scoring</summary>\n")
+
                     for term in counted_terms:
                         analysis = term['analysis']
                         assessment = analysis['translation_assessment']
@@ -72,61 +107,120 @@ def load_and_validate(file_obj, selected_glossary, api_key):
                         markdown_output.append(f"- Found Translation: **{analysis['translated_as']}**\n")
                         markdown_output.append(f"- Expected Translation: **{analysis['glossary_translation']}**\n")

-                        # Add matching categories for context
+                        # Add categories in collapsible section
+                        markdown_output.append("\n<details>")
+                        markdown_output.append("<summary>Show Categories & Definitions</summary>\n")
+
                         for cat_name in analysis['matching_categories']:
                             cat_data = term['categories'].get(cat_name, {})
                             markdown_output.append(f"\n*{cat_name}*:\n")
+                            if 'translations' in cat_data:
+                                markdown_output.append(f"- Translations: {', '.join(cat_data['translations'])}\n")
                             if 'definitions' in cat_data:
-                                markdown_output.append(f"- Definition: {', '.join(cat_data['definitions'])}\n")
+                                markdown_output.append(f"- Definitions: {', '.join(cat_data['definitions'])}\n")
+
+                        markdown_output.append("</details>\n")
+
+                    markdown_output.append("</details>\n")

-                # Display other found terms separately
+                # Display other terms in separate collapsible section
                 if other_terms:
-                    markdown_output.append("\n### Other Found Terms (Not Counted)\n")
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>Terms Not Counted in Scoring</summary>\n")
+
                     for term in other_terms:
                         analysis = term['analysis']
                         markdown_output.append(f"\n#### `{term['source_term']}`\n")
                         markdown_output.append(f"- Found Translation: {analysis['translated_as']}\n")
                         markdown_output.append(f"- Note: Term not counted due to usage context\n")

+                    # Add categories in collapsible section
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>Show Categories & Definitions</summary>\n")
+
+                    for cat_name in analysis['matching_categories']:
+                        cat_data = term['categories'].get(cat_name, {})
+                        markdown_output.append(f"\n*{cat_name}*:\n")
+                        if 'translations' in cat_data:
+                            markdown_output.append(f"- Translations: {', '.join(cat_data['translations'])}\n")
+                        if 'definitions' in cat_data:
+                            markdown_output.append(f"- Definitions: {', '.join(cat_data['definitions'])}\n")
+
+                    markdown_output.append("</details>\n")
+
+                    markdown_output.append("</details>\n")
+
                 markdown_output.append("\n---\n")
             else:
                 markdown_output.append("\n*No glossary terms found in this line*\n\n---\n")

         # Clean up temp file
         os.remove(temp_path)
+        progress(1.0, desc="Processing complete!")

         return "\n".join(markdown_output)

     except Exception as e:
         if os.path.exists(temp_path):
             os.remove(temp_path)
-        return f"Error during validation: {str(e)}"
-
-# Create Gradio interface
-demo = gr.Interface(
-    fn=load_and_validate,
-    inputs=[
-        gr.File(label="Upload aligned translations file (tab-separated)", type="binary"),
-        gr.Dropdown(choices=list(GLOSSARIES.keys()), label="Select Glossary"),
-        gr.Textbox(label="Anthropic API Key", placeholder="sk-...", type="password")
-    ],
-    outputs=gr.Markdown(),
-    title="Translation Validation Tool",
-    description="""Upload a file with tab-separated Tibetan source and English translation pairs.
-    The tool validates translations against the glossary using semantic analysis.
-
-    Scoring System:
-    - 📊 Only terms that match glossary definitions are counted in scoring
-    - Correct translations must use glossary terms (with allowed grammatical variations)
-    - Semantic equivalents or synonyms are marked as incorrect
-    - Score = (correct translations) / (total counted terms) × 100
-
-    You'll need an Anthropic API key to use this tool. Get one at https://console.anthropic.com/""",
-    examples=[
-        ["data/example_translations.txt", "84000 Glossary", ""]  # Example with masked API key
-    ],
-    cache_examples=False  # Don't cache examples with API keys
-)
+        return f"Error during processing: {str(e)}\n\nPlease check your input file and API key and try again."
+
+# Create Gradio interface with examples
+with gr.Blocks() as demo:
+    gr.Markdown("# Translation Validation & Editing Tool")
+
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(
+                label="Upload aligned translations file (tab-separated)",
+                type="binary"
+            )
+            glossary_input = gr.Dropdown(
+                choices=list(GLOSSARIES.keys()),
+                label="Select Glossary",
+                value=list(GLOSSARIES.keys())[0]
+            )
+            api_key_input = gr.Textbox(
+                label="Anthropic API Key",
+                placeholder="sk-...",
+                type="password"
+            )
+            submit_btn = gr.Button("Process Translations", variant="primary")
+
+            # Add examples
+            gr.Examples(
+                examples=[
+                    [str(Path("data/example_translations.txt").resolve()), "84000 Glossary", "sk-..."],
+                ],
+                inputs=[file_input, glossary_input, api_key_input],
+                label="Example Inputs"
+            )
+
+        with gr.Column():
+            output = gr.Markdown()
+
+    gr.Markdown("""### Instructions
+    1. Upload a tab-separated file with Tibetan source and English translations
+    2. Select the glossary to use for validation
+    3. Enter your Anthropic API key
+    4. Click "Process Translations" and wait for results
+
+    The tool will:
+    - Validate translations against the glossary
+    - Calculate accuracy scores
+    - Suggest improvements using Claude
+    - Show detailed term analysis
+
+    Key:
+    - 📊 Terms used for scoring
+    - ✅ Correctly translated terms
+    - ❌ Terms needing improvement""")
+
+    submit_btn.click(
+        fn=load_validate_and_edit,
+        inputs=[file_input, glossary_input, api_key_input],
+        outputs=output
+    )

 if __name__ == "__main__":
     demo.launch()
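
Note on the input format: the handler writes the uploaded bytes to temp_aligned.txt and expects one Tibetan source segment, a tab, then its English translation per line. A minimal sketch of building such a file follows; the Tibetan segments and translations are placeholder values for illustration, not content from the repository:

    # Sketch: write a sample tab-separated input file (placeholder content).
    sample_rows = [
        ("བཀྲ་ཤིས་བདེ་ལེགས།", "Greetings and good fortune."),
        ("ཆོས་ཐམས་ཅད།", "all phenomena"),
    ]
    with open("temp_aligned.txt", "w", encoding="utf-8") as f:
        for source, target in sample_rows:
            f.write(f"{source}\t{target}\n")

Under the scoring rule quoted in the removed gr.Interface description, 3 correct translations out of 4 counted terms would give 3/4 × 100 = 75%.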
llm_post_editor.py ADDED
@@ -0,0 +1,211 @@
+import json
+from typing import Any, Dict, List
+
+from anthropic import Anthropic
+
+
+class LLMTranslationEditor:
+    def __init__(self, validation_results: dict, anthropic_api_key: str):
+        """Initialize with validation results and Anthropic API key.
+
+        Args:
+            validation_results (dict): Results from TranslationValidator
+            anthropic_api_key (str): Anthropic API key for Claude access
+        """
+        self.results = validation_results
+        self.client = Anthropic(api_key=anthropic_api_key)
+
+    def edit_translation(self, source_text: str, current_translation: str,
+                         terms_info: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Use Claude to edit the translation considering validation results and context.
+
+        Args:
+            source_text (str): Original Tibetan text
+            current_translation (str): Current English translation
+            terms_info (list): Terms information from validation results
+
+        Returns:
+            Dict[str, Any]: Edited translation with analysis
+        """
+        # Build context for terms that need attention
+        terms_context = []
+        for term in terms_info:
+            analysis = term['analysis']
+            assessment = analysis['translation_assessment']
+
+            if assessment['should_be_counted'] and not assessment['translated_correctly']:
+                term_context = {
+                    'term': term['source_term'],
+                    'current': analysis['translated_as'],
+                    'suggested': analysis['glossary_translation'],
+                    'categories': {}
+                }
+
+                # Add category information
+                for cat_name, cat_data in term['categories'].items():
+                    if cat_name in analysis['matching_categories']:
+                        term_context['categories'][cat_name] = {
+                            'translations': cat_data.get('translations', []),
+                            'definitions': cat_data.get('definitions', [])
+                        }
+
+                terms_context.append(term_context)
+
+        if not terms_context:
+            return {
+                'edited_translation': current_translation,
+                'modified': False,
+                'reasoning': 'No terms requiring editing'
+            }
+
+        prompt = f"""You are an expert Tibetan translator. Review and improve this translation, focusing on accuracy and natural English:
+
+Tibetan text: {source_text}
+Current translation: {current_translation}
+
+The following terms need attention:"""
+
+        for term in terms_context:
+            prompt += f"\n\nTibetan term: {term['term']}"
+            prompt += f"\nCurrently translated as: {term['current']}"
+            prompt += f"\nGlossary suggestion: {term['suggested']}"
+
+            for cat_name, cat_data in term['categories'].items():
+                prompt += f"\n{cat_name}:"
+                if cat_data['definitions']:
+                    prompt += f"\n- Definitions: {', '.join(cat_data['definitions'])}"
+                if cat_data['translations']:
+                    prompt += f"\n- Translations: {', '.join(cat_data['translations'])}"
+
+        prompt += """
+
+Please provide:
+1. An improved translation that:
+   - Maintains the meaning of the Tibetan text
+   - Maintains the style and tone of the current translation
+   - Uses appropriate technical terms from the glossary
+   - Preserves any correct parts of the current translation
+2. Your reasoning for the changes
+
+Respond in JSON format:
+{
+    "edited_translation": "your improved translation",
+    "reasoning": "explanation of changes and decisions",
+    "modified": true/false
+}"""
+
+        try:
+            message = self.client.messages.create(
+                model="claude-3-sonnet-20240229",
+                max_tokens=1000,
+                temperature=0,
+                messages=[{"role": "user", "content": prompt}]
+            )
+
+            # Extract JSON from response
+            import re
+            json_match = re.search(r'\{.*\}', message.content[0].text, re.DOTALL)
+            if json_match:
+                return json.loads(json_match.group())
+            else:
+                return {
+                    'edited_translation': current_translation,
+                    'modified': False,
+                    'reasoning': 'Failed to parse LLM response'
+                }
+
+        except Exception as e:
+            print(f"Error during LLM editing: {e}")
+            return {
+                'edited_translation': current_translation,
+                'modified': False,
+                'reasoning': f'LLM editing failed: {str(e)}'
+            }
+
+    def post_edit_translations(self) -> List[Dict[str, Any]]:
+        """Process all lines and post-edit translations using LLM.
+
+        Returns:
+            List[Dict[str, Any]]: List of edited translations with analysis
+        """
+        edited_translations = []
+
+        for line in self.results['lines']:
+            source = line['source']
+            target = line['target']
+            terms = line['terms']
+
+            if not terms:
+                edited_translations.append({
+                    'line_number': line['line_number'],
+                    'source': source,
+                    'original': target,
+                    'edited': target,
+                    'modified': False,
+                    'reasoning': 'No terms to edit'
+                })
+                continue
+
+            # Get LLM to edit the translation
+            edit_result = self.edit_translation(source, target, terms)
+
+            edited_translations.append({
+                'line_number': line['line_number'],
+                'source': source,
+                'original': target,
+                'edited': edit_result['edited_translation'],
+                'modified': edit_result['modified'],
+                'reasoning': edit_result['reasoning']
+            })
+
+        return edited_translations
+
+    def save_edits(self, edited_translations: List[Dict[str, Any]],
+                   output_path: str) -> None:
+        """Save the post-edited translations with analysis to a file.
+
+        Args:
+            edited_translations (List[Dict[str, Any]]): Edited translations with analysis
+            output_path (str): Path to save results
+        """
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump({
+                'summary': {
+                    'total_lines': len(edited_translations),
+                    'modified_lines': sum(1 for t in edited_translations if t['modified'])
+                },
+                'translations': edited_translations
+            }, f, ensure_ascii=False, indent=2)
+
+
+# Example usage:
+if __name__ == "__main__":
+    import os
+
+    # Load validation results
+    with open('data/validation_results.json', 'r', encoding='utf-8') as f:
+        validation_results = json.load(f)
+
+    # Create editor and process translations
+    editor = LLMTranslationEditor(
+        validation_results,
+        os.getenv('ANTHROPIC_API_KEY')
+    )
+    edited_translations = editor.post_edit_translations()
+
+    # Save results
+    editor.save_edits(edited_translations, 'llm_post_edited_translations.json')
+
+    # Print summary and examples
+    print(f"Post-editing completed:")
+    print(f"Total lines: {len(edited_translations)}")
+    print(f"Modified lines: {sum(1 for t in edited_translations if t['modified'])}")
+
+    print("\nExample modifications:")
+    for trans in edited_translations:
+        if trans['modified']:
+            print(f"\nLine {trans['line_number']}:")
+            print(f"Source  : {trans['source']}")
+            print(f"Original: {trans['original']}")
+            print(f"Edited  : {trans['edited']}")
+            print(f"Reasoning: {trans['reasoning']}")
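
Note on the expected input: the editor reads only a handful of fields from the validator's output (or from the {"lines": validation_results} wrapper app.py builds). A minimal hand-written example of that shape, inferred from the keys this module accesses; all values are illustrative placeholders, not real TranslationValidator output:

    # Hypothetical validation_results shape (illustrative values only).
    validation_results = {
        "lines": [
            {
                "line_number": 1,
                "source": "ཆོས།",            # Tibetan source segment (placeholder)
                "target": "the teaching",     # current English translation (placeholder)
                "terms": [
                    {
                        "source_term": "ཆོས།",
                        "categories": {
                            "84000 Glossary": {   # category name is a placeholder
                                "translations": ["dharma"],
                                "definitions": ["The teachings of the Buddha."],
                            },
                        },
                        "analysis": {
                            "translated_as": "the teaching",
                            "glossary_translation": "dharma",
                            "matching_categories": ["84000 Glossary"],
                            "translation_assessment": {
                                "should_be_counted": True,
                                "translated_correctly": False,
                            },
                        },
                    },
                ],
            },
        ],
    }

With should_be_counted true and translated_correctly false, edit_translation would include this term in the prompt and ask Claude to revise the line using the glossary rendering "dharma".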