test committed on
Commit 2ba7d76 · 1 Parent(s): 2290099

add llm based post editor

Files changed (2)
  1. app.py +141 -47
  2. llm_post_editor.py +211 -0
app.py CHANGED
@@ -1,69 +1,104 @@
 import json
 import os
+import time
 from pathlib import Path

 import gradio as gr

 from glossary_checker import GlossaryChecker
+from llm_post_editor import LLMTranslationEditor
 from trans_validator import TranslationValidator

-# Configure glossary paths
+# Configure paths
 GLOSSARIES = {
     "84000 Glossary": "data/84000_glossary.json",
 }

-def load_and_validate(file_obj, selected_glossary, api_key):
+
+def load_validate_and_edit(file_obj, selected_glossary, api_key, progress=gr.Progress()):
+    """Process translations with progress updates."""
     if not api_key or not api_key.startswith("sk-"):
         return "Please provide a valid Anthropic API key (starts with 'sk-')"

     try:
+        # Initialize progress tracking
+        progress(0, desc="Starting processing...")
+
         # Read content from the file
         content = file_obj.decode('utf-8')
+        progress(0.1, desc="File loaded")

         # Save content to temporary file
         temp_path = "temp_aligned.txt"
         with open(temp_path, "w", encoding='utf-8') as f:
             f.write(content)

-        # Initialize checker and validator
+        # Count total lines for progress tracking
+        total_lines = len([line for line in content.split('\n') if line.strip()])
+        progress(0.15, desc=f"Found {total_lines} lines to process")
+
+        # Initialize components
+        progress(0.2, desc="Initializing validation...")
         glossary_path = GLOSSARIES[selected_glossary]
         checker = GlossaryChecker(glossary_path)
         validator = TranslationValidator(checker, api_key)

         # Run validation
-        results = validator.validate_translation(temp_path)
+        progress(0.3, desc="Running validation...")
+        validation_results = validator.validate_translation(temp_path)
+        progress(0.6, desc="Validation complete")
+
+        # Initialize editor and get edited translations
+        progress(0.7, desc="Starting post-editing...")
+        editor = LLMTranslationEditor({"lines": validation_results}, api_key)
+        edited_translations = editor.post_edit_translations()
+        progress(0.9, desc="Post-editing complete")

         # Create result display
+        progress(0.95, desc="Generating report...")
         markdown_output = []

         # Add summary
-        total_score = sum(r['score'] for r in results) / len(results)
+        total_score = sum(r['score'] for r in validation_results) / len(validation_results)
         markdown_output.append(f"# Validation Results\n")
         markdown_output.append(f"**Overall Score**: {total_score:.2f}%\n")
         markdown_output.append("*(Score based on terms counted in scoring)*\n\n")
-        markdown_output.append(f"**Total Lines**: {len(results)}\n\n")
+        markdown_output.append(f"**Total Lines**: {len(validation_results)}\n\n")

-        # Add detailed results for each line
-        for result in results:
-            markdown_output.append(f"## Line {result['line_number']}\n")
-            markdown_output.append(f"**Score**: {result['score']:.2f}%\n")
-            markdown_output.append(f"**Source**: {result['source']}\n")
-            markdown_output.append(f"**Target**: {result['target']}\n")
-
-            if result['terms']:
+        # Add processing statistics
+        modified_lines = sum(1 for t in edited_translations if t['modified'])
+        markdown_output.append("## Processing Statistics\n")
+        markdown_output.append(f"- Lines Modified: {modified_lines}/{len(validation_results)}\n")
+        markdown_output.append(f"- Processed at: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
+
+        # Add detailed results for each line
+        for idx, (validation, editing) in enumerate(zip(validation_results, edited_translations)):
+            markdown_output.append(f"## Line {validation['line_number']}\n")
+            markdown_output.append(f"**Score**: {validation['score']:.2f}%\n")
+            markdown_output.append(f"**Source**: {validation['source']}\n")
+            markdown_output.append(f"**Current Translation**: {validation['target']}\n")
+
+            # Add edited translation if available and modified
+            if editing['modified']:
+                markdown_output.append(f"\n**Post-Edited Translation**: {editing['edited']}\n")
+                markdown_output.append(f"\n**Editing Notes**: {editing['reasoning']}\n")
+
+            if validation['terms']:
                 # Separate terms into counted and not counted
                 counted_terms = []
                 other_terms = []

-                for term in result['terms']:
+                for term in validation['terms']:
                     if term['analysis']['translation_assessment']['should_be_counted']:
                         counted_terms.append(term)
                     else:
                         other_terms.append(term)

-                # Display counted terms first with clear scoring implications
+                # Display counted terms in collapsible section
                 if counted_terms:
-                    markdown_output.append("\n### 📊 Terms Counted in Scoring\n")
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>📊 Terms Counted in Scoring</summary>\n")
+
                     for term in counted_terms:
                         analysis = term['analysis']
                         assessment = analysis['translation_assessment']
@@ -72,61 +107,120 @@ def load_and_validate(file_obj, selected_glossary, api_key):
                         markdown_output.append(f"- Found Translation: **{analysis['translated_as']}**\n")
                         markdown_output.append(f"- Expected Translation: **{analysis['glossary_translation']}**\n")

-                        # Add matching categories for context
+                        # Add categories in collapsible section
+                        markdown_output.append("\n<details>")
+                        markdown_output.append("<summary>Show Categories & Definitions</summary>\n")
+
                         for cat_name in analysis['matching_categories']:
                             cat_data = term['categories'].get(cat_name, {})
                             markdown_output.append(f"\n*{cat_name}*:\n")
+                            if 'translations' in cat_data:
+                                markdown_output.append(f"- Translations: {', '.join(cat_data['translations'])}\n")
                             if 'definitions' in cat_data:
-                                markdown_output.append(f"- Definition: {', '.join(cat_data['definitions'])}\n")
+                                markdown_output.append(f"- Definitions: {', '.join(cat_data['definitions'])}\n")
+
+                        markdown_output.append("</details>\n")
+
+                    markdown_output.append("</details>\n")

-                # Display other found terms separately
+                # Display other terms in separate collapsible section
                 if other_terms:
-                    markdown_output.append("\n### Other Found Terms (Not Counted)\n")
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>Terms Not Counted in Scoring</summary>\n")
+
                     for term in other_terms:
                         analysis = term['analysis']
                         markdown_output.append(f"\n#### `{term['source_term']}`\n")
                         markdown_output.append(f"- Found Translation: {analysis['translated_as']}\n")
                         markdown_output.append(f"- Note: Term not counted due to usage context\n")

+                    # Add categories in collapsible section
+                    markdown_output.append("\n<details>")
+                    markdown_output.append("<summary>Show Categories & Definitions</summary>\n")
+
+                    for cat_name in analysis['matching_categories']:
+                        cat_data = term['categories'].get(cat_name, {})
+                        markdown_output.append(f"\n*{cat_name}*:\n")
+                        if 'translations' in cat_data:
+                            markdown_output.append(f"- Translations: {', '.join(cat_data['translations'])}\n")
+                        if 'definitions' in cat_data:
+                            markdown_output.append(f"- Definitions: {', '.join(cat_data['definitions'])}\n")
+
+                    markdown_output.append("</details>\n")
+
+                    markdown_output.append("</details>\n")
+
                 markdown_output.append("\n---\n")
             else:
                 markdown_output.append("\n*No glossary terms found in this line*\n\n---\n")

         # Clean up temp file
         os.remove(temp_path)
+        progress(1.0, desc="Processing complete!")

         return "\n".join(markdown_output)

     except Exception as e:
         if os.path.exists(temp_path):
             os.remove(temp_path)
-        return f"Error during validation: {str(e)}"
-
-# Create Gradio interface
-demo = gr.Interface(
-    fn=load_and_validate,
-    inputs=[
-        gr.File(label="Upload aligned translations file (tab-separated)", type="binary"),
-        gr.Dropdown(choices=list(GLOSSARIES.keys()), label="Select Glossary"),
-        gr.Textbox(label="Anthropic API Key", placeholder="sk-...", type="password")
-    ],
-    outputs=gr.Markdown(),
-    title="Translation Validation Tool",
-    description="""Upload a file with tab-separated Tibetan source and English translation pairs.
-    The tool validates translations against the glossary using semantic analysis.
-
-    Scoring System:
-    - 📊 Only terms that match glossary definitions are counted in scoring
-    - Correct translations must use glossary terms (with allowed grammatical variations)
-    - Semantic equivalents or synonyms are marked as incorrect
-    - Score = (correct translations) / (total counted terms) × 100
-
-    You'll need an Anthropic API key to use this tool. Get one at https://console.anthropic.com/""",
-    examples=[
-        ["data/example_translations.txt", "84000 Glossary", ""]  # Example with masked API key
-    ],
-    cache_examples=False  # Don't cache examples with API keys
-)
+        return f"Error during processing: {str(e)}\n\nPlease check your input file and API key and try again."
+
+# Create Gradio interface with examples
+with gr.Blocks() as demo:
+    gr.Markdown("# Translation Validation & Editing Tool")
+
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(
+                label="Upload aligned translations file (tab-separated)",
+                type="binary"
+            )
+            glossary_input = gr.Dropdown(
+                choices=list(GLOSSARIES.keys()),
+                label="Select Glossary",
+                value=list(GLOSSARIES.keys())[0]
+            )
+            api_key_input = gr.Textbox(
+                label="Anthropic API Key",
+                placeholder="sk-...",
+                type="password"
+            )
+            submit_btn = gr.Button("Process Translations", variant="primary")
+
+            # Add examples
+            gr.Examples(
+                examples=[
+                    [str(Path("data/example_translations.txt").resolve()), "84000 Glossary", "sk-..."],
+                ],
+                inputs=[file_input, glossary_input, api_key_input],
+                label="Example Inputs"
+            )
+
+        with gr.Column():
+            output = gr.Markdown()
+
+    gr.Markdown("""### Instructions
+    1. Upload a tab-separated file with Tibetan source and English translations
+    2. Select the glossary to use for validation
+    3. Enter your Anthropic API key
+    4. Click "Process Translations" and wait for results
+
+    The tool will:
+    - Validate translations against the glossary
+    - Calculate accuracy scores
+    - Suggest improvements using Claude
+    - Show detailed term analysis
+
+    Key:
+    - 📊 Terms used for scoring
+    - ✅ Correctly translated terms
+    - ❌ Terms needing improvement""")
+
+    submit_btn.click(
+        fn=load_validate_and_edit,
+        inputs=[file_input, glossary_input, api_key_input],
+        outputs=output
+    )

 if __name__ == "__main__":
     demo.launch()
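
Note on the input format: the handler writes the uploaded bytes to temp_aligned.txt and expects one Tibetan source segment, a tab, then its English translation per line. A minimal sketch of building such a file follows; the Tibetan segments and translations are placeholder values for illustration, not content from the repository:

    # Sketch: write a sample tab-separated input file (placeholder content).
    sample_rows = [
        ("བཀྲ་ཤིས་བདེ་ལེགས།", "Greetings and good fortune."),
        ("ཆོས་ཐམས་ཅད།", "all phenomena"),
    ]
    with open("temp_aligned.txt", "w", encoding="utf-8") as f:
        for source, target in sample_rows:
            f.write(f"{source}\t{target}\n")

Under the scoring rule quoted in the removed gr.Interface description, 3 correct translations out of 4 counted terms would give 3/4 × 100 = 75%.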
llm_post_editor.py ADDED
@@ -0,0 +1,211 @@
+import json
+from typing import Any, Dict, List
+
+from anthropic import Anthropic
+
+
+class LLMTranslationEditor:
+    def __init__(self, validation_results: dict, anthropic_api_key: str):
+        """Initialize with validation results and Anthropic API key.
+
+        Args:
+            validation_results (dict): Results from TranslationValidator
+            anthropic_api_key (str): Anthropic API key for Claude access
+        """
+        self.results = validation_results
+        self.client = Anthropic(api_key=anthropic_api_key)
+
+    def edit_translation(self, source_text: str, current_translation: str,
+                         terms_info: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Use Claude to edit the translation considering validation results and context.
+
+        Args:
+            source_text (str): Original Tibetan text
+            current_translation (str): Current English translation
+            terms_info (list): Terms information from validation results
+
+        Returns:
+            Dict[str, Any]: Edited translation with analysis
+        """
+        # Build context for terms that need attention
+        terms_context = []
+        for term in terms_info:
+            analysis = term['analysis']
+            assessment = analysis['translation_assessment']
+
+            if assessment['should_be_counted'] and not assessment['translated_correctly']:
+                term_context = {
+                    'term': term['source_term'],
+                    'current': analysis['translated_as'],
+                    'suggested': analysis['glossary_translation'],
+                    'categories': {}
+                }
+
+                # Add category information
+                for cat_name, cat_data in term['categories'].items():
+                    if cat_name in analysis['matching_categories']:
+                        term_context['categories'][cat_name] = {
+                            'translations': cat_data.get('translations', []),
+                            'definitions': cat_data.get('definitions', [])
+                        }
+
+                terms_context.append(term_context)
+
+        if not terms_context:
+            return {
+                'edited_translation': current_translation,
+                'modified': False,
+                'reasoning': 'No terms requiring editing'
+            }
+
+        prompt = f"""You are an expert Tibetan translator. Review and improve this translation, focusing on accuracy and natural English:
+
+Tibetan text: {source_text}
+Current translation: {current_translation}
+
+The following terms need attention:"""
+
+        for term in terms_context:
+            prompt += f"\n\nTibetan term: {term['term']}"
+            prompt += f"\nCurrently translated as: {term['current']}"
+            prompt += f"\nGlossary suggestion: {term['suggested']}"
+
+            for cat_name, cat_data in term['categories'].items():
+                prompt += f"\n{cat_name}:"
+                if cat_data['definitions']:
+                    prompt += f"\n- Definitions: {', '.join(cat_data['definitions'])}"
+                if cat_data['translations']:
+                    prompt += f"\n- Translations: {', '.join(cat_data['translations'])}"
+
+        prompt += """
+
+Please provide:
+1. An improved translation that:
+   - Maintains the meaning of the Tibetan text
+   - Maintains the style and tone of the current translation
+   - Uses appropriate technical terms from the glossary
+   - Preserves any correct parts of the current translation
+2. Your reasoning for the changes
+
+Respond in JSON format:
+{
+    "edited_translation": "your improved translation",
+    "reasoning": "explanation of changes and decisions",
+    "modified": true/false
+}"""
+
+        try:
+            message = self.client.messages.create(
+                model="claude-3-sonnet-20240229",
+                max_tokens=1000,
+                temperature=0,
+                messages=[{"role": "user", "content": prompt}]
+            )
+
+            # Extract JSON from response
+            import re
+            json_match = re.search(r'\{.*\}', message.content[0].text, re.DOTALL)
+            if json_match:
+                return json.loads(json_match.group())
+            else:
+                return {
+                    'edited_translation': current_translation,
+                    'modified': False,
+                    'reasoning': 'Failed to parse LLM response'
+                }
+
+        except Exception as e:
+            print(f"Error during LLM editing: {e}")
+            return {
+                'edited_translation': current_translation,
+                'modified': False,
+                'reasoning': f'LLM editing failed: {str(e)}'
+            }
+
+    def post_edit_translations(self) -> List[Dict[str, Any]]:
+        """Process all lines and post-edit translations using LLM.
+
+        Returns:
+            List[Dict[str, Any]]: List of edited translations with analysis
+        """
+        edited_translations = []
+
+        for line in self.results['lines']:
+            source = line['source']
+            target = line['target']
+            terms = line['terms']
+
+            if not terms:
+                edited_translations.append({
+                    'line_number': line['line_number'],
+                    'source': source,
+                    'original': target,
+                    'edited': target,
+                    'modified': False,
+                    'reasoning': 'No terms to edit'
+                })
+                continue
+
+            # Get LLM to edit the translation
+            edit_result = self.edit_translation(source, target, terms)
+
+            edited_translations.append({
+                'line_number': line['line_number'],
+                'source': source,
+                'original': target,
+                'edited': edit_result['edited_translation'],
+                'modified': edit_result['modified'],
+                'reasoning': edit_result['reasoning']
+            })
+
+        return edited_translations
+
+    def save_edits(self, edited_translations: List[Dict[str, Any]],
+                   output_path: str) -> None:
+        """Save the post-edited translations with analysis to a file.
+
+        Args:
+            edited_translations (List[Dict[str, Any]]): Edited translations with analysis
+            output_path (str): Path to save results
+        """
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump({
+                'summary': {
+                    'total_lines': len(edited_translations),
+                    'modified_lines': sum(1 for t in edited_translations if t['modified'])
+                },
+                'translations': edited_translations
+            }, f, ensure_ascii=False, indent=2)
+
+
+# Example usage:
+if __name__ == "__main__":
+    import os
+
+    # Load validation results
+    with open('data/validation_results.json', 'r', encoding='utf-8') as f:
+        validation_results = json.load(f)
+
+    # Create editor and process translations
+    editor = LLMTranslationEditor(
+        validation_results,
+        os.getenv('ANTHROPIC_API_KEY')
+    )
+    edited_translations = editor.post_edit_translations()
+
+    # Save results
+    editor.save_edits(edited_translations, 'llm_post_edited_translations.json')
+
+    # Print summary and examples
+    print(f"Post-editing completed:")
+    print(f"Total lines: {len(edited_translations)}")
+    print(f"Modified lines: {sum(1 for t in edited_translations if t['modified'])}")
+
+    print("\nExample modifications:")
+    for trans in edited_translations:
+        if trans['modified']:
+            print(f"\nLine {trans['line_number']}:")
+            print(f"Source  : {trans['source']}")
+            print(f"Original: {trans['original']}")
+            print(f"Edited  : {trans['edited']}")
+            print(f"Reasoning: {trans['reasoning']}")
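
Note on the expected input: the editor reads only a handful of fields from the validator's output (or from the {"lines": validation_results} wrapper app.py builds). A minimal hand-written example of that shape, inferred from the keys this module accesses; all values are illustrative placeholders, not real TranslationValidator output:

    # Hypothetical validation_results shape (illustrative values only).
    validation_results = {
        "lines": [
            {
                "line_number": 1,
                "source": "ཆོས།",            # Tibetan source segment (placeholder)
                "target": "the teaching",     # current English translation (placeholder)
                "terms": [
                    {
                        "source_term": "ཆོས།",
                        "categories": {
                            "84000 Glossary": {   # category name is a placeholder
                                "translations": ["dharma"],
                                "definitions": ["The teachings of the Buddha."],
                            },
                        },
                        "analysis": {
                            "translated_as": "the teaching",
                            "glossary_translation": "dharma",
                            "matching_categories": ["84000 Glossary"],
                            "translation_assessment": {
                                "should_be_counted": True,
                                "translated_correctly": False,
                            },
                        },
                    },
                ],
            },
        ],
    }

With should_be_counted true and translated_correctly false, edit_translation would include this term in the prompt and ask Claude to revise the line using the glossary rendering "dharma".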