Kr08 committed on
Commit 0427f41 · verified · 1 Parent(s): 7a158c9

Update app.py

Files changed (1)
  1. app.py +37 -28
app.py CHANGED

@@ -7,6 +7,8 @@ import traceback
 import sys
 from audio_processing import AudioProcessor
 import spaces
+from chunkedTranscriber import ChunkedTranscriber
+from IPython.display import display
 
 
 logging.basicConfig(
@@ -19,9 +21,10 @@ logger = logging.getLogger(__name__)
 def load_qa_model():
     """Load question-answering model"""
     try:
+        model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
         qa_pipeline = pipeline(
             "text-generation",
-            model="meta-llama/Meta-Llama-3-8B-Instruct",
+            model="hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
             model_kwargs={"torch_dtype": torch.bfloat16},
             device_map="auto",
             use_auth_token=os.getenv("HF_TOKEN")
@@ -48,32 +51,35 @@ def load_summarization_model():
 @spaces.GPU(duration=60)
 def process_audio(audio_file, translate=False):
     """Process audio file"""
-    try:
-        processor = AudioProcessor()
-        language_segments, final_segments = processor.process_audio(audio_file, translate)
+    transcriber = ChunkedTranscriber(chunk_size=5, overlap=1)
+    results = transcriber.transcribe_audio("/content/test_case_1.wav", translate=True)
+    return json.dumps(results, indent=4)
+    # try:
+    #     processor = AudioProcessor()
+    #     language_segments, final_segments = processor.process_audio(audio_file, translate)
 
-        # Format output
-        transcription = ""
-        full_text = ""
+    #     # Format output
+    #     transcription = ""
+    #     full_text = ""
 
-        # Add language detection information
-        for segment in language_segments:
-            transcription += f"Language: {segment['language']}\n"
-            transcription += f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n"
+    #     # Add language detection information
+    #     for segment in language_segments:
+    #         transcription += f"Language: {segment['language']}\n"
+    #         transcription += f"Time: {segment['start']:.2f}s - {segment['end']:.2f}s\n\n"
 
-        # Add transcription/translation information
-        transcription += "Transcription with language detection:\n\n"
-        for segment in final_segments:
-            transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}):\n"
-            transcription += f"Original: {segment['text']}\n"
-            if translate and 'translated' in segment:
-                transcription += f"Translated: {segment['translated']}\n"
-                full_text += segment['translated'] + " "
-            else:
-                full_text += segment['text'] + " "
-            transcription += "\n"
+    #     # Add transcription/translation information
+    #     transcription += "Transcription with language detection:\n\n"
+    #     for segment in final_segments:
+    #         transcription += f"[{segment['start']:.2f}s - {segment['end']:.2f}s] ({segment['language']}):\n"
+    #         transcription += f"Original: {segment['text']}\n"
+    #         if translate and 'translated' in segment:
+    #             transcription += f"Translated: {segment['translated']}\n"
+    #             full_text += segment['translated'] + " "
+    #         else:
+    #             full_text += segment['text'] + " "
+    #         transcription += "\n"
 
-        return transcription, full_text
+    #     return transcription, full_text
 
     except Exception as e:
         logger.error(f"Audio processing failed: {str(e)}")
@@ -81,14 +87,14 @@ def process_audio(audio_file, translate=False):
 
 
 @spaces.GPU(duration=60)
-def summarize_text(text):
+def summarize_text(results):
     """Summarize text"""
     try:
         summarizer = load_summarization_model()
         if summarizer is None:
             return "Summarization model could not be loaded."
 
-        summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
+        summary = summarizer('\n'.join(d['translated'] for d in results if 'translated' in d), max_length=150, min_length=50, do_sample=False)[0]['summary_text']
         return summary
     except Exception as e:
         logger.error(f"Summarization failed: {str(e)}")
@@ -102,7 +108,8 @@ def answer_question(context, question):
     qa_pipeline = load_qa_model()
     if qa_pipeline is None:
         return "Q&A model could not be loaded."
-
+    if not question:
+        return "Please enter your Question"
     messages = [
         {"role": "system", "content": "You are a helpful assistant who can answer questions based on the given context."},
         {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
@@ -143,12 +150,14 @@ with gr.Blocks() as iface:
     process_button.click(
         process_audio,
         inputs=[audio_input, translate_checkbox],
-        outputs=[transcription_output, full_text_output]
+        # outputs=[transcription_output, full_text_output]
+        outputs=[results]
     )
 
     summarize_button.click(
         summarize_text,
-        inputs=[full_text_output],
+        inputs=[results],
+        # inputs=[full_text_output],
         outputs=[summary_output]
    )
 
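
For reference, a minimal sketch of the flow this commit switches to, assuming `ChunkedTranscriber` exposes the constructor and `transcribe_audio(path, translate=...)` signature used in the diff and returns a list of segment dicts where translated segments carry a `'translated'` key (the audio path below is a stand-in; note the new `process_audio` also relies on `import json`, which these hunks do not show):

```python
import json

from chunkedTranscriber import ChunkedTranscriber  # module introduced by this commit

# Transcribe in 5-second chunks with 1 second of overlap, as in the new process_audio().
transcriber = ChunkedTranscriber(chunk_size=5, overlap=1)
results = transcriber.transcribe_audio("test_case_1.wav", translate=True)  # stand-in path
print(json.dumps(results, indent=4))

# The reworked summarize_text() consumes this list directly, joining only
# the segments that were actually translated before summarizing:
translated_text = "\n".join(d["translated"] for d in results if "translated" in d)
```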