Spaces:

Shashwat2528
/

avishkaaram

Sleeping

App Files Community

Shashwat2528 committed on Jun 25, 2023

Commit

785f4dd

1 Parent(s): 473ac33

Upload app.py

Browse files

Files changed (1) hide show

app.py +214 -0

app.py ADDED Viewed

	@@ -0,0 +1,214 @@

+# import sounddevice as sd
+# import soundfile as sf
+# import speech_recognition as sr
+# from gtts import gTTS
+# import pygame
+# import time
+# import gradio as gr
+# from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+# model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
+# tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
+# class AvishkaaramEkta:
+#     def __init__(self, model):
+#         self.model = model
+#         self.tokenizer = tokenizer
+#     def text_to_speech(self, text, output_file):
+#         # Create a gTTS object with the text and desired language
+#         tts = gTTS(text=text, lang='en')
+#         # Save the audio to a file
+#         tts.save(output_file)
+#     def play_mp3(self, file_path):
+#         pygame.mixer.init()
+#         pygame.mixer.music.load(file_path)
+#         pygame.mixer.music.play()
+#         while pygame.mixer.music.get_busy():
+#             continue
+#     def ask_question(self, audio_file):
+#         print("Recording audio...")
+#         audio = sd.rec(int(44100 * 6), samplerate=44100, channels=1)
+#         sd.wait()
+#         # Save the audio to a file
+#         sf.write(audio_file, audio, 44100)
+#         print(f"Audio saved to {audio_file}")
+#         r = sr.Recognizer()
+#         with sr.AudioFile(audio_file) as source:
+#             audio_data = r.record(source)
+#         text = ""
+#         try:
+#             text = r.recognize_google(audio_data)
+#             print("Transcription:", text)
+#         except sr.UnknownValueError:
+#             print("Speech recognition could not understand audio")
+#         except sr.RequestError as e:
+#             print("Could not request results from Google Speech Recognition service; {0}".format(e))
+#         return text
+#     def answer_question(self, passage, question):
+#         inputs = self.tokenizer(passage, question, return_tensors="pt")
+#         outputs = self.model(**inputs)
+#         start_logits = outputs.start_logits
+#         end_logits = outputs.end_logits
+#         start_index = start_logits.argmax(dim=1).item()
+#         end_index = end_logits.argmax(dim=1).item()
+#         tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+#         answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
+#         return answer
+#     def question_answer(self, passage, question):
+#         passage_audio_file = "passage.mp3"
+#         question_audio_file = "question.wav"
+#         answer_audio_file = "answer.mp3"
+#         self.text_to_speech(passage, passage_audio_file)
+#         self.play_mp3(passage_audio_file)
+#         question_text = self.ask_question(question_audio_file)
+#         answer = self.answer_question(passage, question_text)
+#         self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
+#         self.play_mp3(answer_audio_file)
+#         time.sleep(5)  # Wait for 5 seconds before ending
+#         return answer
+# # Create an instance of the AvishkaaramEkta class
+# avishkaaram_ekta = AvishkaaramEkta(model)
+# # Define the Gradio interface
+# iface = gr.Interface(
+#     fn=avishkaaram_ekta.question_answer,
+#     inputs=["text", "text"],
+#     outputs="text",
+#     title="Audio Question Answering",
+#     description="Ask a question about a given passage using audio input",
+#     examples=[
+#         ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
+#         ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
+#     ],
+#     interpretation="default",
+# )
+# # Launch the Gradio interface
+# iface.launch()
+import torch
+import torchaudio
+import soundfile as sf
+import speech_recognition as sr
+from gtts import gTTS
+import pygame
+import time
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+model = AutoModelForQuestionAnswering.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
+tokenizer = AutoTokenizer.from_pretrained('AVISHKAARAM/avishkaarak-ekta-hindi')
+class AvishkaaramEkta:
+    def __init__(self, model):
+        self.model = model
+        self.tokenizer = tokenizer
+    def text_to_speech(self, text, output_file):
+        # Create a gTTS object with the text and desired language
+        tts = gTTS(text=text, lang='en')
+        # Save the audio to a file
+        tts.save(output_file)
+    def play_mp3(self, file_path):
+        pygame.mixer.init()
+        pygame.mixer.music.load(file_path)
+        pygame.mixer.music.play()
+        while pygame.mixer.music.get_busy():
+            continue
+    def ask_question(self, audio_file):
+        print("Recording audio...")
+        waveform, sample_rate = torchaudio.rec(6, sr=44100, channels=1)
+        # Save the audio to a file
+        sf.write(audio_file, waveform.squeeze().numpy(), sample_rate)
+        print(f"Audio saved to {audio_file}")
+        r = sr.Recognizer()
+        with sr.AudioFile(audio_file) as source:
+            audio_data = r.record(source)
+        text = ""
+        try:
+            text = r.recognize_google(audio_data)
+            print("Transcription:", text)
+        except sr.UnknownValueError:
+            print("Speech recognition could not understand audio")
+        except sr.RequestError as e:
+            print("Could not request results from Google Speech Recognition service; {0}".format(e))
+        return text
+    def answer_question(self, passage, question):
+        inputs = self.tokenizer(passage, question, return_tensors="pt")
+        outputs = self.model(**inputs)
+        start_logits = outputs.start_logits
+        end_logits = outputs.end_logits
+        start_index = start_logits.argmax(dim=1).item()
+        end_index = end_logits.argmax(dim=1).item()
+        tokens = self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
+        answer = self.tokenizer.convert_tokens_to_string(tokens[start_index:end_index+1])
+        return answer
+    def question_answer(self, passage, question):
+        passage_audio_file = "passage.mp3"
+        question_audio_file = "question.wav"
+        answer_audio_file = "answer.mp3"
+        self.text_to_speech(passage, passage_audio_file)
+        self.play_mp3(passage_audio_file)
+        question_text = self.ask_question(question_audio_file)
+        answer = self.answer_question(passage, question_text)
+        self.text_to_speech("The answer to the question is: " + answer, answer_audio_file)
+        self.play_mp3(answer_audio_file)
+        time.sleep(5)  # Wait for 5 seconds before ending
+        return answer
+# Create an instance of the AvishkaaramEkta class
+avishkaaram_ekta = AvishkaaramEkta(model)
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=avishkaaram_ekta.question_answer,
+    inputs=["text", "text"],
+    outputs="text",
+    title="Audio Question Answering",
+    description="Ask a question about a given passage using audio input",
+    examples=[
+        ["In 1960, Dr. Jane Goodall arrived in Gombe, Tanzania to study chimpanzees.", "What did Dr. Jane Goodall study?"],
+        ["The Taj Mahal is located in Agra, India.", "Where is the Taj Mahal situated?"],
+    ],
+    interpretation="default",
+)
+# Launch the Gradio interface
+iface.launch()