Spaces:

jacktol
/

ATC-Transcription-Assistant

Paused

App Files Files Community

jacktol committed on Oct 8, 2024

Commit

9d6e60d

1 Parent(s): 1f36570

added various files

Browse files

Files changed (2) hide show

Dockerfile +4 -4
app.py +19 -46

Dockerfile CHANGED Viewed

@@ -16,8 +16,8 @@ RUN chmod -R 777 /app
 # Install any necessary dependencies
 RUN pip install -r requirements.txt
-# Make port 8080 available to the world outside this container
-EXPOSE 8080
-# Run the application
-CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]

 # Install any necessary dependencies
 RUN pip install -r requirements.txt
+# Expose the correct port that the app will run on
+EXPOSE 7860
+# Run the application with proper WebSocket support
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--ws", "websockets"]

app.py CHANGED Viewed

@@ -17,9 +17,8 @@ system_prompt = """Convert the provided transcript into standard pilot-ATC synta
 Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
 aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
 Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
-response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transciption, and then
-proceed with the transciption."""
 # Function to transcribe audio and return the concatenated transcript with segment info
 def transcribe_audio(file_path):
@@ -30,49 +29,23 @@ def transcribe_audio(file_path):
     for segment in segments:
         transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
-    print('\n'.join(transcript).strip())
     return '\n'.join(transcript).strip()
 @cl.on_chat_start
-async def start_chat():
-    # Welcome message
-    welcome_message = """
-## Welcome to the **ATC Transcription Assistant**
----
-### What is this tool for?
-This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.
----
-### Performance
-- **Fine-tuned Whisper medium.en WER**: 15.08%
-- **Non fine-tuned Whisper medium.en WER**: 94.59%
-- **Relative improvement**: 84.06%
-While the fine-tuned model performs much better, **we cannot guarantee the accuracy of the transcriptions**. For more details on the fine-tuning process, see the [blog post](https://jacktol.net/posts/fine-tuning_whisper_on_atc_data), or check out the [project repository](https://github.com/jack-tol/fine-tuning-whisper-on-atc-data). Feel free to contact me at [[email protected]](mailto:[email protected]).
----
-### How to Use
-1. **Upload an ATC audio file**: Upload an audio file in **MP3** or **WAV** format containing ATC communications.
-2. **View the transcription**: The tool will transcribe the audio and display the text on the screen.
-3. **Transcribe another audio**: Click **New Chat** in the top-right to start a new transcription.
----
-To get started, upload the audio below.
-"""
-    await cl.Message(content=welcome_message).send()
-    # Prompt user to upload audio file
-# Prompt user to upload audio file (MP3 or WAV)
     files = await cl.AskFileMessage(
         content="",
         accept={
@@ -83,7 +56,6 @@ To get started, upload the audio below.
         timeout=3600
     ).send()
     if files:
         audio_file = files[0]
@@ -91,9 +63,10 @@ To get started, upload the audio below.
         transcription = transcribe_audio(audio_file.path)
         # Send the entire transcription to the LLM for ATC syntax processing
-        msg = cl.Message(content="")
         await msg.send()
         stream = await client.chat.completions.create(
             messages=[
                 {"role": "system", "content": system_prompt},
@@ -109,4 +82,4 @@ To get started, upload the audio below.
             token = part.choices[0].delta.content or ""
             await msg.stream_token(token)
-        await msg.update()

 Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
 aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
 Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
+response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
+proceed with the transcription."""
 # Function to transcribe audio and return the concatenated transcript with segment info
 def transcribe_audio(file_path):
     for segment in segments:
         transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
     return '\n'.join(transcript).strip()
+# Start chat session
 @cl.on_chat_start
+def on_chat_start():
+    # Initialize the session data
+    cl.user_session.set("transcription_counter", 0)
+# Handle incoming chat
+@cl.on_message
+async def handle_message(message: cl.Message):
+    # Retrieve transcription counter for the user session
+    counter = cl.user_session.get("transcription_counter")
+    counter += 1
+    cl.user_session.set("transcription_counter", counter)
+    # Get the uploaded audio file
     files = await cl.AskFileMessage(
         content="",
         accept={
         timeout=3600
     ).send()
     if files:
         audio_file = files[0]
         transcription = transcribe_audio(audio_file.path)
         # Send the entire transcription to the LLM for ATC syntax processing
+        msg = cl.Message(content="Processing your transcription...")
         await msg.send()
+        # Process the transcription via the LLM
         stream = await client.chat.completions.create(
             messages=[
                 {"role": "system", "content": system_prompt},
             token = part.choices[0].delta.content or ""
             await msg.stream_token(token)
+        await msg.update(content="Here is the ATC transcription:")