|
import chainlit as cl |
|
from faster_whisper import WhisperModel |
|
from openai import AsyncOpenAI |
|
|
|
|
|
# Identifier of the Whisper medium.en checkpoint fine-tuned for ATC audio, in
# faster-whisper format (presumably a Hugging Face Hub repo id -- confirm).
model_path = "jacktol/whisper-medium.en-fine-tuned-for-ATC-faster-whisper"

# Speech-to-text model, created once at import time and shared by all chats.
# It is pinned to a CUDA device with float32 weights.
whisper_model = WhisperModel(
    model_path,
    device="cuda",
    compute_type="float32",
)

# Async OpenAI client used to reformat raw transcripts; it is constructed
# without explicit arguments.
client = AsyncOpenAI()
|
|
|
|
|
# System message sent to the chat model alongside the raw, timestamped
# transcript. It asks the model to rewrite the text in pilot-ATC phraseology,
# label each transmission by speaker, and open the reply with a markdown rule
# followed by an H2 heading. Spelling fixed ("horizontal", "Transcription")
# and the heading's closing quote restored, so the requested heading text is
# unambiguous and correctly spelled in the user-visible output.
system_prompt = """Convert the provided transcript into standard pilot-ATC syntax without altering the content.
Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
response place a horizontal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
proceed with the transcription."""
|
|
|
|
|
|
|
def transcribe_audio(file_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        file_path: Path of the audio file passed to
            ``whisper_model.transcribe``.

    Returns:
        The transcript as a single string with one segment per line, each
        formatted as ``[<start>s -> <end>s] <text>`` (times to two decimal
        places). Leading and trailing whitespace of the whole transcript is
        stripped. The same text is also printed to stdout.
    """
    # Only the segments are used; the second return value is ignored.
    # NOTE(review): faster-whisper documents `segments` as a lazy generator,
    # so decoding would happen while the comprehension below iterates --
    # confirm for the installed version.
    segments, _info = whisper_model.transcribe(file_path, beam_size=5)

    lines = [
        f"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}"
        for seg in segments
    ]
    result = "\n".join(lines).strip()

    # Echo the timestamped transcript to stdout before returning it.
    print(result)
    return result
|
|
|
# Markdown greeting sent as the first message of every chat session.
# NOTE(review): the contact address below reads "[email protected]", which looks
# like an obfuscated/redacted e-mail -- restore the real address if known.
_WELCOME_MESSAGE = """
## Welcome to the **ATC Transcription Assistant**

---

### What is this tool for?

This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.

---

### Performance

- **Fine-tuned Whisper medium.en WER**: 15.08%
- **Non fine-tuned Whisper medium.en WER**: 94.59%
- **Relative improvement**: 84.06%

While the fine-tuned model performs much better, **we cannot guarantee the accuracy of the transcriptions**. For more details on the fine-tuning process, see the [blog post](https://jacktol.net/posts/fine-tuning_whisper_on_atc_data), or check out the [project repository](https://github.com/jack-tol/fine-tuning-whisper-on-atc-data). Feel free to contact me at [[email protected]](mailto:[email protected]).

---

### How to Use

1. **Upload an ATC audio file**: Upload an audio file in **MP3** or **WAV** format containing ATC communications.
2. **View the transcription**: The tool will transcribe the audio and display the text on the screen.
3. **Transcribe another audio**: Click **New Chat** in the top-right to start a new transcription.

---

To get started, upload the audio below.
"""


@cl.on_chat_start
async def start_chat():
    """Greet the user, collect one audio file, and stream its transcript.

    Flow:
        1. Send the welcome markdown.
        2. Ask for a single WAV/MP3 upload (max 50 MB, 3600 s timeout).
        3. Transcribe the file with ``transcribe_audio`` off the event loop.
        4. Send the raw transcript to the chat model with ``system_prompt``
           and stream the formatted answer into one Chainlit message.

    Returns early without further output when no file is received, and
    with a short notice when the transcript is empty.
    """
    await cl.Message(content=_WELCOME_MESSAGE).send()

    files = await cl.AskFileMessage(
        content="",
        accept={
            "audio/wav": [".wav"],
            "audio/mpeg": [".mp3"],
        },
        max_size_mb=50,
        timeout=3600,
    ).send()

    # Nothing was uploaded (e.g. the prompt timed out): nothing to do.
    if not files:
        return

    audio_file = files[0]

    # transcribe_audio is synchronous and compute-heavy; calling it directly
    # would block the event loop (and every other session) for the whole
    # transcription. cl.make_async runs it in a worker thread instead.
    transcription = await cl.make_async(transcribe_audio)(audio_file.path)

    # An empty transcript would give the LLM nothing to format and invite a
    # made-up answer, so report it instead of calling the API.
    if not transcription:
        await cl.Message(
            content="No speech could be transcribed from the uploaded audio."
        ).send()
        return

    # Send an empty message first so tokens can be streamed into it.
    msg = cl.Message(content="")
    await msg.send()

    stream = await client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": transcription},
        ],
        stream=True,
        model="gpt-4o",
        temperature=0,
    )

    async for part in stream:
        # Some stream chunks carry no choices; indexing [0] on them would
        # raise IndexError, so skip them.
        if not part.choices:
            continue
        token = part.choices[0].delta.content
        if token:
            await msg.stream_token(token)

    # Finalize the streamed message.
    await msg.update()
|
|