jacktol committed on
Commit
9d6e60d
·
1 Parent(s): 1f36570

added various files

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -4
  2. app.py +19 -46
Dockerfile CHANGED
@@ -16,8 +16,8 @@ RUN chmod -R 777 /app
16
  # Install any necessary dependencies
17
  RUN pip install -r requirements.txt
18
 
19
- # Make port 8080 available to the world outside this container
20
- EXPOSE 8080
21
 
22
- # Run the application
23
- CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
16
  # Install any necessary dependencies
17
  RUN pip install -r requirements.txt
18
 
19
+ # Expose the correct port that the app will run on
20
+ EXPOSE 7860
21
 
22
+ # Run the application with proper WebSocket support
23
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--ws", "websockets"]
app.py CHANGED
@@ -17,9 +17,8 @@ system_prompt = """Convert the provided transcript into standard pilot-ATC synta
17
  Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
18
  aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
19
  Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
20
- response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transciption, and then
21
- proceed with the transciption."""
22
-
23
 
24
  # Function to transcribe audio and return the concatenated transcript with segment info
25
  def transcribe_audio(file_path):
@@ -30,49 +29,23 @@ def transcribe_audio(file_path):
30
  for segment in segments:
31
  transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
32
 
33
- print('\n'.join(transcript).strip())
34
-
35
  return '\n'.join(transcript).strip()
36
 
 
37
  @cl.on_chat_start
38
- async def start_chat():
39
- # Welcome message
40
- welcome_message = """
41
- ## Welcome to the **ATC Transcription Assistant**
42
-
43
- ---
44
-
45
- ### What is this tool for?
46
-
47
- This tool transcribes **Air Traffic Control (ATC)** audio using OpenAI’s **Whisper medium.en** model, fine-tuned for ATC communications. Developed as part of a research project, the fine-tuned **Whisper medium.en** model offers significant improvements in transcription accuracy for ATC audio.
48
-
49
- ---
50
-
51
- ### Performance
52
-
53
- - **Fine-tuned Whisper medium.en WER**: 15.08%
54
- - **Non fine-tuned Whisper medium.en WER**: 94.59%
55
- - **Relative improvement**: 84.06%
56
-
57
- While the fine-tuned model performs much better, **we cannot guarantee the accuracy of the transcriptions**. For more details on the fine-tuning process, see the [blog post](https://jacktol.net/posts/fine-tuning_whisper_on_atc_data), or check out the [project repository](https://github.com/jack-tol/fine-tuning-whisper-on-atc-data). Feel free to contact me at [[email protected]](mailto:[email protected]).
58
-
59
- ---
60
-
61
- ### How to Use
62
-
63
- 1. **Upload an ATC audio file**: Upload an audio file in **MP3** or **WAV** format containing ATC communications.
64
- 2. **View the transcription**: The tool will transcribe the audio and display the text on the screen.
65
- 3. **Transcribe another audio**: Click **New Chat** in the top-right to start a new transcription.
66
-
67
- ---
68
-
69
- To get started, upload the audio below.
70
- """
71
-
72
- await cl.Message(content=welcome_message).send()
73
-
74
- # Prompt user to upload audio file
75
- # Prompt user to upload audio file (MP3 or WAV)
76
  files = await cl.AskFileMessage(
77
  content="",
78
  accept={
@@ -83,7 +56,6 @@ To get started, upload the audio below.
83
  timeout=3600
84
  ).send()
85
 
86
-
87
  if files:
88
  audio_file = files[0]
89
 
@@ -91,9 +63,10 @@ To get started, upload the audio below.
91
  transcription = transcribe_audio(audio_file.path)
92
 
93
  # Send the entire transcription to the LLM for ATC syntax processing
94
- msg = cl.Message(content="")
95
  await msg.send()
96
 
 
97
  stream = await client.chat.completions.create(
98
  messages=[
99
  {"role": "system", "content": system_prompt},
@@ -109,4 +82,4 @@ To get started, upload the audio below.
109
  token = part.choices[0].delta.content or ""
110
  await msg.stream_token(token)
111
 
112
- await msg.update()
 
17
  Ensure that all runway and heading numbers are formatted correctly (e.g., '11L' for 'one one left'). Use standard
18
  aviation phraseology wherever applicable. Maintain the segmentation of the transcript as provided, but exclude the timestamps.
19
  Based on the context and segmentation of each transmission, label it as either 'ATC' or 'Pilot'. At the very beginning of your
20
+ response place a horizonal div with "---" and then line-break, and then add a H2 which says "Transcription", and then
21
+ proceed with the transcription."""
 
22
 
23
  # Function to transcribe audio and return the concatenated transcript with segment info
24
  def transcribe_audio(file_path):
 
29
  for segment in segments:
30
  transcript.append(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
31
 
 
 
32
  return '\n'.join(transcript).strip()
33
 
34
+ # Start chat session
35
  @cl.on_chat_start
36
+ def on_chat_start():
37
+ # Initialize the session data
38
+ cl.user_session.set("transcription_counter", 0)
39
+
40
+ # Handle incoming chat
41
+ @cl.on_message
42
+ async def handle_message(message: cl.Message):
43
+ # Retrieve transcription counter for the user session
44
+ counter = cl.user_session.get("transcription_counter")
45
+ counter += 1
46
+ cl.user_session.set("transcription_counter", counter)
47
+
48
+ # Get the uploaded audio file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  files = await cl.AskFileMessage(
50
  content="",
51
  accept={
 
56
  timeout=3600
57
  ).send()
58
 
 
59
  if files:
60
  audio_file = files[0]
61
 
 
63
  transcription = transcribe_audio(audio_file.path)
64
 
65
  # Send the entire transcription to the LLM for ATC syntax processing
66
+ msg = cl.Message(content="Processing your transcription...")
67
  await msg.send()
68
 
69
+ # Process the transcription via the LLM
70
  stream = await client.chat.completions.create(
71
  messages=[
72
  {"role": "system", "content": system_prompt},
 
82
  token = part.choices[0].delta.content or ""
83
  await msg.stream_token(token)
84
 
85
+ await msg.update(content="Here is the ATC transcription:")