sanchit-gandhi committed on
Commit
a8cda10
·
1 Parent(s): c327bbb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import gradio as gr
4
+ import soundfile as sf
5
+ import torch
6
+ from gradio_client import Client
7
+ from huggingface_hub import Repository
8
+ from pandas import read_csv
9
+
10
+ from transformers import pipeline
11
+
12
+
13
# load the results file from the private repo
USERNAMES_DATASET_ID = "huggingface-course/audio-course-u7-hands-on"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Build the URL with plain string formatting rather than os.path.join:
# os.path.join uses the OS path separator, which would put a backslash in the
# URL on Windows. On POSIX the resulting string is identical.
usernames_url = f"https://huggingface.co/datasets/{USERNAMES_DATASET_ID}"

usernames_repo = Repository(local_dir="usernames", clone_from=usernames_url, use_auth_token=HF_TOKEN)
usernames_repo.git_pull()

# This one is a real filesystem path inside the local clone, so os.path.join is correct.
CSV_RESULTS_FILE = os.path.join("usernames", "usernames.csv")
all_results = read_csv(CSV_RESULTS_FILE)

# load the LID checkpoint
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline("audio-classification", model="facebook/mms-lid-126", device=device)
28
+
29
# Assessment settings and user-facing text for the Gradio interface.

# Minimum score the top language prediction must reach to be trusted.
THRESHOLD = 0.5
# Status text returned when a demo passes.
PASS_MESSAGE = "Congratulations! Your demo passed the assessment!"

# Page heading and the markdown instructions shown above the input box.
TITLE = "🤗 Audio Transformers Course: Unit 7 Assessment"
DESCRIPTION = """
Check that you have successfully completed the hands-on exercise for Unit 7 of the 🤗 Audio Transformers Course by submitting your demo to this Space.

As a reminder, you should start with the template Space provided at [`course-demos/speech-to-speech-translation`](https://huggingface.co/spaces/course-demos/speech-to-speech-translation),
and update the Space to translate from any language X to a **non-English** language Y.

Your demo should take as input an audio file, and return as output another audio file, matching the signature of the
[`speech_to_speech_translation`](https://huggingface.co/spaces/course-demos/speech-to-speech-translation/blob/3946ba6705a6632a63de8672ac52a482ab74b3fc/app.py#L35)
function in the template demo.

To submit your demo for assessment, give the repo id or URL to your demo. For the template demo, this would be `course-demos/speech-to-speech-translation`.

This Space will submit a test file to your demo, and check that the output is non-English audio. If your demo successfully
returns an audio file, and this audio file is classified as being non-English, you will pass the demo and get a green
tick next to your name! ✅

If you experience any issues with using this checker, [open an issue](https://huggingface.co/spaces/huggingface-course/audio-course-u7-assessment/discussions/new)
on this Space and tag [`@sanchit-gandhi`](https://huggingface.co/sanchit-gandhi).
"""
52
+
53
+
54
def verify_demo(repo_id):
    """Assess a submitted speech-to-speech translation Space.

    Sends ``test.wav`` to the Space's ``/predict`` endpoint, runs the
    language-identification pipeline on the audio file it returns, and, when
    the top prediction is confident and not English, appends the Space owner's
    username to the results CSV and pushes it to the Hub.

    Args:
        repo_id: Repo id (``user/space``) or full URL of the Space to assess.

    Returns:
        A tuple ``(PASS_MESSAGE, (sampling_rate, audio), label_outputs)``, where
        ``label_outputs`` maps each predicted language label to its score.

    Raises:
        gr.Error: If ``repo_id`` is not a valid repo id or URL, the user has
            already passed, the Space cannot be loaded or queried, the top
            prediction scores below ``THRESHOLD``, or the audio is classified
            as English.
    """
    if "/" not in repo_id:
        raise gr.Error(f"Ensure you pass a valid repo id to the assessor, got `{repo_id}`")

    # Drop empty segments so surrounding whitespace or a trailing slash on a
    # pasted URL does not shift which segments are read as owner and Space name.
    path_parts = [part for part in repo_id.strip().split("/") if part]
    if len(path_parts) < 2:
        raise gr.Error(f"Ensure you pass a valid repo id to the assessor, got `{repo_id}`")

    # The last two segments are `<owner>/<space>` for both a bare repo id and a full URL.
    user_name, space_name = path_parts[-2:]
    repo_id = f"{user_name}/{space_name}"

    # `in` on a pandas Series tests the index labels, not the stored values,
    # so the membership check must run against the underlying array.
    if user_name in all_results["username"].values:
        raise gr.Error(f"Username {user_name} has already passed the assessment!")

    try:
        client = Client(repo_id, hf_token=HF_TOKEN)
    except Exception as e:
        raise gr.Error(f"Error with loading Space: {e}") from e

    try:
        audio_file = client.predict("test.wav", api_name="/predict")
    except Exception as e:
        raise gr.Error(
            f"Error with querying Space, ensure your Space takes an audio file as input and returns an audio as output: {e}"
        ) from e

    audio, sampling_rate = sf.read(audio_file)

    language_prediction = pipe({"array": audio, "sampling_rate": sampling_rate})

    label_outputs = {pred["label"]: pred["score"] for pred in language_prediction}

    # The first entry is treated as the top prediction.
    top_prediction = language_prediction[0]

    if top_prediction["score"] < THRESHOLD:
        raise gr.Error(
            f"Model made random predictions - predicted {top_prediction['label']} with probability {top_prediction['score']}"
        )
    elif top_prediction["label"] == "eng":
        raise gr.Error(
            "Model generated an English audio - ensure the model is set to generate audio in a non-English langauge, e.g. Dutch"
        )

    # save and upload new evaluated usernames
    all_results.loc[len(all_results)] = {"username": user_name}
    all_results.to_csv(CSV_RESULTS_FILE, index=False)
    usernames_repo.push_to_hub()

    return PASS_MESSAGE, (sampling_rate, audio), label_outputs
104
+
105
+
106
# Single text input: the repo id or URL of the Space to assess.
repo_id_box = gr.Textbox(
    placeholder="course-demos/speech-to-speech-translation",
    label="Repo id or URL of your demo",
)

# Output components, in the order verify_demo returns its values:
# status message, (sampling_rate, audio) pair, and label -> score mapping.
result_components = [
    gr.Textbox(label="Status"),
    gr.Audio(label="Generated Speech", type="numpy"),
    gr.Label(label="Language prediction"),
]

demo = gr.Interface(
    fn=verify_demo,
    inputs=repo_id_box,
    outputs=result_components,
    title=TITLE,
    description=DESCRIPTION,
)
demo.launch()