Yingxu He
committed on
Update README.md
README.md CHANGED

@@ -41,7 +41,46 @@ This is the model card of a 🤗 transformers model that has been pushed on the
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->

The example below loads the processor and model from the MERaLiON/AudioLLM repository and transcribes a sample LibriSpeech clip.

```python
from datasets import load_dataset
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor

repo_id = "MERaLiON/AudioLLM"

# Load the processor and model, trusting the custom code shipped with the repository.
processor = AutoProcessor.from_pretrained(
    repo_id,
    trust_remote_code=True,
)
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    repo_id,
    use_safetensors=True,
    trust_remote_code=True,
)

# Build the chat prompt; <SpeechHere> is the placeholder for the audio input.
prompt = "Can you please turn this audio into text format?"
conversation = [
    {
        "role": "user",
        "content": f"Given the following audio context: <SpeechHere>\n\nText instruction: {prompt}"
    }
]
chat_prompt = processor.tokenizer.apply_chat_template(
    conversation=conversation,
    tokenize=False,
    add_generation_prompt=True
)

# Load a sample audio clip from the long-form LibriSpeech validation split.
libri_data = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
audio_array = libri_data[0]["audio"]["array"]

# Prepare model inputs (the processor limits the audio duration via time_duration_limit).
inputs = processor(text=chat_prompt, audios=audio_array, time_duration_limit=20)

outputs = model.generate(**inputs, max_new_tokens=128)

# Decode only the newly generated tokens, skipping the prompt portion.
print(processor.decode(outputs[0, inputs['input_ids'].size(1):], skip_special_tokens=True))
```
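
To run the same pipeline on a GPU, the objects can usually be moved and cast explicitly. The snippet below is a minimal sketch rather than part of the original card: it assumes the processor output behaves like a dict of PyTorch tensors (consistent with the `.size()` call above) and that the remote model code supports the standard `.to()`/`.half()` casts.

```python
import torch

# Minimal GPU sketch (assumption: processor outputs are PyTorch tensors and the
# remote model code supports standard .to()/.half() moves).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).half() if device == "cuda" else model.to(device)

# Move every tensor to the device; cast only floating-point features to fp16.
inputs = {
    k: (v.to(device).half() if device == "cuda" and v.is_floating_point() else v.to(device))
    for k, v in inputs.items()
}

outputs = model.generate(**inputs, max_new_tokens=128)
print(processor.decode(outputs[0, inputs["input_ids"].size(1):], skip_special_tokens=True))
```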
### Downstream Use [optional]