gorkemgoknar
committed on
Commit
·
d69a18a
1
Parent(s):
cc4e759
Update README.md
Browse files
README.md
CHANGED
@@ -37,8 +37,8 @@ import torchaudio
|
|
37 |
from datasets import load_dataset
|
38 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
39 |
test_dataset = load_dataset("common_voice", "tr", split="test[:2%]")
|
40 |
-
processor = Wav2Vec2Processor.from_pretrained("
|
41 |
-
model = Wav2Vec2ForCTC.from_pretrained("
|
42 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
43 |
# Preprocessing the datasets.
|
44 |
# We need to read the audio files as arrays
|
@@ -69,7 +69,7 @@ model = Wav2Vec2ForCTC.from_pretrained("ozcangundes/wav2vec2-large-xlsr-53-turki
|
|
69 |
model.to("cuda")
|
70 |
|
71 |
#Note: Not ignoring "'" on this one
|
72 |
-
chars_to_ignore_regex = '[
|
73 |
|
74 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
75 |
# Preprocessing the datasets.
|
|
|
37 |
from datasets import load_dataset
|
38 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
39 |
test_dataset = load_dataset("common_voice", "tr", split="test[:2%]")
|
40 |
+
processor = Wav2Vec2Processor.from_pretrained("gorkemgoknar/wav2vec2-large-xlsr-53-turkish")
|
41 |
+
model = Wav2Vec2ForCTC.from_pretrained("gorkemgoknar/wav2vec2-large-xlsr-53-turkish")
|
42 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
43 |
# Preprocessing the datasets.
|
44 |
# We need to read the audio files as arrays
|
|
|
69 |
model.to("cuda")
|
70 |
|
71 |
#Note: Not ignoring "'" on this one
|
72 |
+
chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�]'
|
73 |
|
74 |
resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
75 |
# Preprocessing the datasets.
|