Update README.md
Browse files
README.md
CHANGED
@@ -45,7 +45,7 @@ ds = load_dataset("common_voice", "fr", split="test", cache_dir="./data/fr")
|
|
45 |
|
46 |
|
47 |
|
48 |
-
chars_to_ignore_regex = '[
|
49 |
def map_to_array(batch):
|
50 |
speech, _ = torchaudio.load(batch["path"])
|
51 |
batch["speech"] = resampler.forward(speech.squeeze(0)).numpy()
|
@@ -74,7 +74,7 @@ print(wer.compute(predictions=result["predicted"], references=result["target"]))
|
|
74 |
|
75 |
## Training
|
76 |
|
77 |
-
6% of the Common Voice `train`, `validation` datasets were used for training.
|
78 |
|
79 |
## Testing
|
80 |
|
|
|
45 |
|
46 |
|
47 |
|
48 |
+
chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\‘\\\\’\\\\’\\\\’\\\\‘\\\\…\\\\·\\\\!\\\\ǃ\\\\?\\\\«\\\\‹\\\\»\\\\›“\\\\”\\\\\\\\ʿ\\\\ʾ\\\\„\\\\∞\\\\\\\\|\\\\.\\\\,\\\\;\\\\:\\\\*\\\\—\\\\–\\\\─\\\\―\\\\_\\\\/\\\\:\\\\ː\\\\;\\\\,\\\\=\\\\«\\\\»\\\\→]'
|
49 |
def map_to_array(batch):
|
50 |
speech, _ = torchaudio.load(batch["path"])
|
51 |
batch["speech"] = resampler.forward(speech.squeeze(0)).numpy()
|
|
|
74 |
|
75 |
## Training
|
76 |
|
77 |
+
6% of the Common Voice `train` and `validation` datasets (20K files) were used for training.
|
78 |
|
79 |
## Testing
|
80 |
|