gigant commited on
Commit
840f0db
·
1 Parent(s): 8859789

update model card README.md

Browse files
.ipynb_checkpoints/Untitled-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -722,10 +722,18 @@
722
  },
723
  {
724
  "cell_type": "code",
725
- "execution_count": 22,
726
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
727
  "metadata": {},
728
- "outputs": [],
 
 
 
 
 
 
 
 
729
  "source": [
730
  "from transformers import Seq2SeqTrainingArguments\n",
731
  "\n",
@@ -773,7 +781,7 @@
773
  },
774
  {
775
  "cell_type": "code",
776
- "execution_count": 23,
777
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
778
  "metadata": {},
779
  "outputs": [],
@@ -802,7 +810,7 @@
802
  },
803
  {
804
  "cell_type": "code",
805
- "execution_count": 27,
806
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
807
  "metadata": {},
808
  "outputs": [
@@ -841,7 +849,7 @@
841
  },
842
  {
843
  "cell_type": "code",
844
- "execution_count": 28,
845
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
846
  "metadata": {},
847
  "outputs": [
@@ -887,34 +895,181 @@
887
  },
888
  {
889
  "cell_type": "code",
890
- "execution_count": 30,
891
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
892
  "metadata": {},
893
  "outputs": [
894
  {
895
- "ename": "AttributeError",
896
- "evalue": "/home/ubuntu/whisper-ft/bin/python: undefined symbol: cudaRuntimeGetVersion",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
  "output_type": "error",
898
  "traceback": [
899
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
900
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
901
- "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
902
  "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1536\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1533\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1535\u001b[0m )\n\u001b[0;32m-> 1536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
903
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1614\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1612\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlr_scheduler \u001b[38;5;241m=\u001b[39m lr_scheduler\n\u001b[1;32m 1613\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m delay_optimizer_creation:\n\u001b[0;32m-> 1614\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_optimizer_and_scheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_training_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_steps\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1616\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m TrainerState()\n\u001b[1;32m 1617\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mis_hyper_param_search \u001b[38;5;241m=\u001b[39m trial \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
904
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1001\u001b[0m, in \u001b[0;36mTrainer.create_optimizer_and_scheduler\u001b[0;34m(self, num_training_steps)\u001b[0m\n\u001b[1;32m 993\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate_optimizer_and_scheduler\u001b[39m(\u001b[38;5;28mself\u001b[39m, num_training_steps: \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 994\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 995\u001b[0m \u001b[38;5;124;03m Setup the optimizer and the learning rate scheduler.\u001b[39;00m\n\u001b[1;32m 996\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 999\u001b[0m \u001b[38;5;124;03m `create_scheduler`) in a subclass.\u001b[39;00m\n\u001b[1;32m 1000\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1001\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_optimizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1002\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m IS_SAGEMAKER_MP_POST_1_10 \u001b[38;5;129;01mand\u001b[39;00m smp\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mcfg\u001b[38;5;241m.\u001b[39mfp16:\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;66;03m# If smp >= 1.10 and fp16 is enabled, we unwrap the optimizer\u001b[39;00m\n\u001b[1;32m 1004\u001b[0m optimizer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer\u001b[38;5;241m.\u001b[39moptimizer\n",
905
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1032\u001b[0m, in \u001b[0;36mTrainer.create_optimizer\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1020\u001b[0m decay_parameters \u001b[38;5;241m=\u001b[39m [name \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m decay_parameters \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbias\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m name]\n\u001b[1;32m 1021\u001b[0m optimizer_grouped_parameters \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 1022\u001b[0m {\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparams\u001b[39m\u001b[38;5;124m\"\u001b[39m: [p \u001b[38;5;28;01mfor\u001b[39;00m n, p \u001b[38;5;129;01min\u001b[39;00m opt_model\u001b[38;5;241m.\u001b[39mnamed_parameters() \u001b[38;5;28;01mif\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m decay_parameters],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1029\u001b[0m },\n\u001b[1;32m 1030\u001b[0m ]\n\u001b[0;32m-> 1032\u001b[0m optimizer_cls, optimizer_kwargs \u001b[38;5;241m=\u001b[39m \u001b[43mTrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_optimizer_cls_and_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msharded_ddp \u001b[38;5;241m==\u001b[39m ShardedDDPOption\u001b[38;5;241m.\u001b[39mSIMPLE:\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptimizer \u001b[38;5;241m=\u001b[39m OSS(\n\u001b[1;32m 1036\u001b[0m params\u001b[38;5;241m=\u001b[39moptimizer_grouped_parameters,\n\u001b[1;32m 1037\u001b[0m optim\u001b[38;5;241m=\u001b[39moptimizer_cls,\n\u001b[1;32m 1038\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptimizer_kwargs,\n\u001b[1;32m 1039\u001b[0m )\n",
906
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1112\u001b[0m, in \u001b[0;36mTrainer.get_optimizer_cls_and_kwargs\u001b[0;34m(args)\u001b[0m\n\u001b[1;32m 1110\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m args\u001b[38;5;241m.\u001b[39moptim \u001b[38;5;241m==\u001b[39m OptimizerNames\u001b[38;5;241m.\u001b[39mADAMW_BNB:\n\u001b[1;32m 1111\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1112\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mbitsandbytes\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moptim\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Adam8bit\n\u001b[1;32m 1114\u001b[0m optimizer_cls \u001b[38;5;241m=\u001b[39m Adam8bit\n\u001b[1;32m 1115\u001b[0m optimizer_kwargs\u001b[38;5;241m.\u001b[39mupdate(adam_kwargs)\n",
907
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/__init__.py:6\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Copyright (c) Facebook, Inc. and its affiliates.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# This source code is licensed under the MIT license found in the\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# LICENSE file in the root directory of this source tree.\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mautograd\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_functions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 7\u001b[0m MatmulLtState,\n\u001b[1;32m 8\u001b[0m bmm_cublas,\n\u001b[1;32m 9\u001b[0m matmul,\n\u001b[1;32m 10\u001b[0m matmul_cublas,\n\u001b[1;32m 11\u001b[0m mm_cublas,\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcextension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m COMPILED_WITH_CUDA\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mnn\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m modules\n",
908
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/autograd/_functions.py:5\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mbitsandbytes\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctional\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mF\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdataclasses\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dataclass\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m reduce \u001b[38;5;66;03m# Required in Python 3\u001b[39;00m\n",
909
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/functional.py:13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tuple\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtorch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Tensor\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcextension\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m COMPILED_WITH_CUDA, lib\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m reduce \u001b[38;5;66;03m# Required in Python 3\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# math.prod not compatible with python < 3.8\u001b[39;00m\n",
910
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:113\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\u001b[38;5;241m.\u001b[39minitialize()\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\n\u001b[0;32m--> 113\u001b[0m lib \u001b[38;5;241m=\u001b[39m \u001b[43mCUDASetup\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_instance\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mlib\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lib \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mis_available():\n",
911
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:109\u001b[0m, in \u001b[0;36mCUDASetup.get_instance\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__new__\u001b[39m(\u001b[38;5;28mcls\u001b[39m)\n\u001b[0;32m--> 109\u001b[0m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_instance\n",
912
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cextension.py:59\u001b[0m, in \u001b[0;36mCUDASetup.initialize\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlib \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcuda_setup\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate_cuda_setup\n\u001b[0;32m---> 59\u001b[0m binary_name, cudart_path, cuda, cc, cuda_version_string \u001b[38;5;241m=\u001b[39m \u001b[43mevaluate_cuda_setup\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcudart_path \u001b[38;5;241m=\u001b[39m cudart_path\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcuda \u001b[38;5;241m=\u001b[39m cuda\n",
913
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:125\u001b[0m, in \u001b[0;36mevaluate_cuda_setup\u001b[0;34m()\u001b[0m\n\u001b[1;32m 123\u001b[0m cuda \u001b[38;5;241m=\u001b[39m get_cuda_lib_handle()\n\u001b[1;32m 124\u001b[0m cc \u001b[38;5;241m=\u001b[39m get_compute_capability(cuda)\n\u001b[0;32m--> 125\u001b[0m cuda_version_string \u001b[38;5;241m=\u001b[39m \u001b[43mget_cuda_version\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcuda\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcudart_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m failure \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cudart_path \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
914
- "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/bitsandbytes/cuda_setup/main.py:45\u001b[0m, in \u001b[0;36mget_cuda_version\u001b[0;34m(cuda, cudart_path)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 44\u001b[0m version \u001b[38;5;241m=\u001b[39m ctypes\u001b[38;5;241m.\u001b[39mc_int()\n\u001b[0;32m---> 45\u001b[0m check_cuda_result(cuda, \u001b[43mcudart\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcudaRuntimeGetVersion\u001b[49m(ctypes\u001b[38;5;241m.\u001b[39mbyref(version)))\n\u001b[1;32m 46\u001b[0m version \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(version\u001b[38;5;241m.\u001b[39mvalue)\n\u001b[1;32m 47\u001b[0m major \u001b[38;5;241m=\u001b[39m version\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m1000\u001b[39m\n",
915
- "File \u001b[0;32m/usr/lib/python3.8/ctypes/__init__.py:386\u001b[0m, in \u001b[0;36mCDLL.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m name\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__\u001b[39m\u001b[38;5;124m'\u001b[39m):\n\u001b[1;32m 385\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(name)\n\u001b[0;32m--> 386\u001b[0m func \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getitem__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[38;5;28msetattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, func)\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func\n",
916
- "File \u001b[0;32m/usr/lib/python3.8/ctypes/__init__.py:391\u001b[0m, in \u001b[0;36mCDLL.__getitem__\u001b[0;34m(self, name_or_ordinal)\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__getitem__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name_or_ordinal):\n\u001b[0;32m--> 391\u001b[0m func \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_FuncPtr\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname_or_ordinal\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(name_or_ordinal, \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 393\u001b[0m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m=\u001b[39m name_or_ordinal\n",
917
- "\u001b[0;31mAttributeError\u001b[0m: /home/ubuntu/whisper-ft/bin/python: undefined symbol: cudaRuntimeGetVersion"
918
  ]
919
  }
920
  ],
@@ -944,7 +1099,7 @@
944
  },
945
  {
946
  "cell_type": "code",
947
- "execution_count": null,
948
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
949
  "metadata": {},
950
  "outputs": [],
@@ -952,8 +1107,8 @@
952
  "kwargs = {\n",
953
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
954
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
955
- " \"language\": \"es\",\n",
956
- " \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
957
  " \"finetuned_from\": \"openai/whisper-small\",\n",
958
  " \"tasks\": \"automatic-speech-recognition\",\n",
959
  " \"tags\": \"whisper-event\",\n",
@@ -973,7 +1128,21 @@
973
  "execution_count": null,
974
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
975
  "metadata": {},
976
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
977
  "source": [
978
  "trainer.push_to_hub(**kwargs)"
979
  ]
 
722
  },
723
  {
724
  "cell_type": "code",
725
+ "execution_count": 31,
726
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
727
  "metadata": {},
728
+ "outputs": [
729
+ {
730
+ "name": "stderr",
731
+ "output_type": "stream",
732
+ "text": [
733
+ "PyTorch: setting up devices\n"
734
+ ]
735
+ }
736
+ ],
737
  "source": [
738
  "from transformers import Seq2SeqTrainingArguments\n",
739
  "\n",
 
781
  },
782
  {
783
  "cell_type": "code",
784
+ "execution_count": 32,
785
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
786
  "metadata": {},
787
  "outputs": [],
 
810
  },
811
  {
812
  "cell_type": "code",
813
+ "execution_count": 33,
814
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
815
  "metadata": {},
816
  "outputs": [
 
849
  },
850
  {
851
  "cell_type": "code",
852
+ "execution_count": 34,
853
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
854
  "metadata": {},
855
  "outputs": [
 
895
  },
896
  {
897
  "cell_type": "code",
898
+ "execution_count": 35,
899
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
900
  "metadata": {},
901
  "outputs": [
902
  {
903
+ "name": "stderr",
904
+ "output_type": "stream",
905
+ "text": [
906
+ "/home/ubuntu/whisper-ft/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
907
+ " warnings.warn(\n",
908
+ "***** Running training *****\n",
909
+ " Num examples = 320000\n",
910
+ " Num Epochs = 9223372036854775807\n",
911
+ " Instantaneous batch size per device = 64\n",
912
+ " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
913
+ " Gradient Accumulation steps = 1\n",
914
+ " Total optimization steps = 5000\n",
915
+ " Number of trainable parameters = 241734912\n",
916
+ "Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
917
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
918
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n",
919
+ "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n",
920
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:"
921
+ ]
922
+ },
923
+ {
924
+ "name": "stdin",
925
+ "output_type": "stream",
926
+ "text": [
927
+ " ········\n"
928
+ ]
929
+ },
930
+ {
931
+ "name": "stderr",
932
+ "output_type": "stream",
933
+ "text": [
934
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc\n"
935
+ ]
936
+ },
937
+ {
938
+ "data": {
939
+ "text/html": [
940
+ "Tracking run with wandb version 0.13.6"
941
+ ],
942
+ "text/plain": [
943
+ "<IPython.core.display.HTML object>"
944
+ ]
945
+ },
946
+ "metadata": {},
947
+ "output_type": "display_data"
948
+ },
949
+ {
950
+ "data": {
951
+ "text/html": [
952
+ "Run data is saved locally in <code>/home/ubuntu/whisper-small-ro/wandb/run-20221207_155111-30fb33sa</code>"
953
+ ],
954
+ "text/plain": [
955
+ "<IPython.core.display.HTML object>"
956
+ ]
957
+ },
958
+ "metadata": {},
959
+ "output_type": "display_data"
960
+ },
961
+ {
962
+ "data": {
963
+ "text/html": [
964
+ "Syncing run <strong><a href=\"https://wandb.ai/gigant/huggingface/runs/30fb33sa\" target=\"_blank\">./</a></strong> to <a href=\"https://wandb.ai/gigant/huggingface\" target=\"_blank\">Weights & Biases</a> (<a href=\"https://wandb.me/run\" target=\"_blank\">docs</a>)<br/>"
965
+ ],
966
+ "text/plain": [
967
+ "<IPython.core.display.HTML object>"
968
+ ]
969
+ },
970
+ "metadata": {},
971
+ "output_type": "display_data"
972
+ },
973
+ {
974
+ "name": "stderr",
975
+ "output_type": "stream",
976
+ "text": [
977
+ "Reading metadata...: 5187it [00:00, 30765.68it/s]\n",
978
+ "Reading metadata...: 3703it [00:00, 26251.44it/s]\n",
979
+ "Reading metadata...: 19267it [00:00, 32817.93it/s]\n",
980
+ "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
981
+ ]
982
+ },
983
+ {
984
+ "data": {
985
+ "text/html": [
986
+ "\n",
987
+ " <div>\n",
988
+ " \n",
989
+ " <progress value='1001' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
990
+ " [1001/5000 1:50:49 < 7:23:38, 0.15 it/s, Epoch 5.03/9223372036854775807]\n",
991
+ " </div>\n",
992
+ " <table border=\"1\" class=\"dataframe\">\n",
993
+ " <thead>\n",
994
+ " <tr style=\"text-align: left;\">\n",
995
+ " <th>Step</th>\n",
996
+ " <th>Training Loss</th>\n",
997
+ " <th>Validation Loss</th>\n",
998
+ " <th>Wer</th>\n",
999
+ " </tr>\n",
1000
+ " </thead>\n",
1001
+ " <tbody>\n",
1002
+ " <tr>\n",
1003
+ " <td>1000</td>\n",
1004
+ " <td>0.013700</td>\n",
1005
+ " <td>0.206843</td>\n",
1006
+ " <td>15.703064</td>\n",
1007
+ " </tr>\n",
1008
+ " </tbody>\n",
1009
+ "</table><p>"
1010
+ ],
1011
+ "text/plain": [
1012
+ "<IPython.core.display.HTML object>"
1013
+ ]
1014
+ },
1015
+ "metadata": {},
1016
+ "output_type": "display_data"
1017
+ },
1018
+ {
1019
+ "name": "stderr",
1020
+ "output_type": "stream",
1021
+ "text": [
1022
+ "Reading metadata...: 5187it [00:00, 20496.49it/s]\n",
1023
+ "Reading metadata...: 3703it [00:00, 22242.62it/s]\n",
1024
+ "Reading metadata...: 19267it [00:00, 40619.61it/s]\n",
1025
+ "Reading metadata...: 5187it [00:00, 26427.59it/s]\n",
1026
+ "Reading metadata...: 3703it [00:00, 24175.02it/s]\n",
1027
+ "Reading metadata...: 19267it [00:00, 37108.24it/s]\n",
1028
+ "Reading metadata...: 5187it [00:00, 23505.99it/s]\n",
1029
+ "Reading metadata...: 3703it [00:00, 24004.94it/s]\n",
1030
+ "Reading metadata...: 19267it [00:00, 48305.49it/s]\n",
1031
+ "Reading metadata...: 5187it [00:00, 61068.55it/s]\n",
1032
+ "Reading metadata...: 3703it [00:00, 62782.48it/s]\n",
1033
+ "Reading metadata...: 19267it [00:00, 83700.58it/s]\n",
1034
+ "Reading metadata...: 5187it [00:00, 68870.74it/s]\n",
1035
+ "Reading metadata...: 3703it [00:00, 50878.77it/s]\n",
1036
+ "Reading metadata...: 19267it [00:00, 83165.23it/s]\n",
1037
+ "***** Running Evaluation *****\n",
1038
+ " Num examples: Unknown\n",
1039
+ " Batch size = 8\n",
1040
+ "Reading metadata...: 3859it [00:00, 15916.98it/s]\n",
1041
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
1042
+ "Saving model checkpoint to ./checkpoint-1000\n",
1043
+ "Configuration saved in ./checkpoint-1000/config.json\n",
1044
+ "Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
1045
+ "Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n",
1046
+ "tokenizer config file saved in ./checkpoint-1000/tokenizer_config.json\n",
1047
+ "Special tokens file saved in ./checkpoint-1000/special_tokens_map.json\n",
1048
+ "added tokens file saved in ./checkpoint-1000/added_tokens.json\n",
1049
+ "Feature extractor saved in ./preprocessor_config.json\n",
1050
+ "tokenizer config file saved in ./tokenizer_config.json\n",
1051
+ "Special tokens file saved in ./special_tokens_map.json\n",
1052
+ "added tokens file saved in ./added_tokens.json\n"
1053
+ ]
1054
+ },
1055
+ {
1056
+ "ename": "IsADirectoryError",
1057
+ "evalue": "[Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'",
1058
  "output_type": "error",
1059
  "traceback": [
1060
  "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1061
+ "\u001b[0;31mIsADirectoryError\u001b[0m Traceback (most recent call last)",
1062
+ "Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
1063
  "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1536\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1533\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1535\u001b[0m )\n\u001b[0;32m-> 1536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
1064
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1861\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1858\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m 1859\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 1861\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1862\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1863\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
1065
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2128\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_save:\n\u001b[0;32m-> 2128\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetrics\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_save(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
1066
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2272\u001b[0m, in \u001b[0;36mTrainer._save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 2269\u001b[0m torch\u001b[38;5;241m.\u001b[39msave(rng_states, os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(output_dir, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrng_state_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mprocess_index\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.pth\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 2271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpush_to_hub:\n\u001b[0;32m-> 2272\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_push_from_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2274\u001b[0m \u001b[38;5;66;03m# Maybe delete some older checkpoints.\u001b[39;00m\n\u001b[1;32m 2275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
1067
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:3444\u001b[0m, in \u001b[0;36mTrainer._push_from_checkpoint\u001b[0;34m(self, checkpoint_folder)\u001b[0m\n\u001b[1;32m 3442\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3443\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining in progress, epoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mint\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 3444\u001b[0m _, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3445\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 3446\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3447\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 3448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_strategy \u001b[38;5;241m==\u001b[39m HubStrategy\u001b[38;5;241m.\u001b[39mCHECKPOINT:\n\u001b[1;32m 3449\u001b[0m \u001b[38;5;66;03m# Move back the checkpoint to its place\u001b[39;00m\n",
1068
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1430\u001b[0m, in \u001b[0;36mRepository.push_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1428\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRepo currently clean. Ignoring push_to_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit_add\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauto_lfs_track\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1431\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_commit(commit_message)\n\u001b[1;32m 1432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_push(\n\u001b[1;32m 1433\u001b[0m upstream\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morigin \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_branch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1434\u001b[0m blocking\u001b[38;5;241m=\u001b[39mblocking,\n\u001b[1;32m 1435\u001b[0m auto_lfs_prune\u001b[38;5;241m=\u001b[39mauto_lfs_prune,\n\u001b[1;32m 1436\u001b[0m )\n",
1069
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1113\u001b[0m, in \u001b[0;36mRepository.git_add\u001b[0;34m(self, pattern, auto_lfs_track)\u001b[0m\n\u001b[1;32m 1110\u001b[0m tracked_files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_track_large_files(pattern)\n\u001b[1;32m 1112\u001b[0m \u001b[38;5;66;03m# Read the remaining files and track them if they're binary\u001b[39;00m\n\u001b[0;32m-> 1113\u001b[0m tracked_files\u001b[38;5;241m.\u001b[39mextend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauto_track_binary_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tracked_files:\n\u001b[1;32m 1116\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 1117\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAdding files tracked by Git LFS: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtracked_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. This may take a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m bit of time if the files are large.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1119\u001b[0m )\n",
1070
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1001\u001b[0m, in \u001b[0;36mRepository.auto_track_binary_files\u001b[0;34m(self, pattern)\u001b[0m\n\u001b[1;32m 994\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m size_in_mb \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10\u001b[39m:\n\u001b[1;32m 995\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 996\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mParsing a large file to check if binary or not. Tracking large\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 997\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m files using `repository.auto_track_large_files` is\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 998\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m recommended so as to not load the full file in memory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 999\u001b[0m )\n\u001b[0;32m-> 1001\u001b[0m is_binary \u001b[38;5;241m=\u001b[39m \u001b[43mis_binary_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_to_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_binary:\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlfs_track(filename)\n",
1071
+ "File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:235\u001b[0m, in \u001b[0;36mis_binary_file\u001b[0;34m(filename)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;124;03mCheck if file is a binary file.\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;124;03m `bool`: `True` if the file passed is a binary file, `False` otherwise.\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 236\u001b[0m content \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;241m10\u001b[39m \u001b[38;5;241m*\u001b[39m (\u001b[38;5;241m1024\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m)) \u001b[38;5;66;03m# Read a maximum of 10MB\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# Code sample taken from the following stack overflow thread\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;66;03m# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391\u001b[39;00m\n",
1072
+ "\u001b[0;31mIsADirectoryError\u001b[0m: [Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'"
 
 
 
 
 
 
1073
  ]
1074
  }
1075
  ],
 
1099
  },
1100
  {
1101
  "cell_type": "code",
1102
+ "execution_count": 36,
1103
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1104
  "metadata": {},
1105
  "outputs": [],
 
1107
  "kwargs = {\n",
1108
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1109
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1110
+ " \"language\": \"ro\",\n",
1111
+ " \"model_name\": \"Whisper Small Romanian\", # a 'pretty' name for your model\n",
1112
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1113
  " \"tasks\": \"automatic-speech-recognition\",\n",
1114
  " \"tags\": \"whisper-event\",\n",
 
1128
  "execution_count": null,
1129
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1130
  "metadata": {},
1131
+ "outputs": [
1132
+ {
1133
+ "name": "stderr",
1134
+ "output_type": "stream",
1135
+ "text": [
1136
+ "Saving model checkpoint to ./\n",
1137
+ "Configuration saved in ./config.json\n",
1138
+ "Model weights saved in ./pytorch_model.bin\n",
1139
+ "Feature extractor saved in ./preprocessor_config.json\n",
1140
+ "tokenizer config file saved in ./tokenizer_config.json\n",
1141
+ "Special tokens file saved in ./special_tokens_map.json\n",
1142
+ "added tokens file saved in ./added_tokens.json\n"
1143
+ ]
1144
+ }
1145
+ ],
1146
  "source": [
1147
  "trainer.push_to_hub(**kwargs)"
1148
  ]
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - ro
4
+ license: apache-2.0
5
+ tags:
6
+ - whisper-event
7
+ - generated_from_trainer
8
+ datasets:
9
+ - mozilla-foundation/common_voice_11_0
10
+ model-index:
11
+ - name: Whisper Small Romanian
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # Whisper Small Romanian
19
+
20
+ This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the Common Voice 11.0 dataset.
21
+ It achieves the following results on the evaluation set:
22
+ - eval_loss: 0.2068
23
+ - eval_wer: 15.7031
24
+ - eval_runtime: 1070.693
25
+ - eval_samples_per_second: 3.604
26
+ - eval_steps_per_second: 0.451
27
+ - epoch: 5.03
28
+ - step: 1000
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 1e-05
48
+ - train_batch_size: 64
49
+ - eval_batch_size: 8
50
+ - seed: 42
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_steps: 500
54
+ - training_steps: 5000
55
+ - mixed_precision_training: Native AMP
56
+
57
+ ### Framework versions
58
+
59
+ - Transformers 4.26.0.dev0
60
+ - Pytorch 1.13.0+cu117
61
+ - Datasets 2.7.1.dev0
62
+ - Tokenizers 0.13.2
Untitled.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
fine-tune-whisper-streaming.ipynb CHANGED
@@ -1099,7 +1099,7 @@
1099
  },
1100
  {
1101
  "cell_type": "code",
1102
- "execution_count": null,
1103
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1104
  "metadata": {},
1105
  "outputs": [],
@@ -1107,8 +1107,8 @@
1107
  "kwargs = {\n",
1108
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1109
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1110
- " \"language\": \"es\",\n",
1111
- " \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
1112
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1113
  " \"tasks\": \"automatic-speech-recognition\",\n",
1114
  " \"tags\": \"whisper-event\",\n",
@@ -1128,7 +1128,21 @@
1128
  "execution_count": null,
1129
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1130
  "metadata": {},
1131
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1132
  "source": [
1133
  "trainer.push_to_hub(**kwargs)"
1134
  ]
 
1099
  },
1100
  {
1101
  "cell_type": "code",
1102
+ "execution_count": 36,
1103
  "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
1104
  "metadata": {},
1105
  "outputs": [],
 
1107
  "kwargs = {\n",
1108
  " \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
1109
  " \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
1110
+ " \"language\": \"ro\",\n",
1111
+ " \"model_name\": \"Whisper Small Romanian\", # a 'pretty' name for your model\n",
1112
  " \"finetuned_from\": \"openai/whisper-small\",\n",
1113
  " \"tasks\": \"automatic-speech-recognition\",\n",
1114
  " \"tags\": \"whisper-event\",\n",
 
1128
  "execution_count": null,
1129
  "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
1130
  "metadata": {},
1131
+ "outputs": [
1132
+ {
1133
+ "name": "stderr",
1134
+ "output_type": "stream",
1135
+ "text": [
1136
+ "Saving model checkpoint to ./\n",
1137
+ "Configuration saved in ./config.json\n",
1138
+ "Model weights saved in ./pytorch_model.bin\n",
1139
+ "Feature extractor saved in ./preprocessor_config.json\n",
1140
+ "tokenizer config file saved in ./tokenizer_config.json\n",
1141
+ "Special tokens file saved in ./special_tokens_map.json\n",
1142
+ "added tokens file saved in ./added_tokens.json\n"
1143
+ ]
1144
+ }
1145
+ ],
1146
  "source": [
1147
  "trainer.push_to_hub(**kwargs)"
1148
  ]