erkhem-gantulga committed on
Commit 88fddec · 1 Parent(s): 1899cc9

Added cog container image file for deployment to Replicate

.cog/openapi_schema.json ADDED
@@ -0,0 +1 @@
+ {"components":{"schemas":{"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"title":"Detail","type":"array"}},"title":"HTTPValidationError","type":"object"},"Input":{"properties":{"audio":{"description":"Audio file to transcribe","format":"uri","title":"Audio","type":"string","x-order":0},"batch_size":{"default":8,"description":"Batch size for processing","title":"Batch Size","type":"integer","x-order":4},"chunk_length":{"default":30,"description":"Length of audio chunks to process at once, in seconds","title":"Chunk Length","type":"integer","x-order":3},"language":{"description":"Language of the audio (optional)","title":"Language","type":"string","x-order":1},"task":{"allOf":[{"$ref":"#/components/schemas/task"}],"default":"transcribe","description":"Task to perform: transcribe or translate","x-order":2}},"required":["audio"],"title":"Input","type":"object"},"Output":{"format":"uri","title":"Output","type":"string"},"PredictionRequest":{"properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"output_file_prefix":{"title":"Output File Prefix","type":"string"},"webhook":{"format":"uri","maxLength":65536,"minLength":1,"title":"Webhook","type":"string"},"webhook_events_filter":{"default":["start","output","logs","completed"],"items":{"$ref":"#/components/schemas/WebhookEvent"},"type":"array"}},"title":"PredictionRequest","type":"object"},"PredictionResponse":{"properties":{"completed_at":{"format":"date-time","title":"Completed At","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"error":{"title":"Error","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"logs":{"default":"","title":"Logs","type":"string"},"metrics":{"title":"Metrics","type":"object"},"output":{"$ref":"#/components/schemas/Output"},"started_at":{"format":"date-time","title":"Started At","type":"string"},"status":{"$ref":"#/components/schemas/Status"},"version":{"title":"Version","type":"string"}},"title":"PredictionResponse","type":"object"},"Status":{"description":"An enumeration.","enum":["starting","processing","succeeded","canceled","failed"],"title":"Status","type":"string"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"title":"Location","type":"array"},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}},"required":["loc","msg","type"],"title":"ValidationError","type":"object"},"WebhookEvent":{"description":"An enumeration.","enum":["start","output","logs","completed"],"title":"WebhookEvent","type":"string"},"task":{"description":"An enumeration.","enum":["transcribe","translate"],"title":"task","type":"string"}}},"info":{"title":"Cog","version":"0.1.0"},"openapi":"3.0.2","paths":{"/":{"get":{"operationId":"root__get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Root Get"}}},"description":"Successful Response"}},"summary":"Root"}},"/health-check":{"get":{"operationId":"healthcheck_health_check_get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Healthcheck Health Check Get"}}},"description":"Successful Response"}},"summary":"Healthcheck"}},"/predictions":{"post":{"description":"Run a single prediction on the model","operationId":"predict_predictions_post","parameters":[{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionRequest"}}}},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict"}},"/predictions/{prediction_id}":{"put":{"description":"Run a single prediction on the model (idempotent creation).","operationId":"predict_idempotent_predictions__prediction_id__put","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}},{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"allOf":[{"$ref":"#/components/schemas/PredictionRequest"}],"title":"Prediction Request"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict Idempotent"}},"/predictions/{prediction_id}/cancel":{"post":{"description":"Cancel a running prediction","operationId":"cancel_predictions__prediction_id__cancel_post","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Cancel Predictions Prediction Id Cancel Post"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Cancel"}},"/shutdown":{"post":{"operationId":"start_shutdown_shutdown_post","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Start Shutdown Shutdown Post"}}},"description":"Successful Response"}},"summary":"Start Shutdown"}}}}
.cog/tmp/build20240912112156.1729871670561658/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ --extra-index-url https://download.pytorch.org/whl/cu118
+ numpy==1.26.4
+ torch==2.0.1
+ transformers==4.30.2
+ librosa==0.10.0
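A quick sanity check that the pinned stack resolved correctly (a sketch; run inside the built image):

import numpy, torch, transformers, librosa

# Versions should match the pins above; CUDA should be visible in a GPU build.
print(numpy.__version__, torch.__version__, transformers.__version__, librosa.__version__)
print("CUDA available:", torch.cuda.is_available())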
.dockerignore ADDED
@@ -0,0 +1,17 @@
+ # The .dockerignore file excludes files from the container build process.
+ #
+ # https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+ # Exclude Git files
+ **/.git
+ **/.github
+ **/.gitignore
+
+ # Exclude Python cache files
+ __pycache__
+ .mypy_cache
+ .pytest_cache
+ .ruff_cache
+
+ # Exclude Python virtual environment
+ /venv
.github/workflows/push.yaml ADDED
@@ -0,0 +1,54 @@
+ name: Push to Replicate
+
+ on:
+   # Workflow dispatch allows you to manually trigger the workflow from GitHub.com
+   # Go to your repo, click "Actions", click "Push to Replicate", click "Run workflow"
+   workflow_dispatch:
+     inputs:
+       model_name:
+         description: 'Enter the model name, like "alice/bunny-detector". If unset, this will default to the value of `image` in cog.yaml.'
+   # # Uncomment these lines to trigger the workflow on every push to the main branch
+   # push:
+   #   branches:
+   #     - main
+
+ jobs:
+   push_to_replicate:
+     name: Push to Replicate
+
+     # If your model is large, the default GitHub Actions runner may not
+     # have enough disk space. If you need more space you can set up a
+     # bigger runner on GitHub.
+     runs-on: ubuntu-latest
+
+     steps:
+       # This action cleans up disk space to make more room for your
+       # model code, weights, etc.
+       - name: Free disk space
+         uses: jlumbroso/free-disk-space@main
+         with:
+           tool-cache: false
+           docker-images: false
+
+       - name: Checkout
+         uses: actions/checkout@v4
+
+       # This action installs Docker buildx and Cog (and optionally CUDA)
+       - name: Setup Cog
+         uses: replicate/setup-cog@v2
+         with:
+           # If you set REPLICATE_API_TOKEN in your GitHub repository secrets,
+           # the action will authenticate with Replicate automatically so you
+           # can push your model
+           token: ${{ secrets.REPLICATE_API_TOKEN }}
+
+       # If you trigger the workflow manually, you can specify the model name.
+       # If you leave it blank (or if the workflow is triggered by a push), the
+       # model name will be derived from the `image` value in cog.yaml.
+       - name: Push to Replicate
+         run: |
+           if [ -n "${{ inputs.model_name }}" ]; then
+             cog push r8.im/${{ inputs.model_name }}
+           else
+             cog push
+           fi
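Once pushed, the model can be invoked from Python with the `replicate` client; a minimal sketch (the model identifier is a placeholder for whatever `cog push` targeted, and REPLICATE_API_TOKEN must be set in the environment):

import replicate

# Placeholder identifier -- substitute the owner/name from your Replicate model page.
output = replicate.run(
    "owner/whisper-transcriber",
    input={"audio": open("sample.wav", "rb"), "task": "transcribe"},
)
print(output)  # URI of the generated .srt file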
cog.yaml ADDED
@@ -0,0 +1,29 @@
+ # Configuration for Cog ⚙️
+ # Reference: https://cog.run/yaml
+
+ build:
+   # set to true if your model requires a GPU
+   gpu: true
+
+   # a list of ubuntu apt packages to install
+   # system_packages:
+   #   - "libgl1-mesa-glx"
+   #   - "libglib2.0-0"
+
+   # python version in the form '3.11' or '3.11.4'
+   python_version: "3.11"
+
+   # a list of packages in the format <package-name>==<version>
+   python_packages:
+     - "numpy==1.26.4"
+     - "torch==2.0.1"
+     - "transformers==4.30.2"
+     - "librosa==0.10.0"
+
+   # commands run after the environment is setup
+   # run:
+   #   - "echo env is ready!"
+   #   - "echo another command if needed"
+
+ # predict.py defines how predictions are run on your model
+ predict: "predict.py:Predictor"
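Before pushing, this configuration can be exercised locally with `cog predict -i audio=@sample.wav` (the file name is a placeholder); Cog builds the image from this file and runs `Predictor.predict` once against the given input.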
predict.py ADDED
@@ -0,0 +1,69 @@
+ # Prediction interface for Cog ⚙️
+ # https://cog.run/python
+
+ from cog import BasePredictor, Input, Path
+ import torch
+ from transformers import pipeline
+ import os
+ import time
+
+ class Predictor(BasePredictor):
+     def setup(self):
+         """Load the model into memory to make running multiple predictions efficient"""
+         # Check if CUDA is available and set the device accordingly
+         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         self.pipe = pipeline("automatic-speech-recognition",
+                              model="./",
+                              device=self.device,
+                              torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+
+     def predict(
+         self,
+         audio: Path = Input(description="Audio file to transcribe"),
+         language: str = Input(description="Language of the audio (optional)", default=None),
+         task: str = Input(description="Task to perform: transcribe or translate", default="transcribe", choices=["transcribe", "translate"]),
+         chunk_length: int = Input(description="Length of audio chunks to process at once, in seconds", default=30),
+         batch_size: int = Input(description="Batch size for processing", default=8),
+     ) -> Path:
+         """Run a single prediction on the model"""
+         start_time = time.perf_counter()
+
+         # Set the language and task (forced_decoder_ids steers Whisper's decoder)
+         if language:
+             self.pipe.model.config.forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language=language, task=task)
+
+         # Perform ASR
+         outputs = self.pipe(str(audio), chunk_length_s=chunk_length, batch_size=batch_size, return_timestamps=True)
+
+         elapsed_time = time.perf_counter() - start_time
+         print(f"ASR took {elapsed_time:.2f} seconds.")
+
+         # Save ASR chunks to an SRT file
+         audio_file_name = os.path.splitext(os.path.basename(str(audio)))[0]
+         srt_filename = f"{audio_file_name}.srt"
+         with open(srt_filename, 'w', encoding="utf-8") as srt_file:
+             prev = 0
+             for index, chunk in enumerate(outputs['chunks']):
+                 prev, srt_start = self.seconds_to_srt_time_format(prev, chunk['timestamp'][0])
+                 prev, srt_end = self.seconds_to_srt_time_format(prev, chunk['timestamp'][1])
+                 srt_file.write(f"{index + 1}\n")
+                 srt_file.write(f"{srt_start} --> {srt_end}\n")
+                 srt_file.write(f"{chunk['text'].strip()}\n\n")
+
+         return Path(srt_filename)
+
+     def seconds_to_srt_time_format(self, prev, seconds):
+         # Whisper can return None for a chunk's end timestamp (typically the
+         # final chunk); fall back to the previous known time so the SRT stays valid.
+         if not isinstance(seconds, (int, float)):
+             seconds = prev
+         else:
+             prev = seconds
+         hours = seconds // 3600
+         seconds %= 3600
+         minutes = seconds // 60
+         seconds %= 60
+         milliseconds = int((seconds - int(seconds)) * 1000)
+         hours = int(hours)
+         minutes = int(minutes)
+         seconds = int(seconds)
+         return (prev, f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}")
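To make the timestamp handling concrete, here is a standalone check of the same arithmetic (a sketch; `to_srt_time` is a hypothetical free-function equivalent of the method above):

# Hypothetical standalone equivalent of seconds_to_srt_time_format.
def to_srt_time(prev, seconds):
    # None (a missing Whisper timestamp) falls back to the previous time
    if not isinstance(seconds, (int, float)):
        seconds = prev
    else:
        prev = seconds
    h, rem = divmod(int(seconds), 3600)
    m, s = divmod(rem, 60)
    ms = int((seconds - int(seconds)) * 1000)
    return prev, f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

print(to_srt_time(0, 3661.5))     # (3661.5, '01:01:01,500')
print(to_srt_time(3661.5, None))  # (3661.5, '01:01:01,500') -- fallback path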