erkhem-gantulga committed
Commit 88fddec
Parent(s): 1899cc9
Added cog container image file for deployment to Replicate
Browse files
- .cog/openapi_schema.json +1 -0
- .cog/tmp/build20240912112156.1729871670561658/requirements.txt +5 -0
- .dockerignore +17 -0
- .github/workflows/push.yaml +54 -0
- cog.yaml +29 -0
- predict.py +69 -0
.cog/openapi_schema.json
ADDED
@@ -0,0 +1 @@
+{"components":{"schemas":{"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"title":"Detail","type":"array"}},"title":"HTTPValidationError","type":"object"},"Input":{"properties":{"audio":{"description":"Audio file to transcribe","format":"uri","title":"Audio","type":"string","x-order":0},"batch_size":{"default":8,"description":"Batch size for processing","title":"Batch Size","type":"integer","x-order":4},"chunk_length":{"default":30,"description":"Length of audio chunks to process at once, in seconds","title":"Chunk Length","type":"integer","x-order":3},"language":{"description":"Language of the audio (optional)","title":"Language","type":"string","x-order":1},"task":{"allOf":[{"$ref":"#/components/schemas/task"}],"default":"transcribe","description":"Task to perform: transcribe or translate","x-order":2}},"required":["audio"],"title":"Input","type":"object"},"Output":{"format":"uri","title":"Output","type":"string"},"PredictionRequest":{"properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"output_file_prefix":{"title":"Output File Prefix","type":"string"},"webhook":{"format":"uri","maxLength":65536,"minLength":1,"title":"Webhook","type":"string"},"webhook_events_filter":{"default":["start","output","logs","completed"],"items":{"$ref":"#/components/schemas/WebhookEvent"},"type":"array"}},"title":"PredictionRequest","type":"object"},"PredictionResponse":{"properties":{"completed_at":{"format":"date-time","title":"Completed At","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"error":{"title":"Error","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"logs":{"default":"","title":"Logs","type":"string"},"metrics":{"title":"Metrics","type":"object"},"output":{"$ref":"#/components/schemas/Output"},"started_at":{"format":"date-time","title":"Started At","type":"string"},"status":{"$ref":"#/components/schemas/Status"},"version":{"title":"Version","type":"string"}},"title":"PredictionResponse","type":"object"},"Status":{"description":"An enumeration.","enum":["starting","processing","succeeded","canceled","failed"],"title":"Status","type":"string"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"title":"Location","type":"array"},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}},"required":["loc","msg","type"],"title":"ValidationError","type":"object"},"WebhookEvent":{"description":"An enumeration.","enum":["start","output","logs","completed"],"title":"WebhookEvent","type":"string"},"task":{"description":"An enumeration.","enum":["transcribe","translate"],"title":"task","type":"string"}}},"info":{"title":"Cog","version":"0.1.0"},"openapi":"3.0.2","paths":{"/":{"get":{"operationId":"root__get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Root Get"}}},"description":"Successful Response"}},"summary":"Root"}},"/health-check":{"get":{"operationId":"healthcheck_health_check_get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Healthcheck Health Check Get"}}},"description":"Successful Response"}},"summary":"Healthcheck"}},"/predictions":{"post":{"description":"Run a single prediction on the model","operationId":"predict_predictions_post","parameters":[{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionRequest"}}}},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict"}},"/predictions/{prediction_id}":{"put":{"description":"Run a single prediction on the model (idempotent creation).","operationId":"predict_idempotent_predictions__prediction_id__put","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}},{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"allOf":[{"$ref":"#/components/schemas/PredictionRequest"}],"title":"Prediction Request"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict Idempotent"}},"/predictions/{prediction_id}/cancel":{"post":{"description":"Cancel a running prediction","operationId":"cancel_predictions__prediction_id__cancel_post","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Cancel Predictions Prediction Id Cancel Post"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Cancel"}},"/shutdown":{"post":{"operationId":"start_shutdown_shutdown_post","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Start Shutdown Shutdown Post"}}},"description":"Successful Response"}},"summary":"Start Shutdown"}}}}
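This schema is what Cog generates from the Predictor signature in predict.py: POST /predictions runs one transcription, with audio required and language, task, chunk_length, and batch_size optional. A minimal sketch of calling that endpoint from Python, assuming the built container is already running locally with its HTTP port published (Cog's examples use port 5000) and that the audio URI below, a placeholder, is reachable from inside the container:

import requests

resp = requests.post(
    "http://localhost:5000/predictions",
    json={
        "input": {
            "audio": "https://example.com/sample.wav",  # placeholder URI
            "task": "transcribe",
            "chunk_length": 30,
            "batch_size": 8,
        }
    },
)
resp.raise_for_status()
result = resp.json()
print(result["status"])  # "succeeded" on success
print(result["output"])  # URI of the generated .srt file, per the Output schema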
.cog/tmp/build20240912112156.1729871670561658/requirements.txt
ADDED
@@ -0,0 +1,5 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+numpy==1.26.4
+torch==2.0.1
+transformers==4.30.2
+librosa==0.10.0
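The --extra-index-url line points pip at PyTorch's CUDA 11.8 wheel index, so the pinned torch==2.0.1 resolves to a GPU build; the remaining pins mirror python_packages in cog.yaml below. A quick sanity check that the stack resolved as expected (a sketch, not part of the repo):

import numpy, torch, transformers, librosa

print(numpy.__version__)          # expect 1.26.4
print(torch.__version__)          # expect 2.0.1 (a +cu118 build from the extra index)
print(torch.cuda.is_available())  # True inside the GPU container
print(transformers.__version__)   # expect 4.30.2
print(librosa.__version__)        # expect 0.10.0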
.dockerignore
ADDED
@@ -0,0 +1,17 @@
+# The .dockerignore file excludes files from the container build process.
+#
+# https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+# Exclude Git files
+**/.git
+**/.github
+**/.gitignore
+
+# Exclude Python cache files
+__pycache__
+.mypy_cache
+.pytest_cache
+.ruff_cache
+
+# Exclude Python virtual environment
+/venv
.github/workflows/push.yaml
ADDED
@@ -0,0 +1,54 @@
+name: Push to Replicate
+
+on:
+  # Workflow dispatch allows you to manually trigger the workflow from GitHub.com
+  # Go to your repo, click "Actions", click "Push to Replicate", click "Run workflow"
+  workflow_dispatch:
+    inputs:
+      model_name:
+        description: 'Enter the model name, like "alice/bunny-detector". If unset, this will default to the value of `image` in cog.yaml.'
+  # # Uncomment these lines to trigger the workflow on every push to the main branch
+  # push:
+  #   branches:
+  #     - main
+
+jobs:
+  push_to_replicate:
+    name: Push to Replicate
+
+    # If your model is large, the default GitHub Actions runner may not
+    # have enough disk space. If you need more space you can set up a
+    # bigger runner on GitHub.
+    runs-on: ubuntu-latest
+
+    steps:
+      # This action cleans up disk space to make more room for your
+      # model code, weights, etc.
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: false
+          docker-images: false
+
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      # This action installs Docker buildx and Cog (and optionally CUDA)
+      - name: Setup Cog
+        uses: replicate/setup-cog@v2
+        with:
+          # If you set REPLICATE_API_TOKEN in your GitHub repository secrets,
+          # the action will authenticate with Replicate automatically so you
+          # can push your model
+          token: ${{ secrets.REPLICATE_API_TOKEN }}
+
+      # If you trigger the workflow manually, you can specify the model name.
+      # If you leave it blank (or if the workflow is triggered by a push), the
+      # model name will be derived from the `image` value in cog.yaml.
+      - name: Push to Replicate
+        run: |
+          if [ -n "${{ inputs.model_name }}" ]; then
+            cog push r8.im/${{ inputs.model_name }}
+          else
+            cog push
+          fi
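Once the workflow's cog push succeeds, the model is callable through Replicate's API. A sketch using the official replicate Python client; the model slug below is hypothetical (use whatever model_name you pushed), and REPLICATE_API_TOKEN must be set in the environment:

import replicate

output = replicate.run(
    "erkhem-gantulga/whisper-srt",  # hypothetical slug; substitute your own model
    input={
        "audio": open("speech.wav", "rb"),  # the client uploads local files for uri inputs
        "task": "transcribe",
    },
)
print(output)  # URL of the generated .srt file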
cog.yaml
ADDED
@@ -0,0 +1,29 @@
+# Configuration for Cog ⚙️
+# Reference: https://cog.run/yaml
+
+build:
+  # set to true if your model requires a GPU
+  gpu: true
+
+  # a list of ubuntu apt packages to install
+  # system_packages:
+  #   - "libgl1-mesa-glx"
+  #   - "libglib2.0-0"
+
+  # python version in the form '3.11' or '3.11.4'
+  python_version: "3.11"
+
+  # a list of packages in the format <package-name>==<version>
+  python_packages:
+    - "numpy==1.26.4"
+    - "torch==2.0.1"
+    - "transformers==4.30.2"
+    - "librosa==0.10.0"
+
+  # commands run after the environment is setup
+  # run:
+  #   - "echo env is ready!"
+  #   - "echo another command if needed"
+
+# predict.py defines how predictions are run on your model
+predict: "predict.py:Predictor"
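The predict entry tells Cog to load the Predictor class from predict.py, shown below. That class can also be smoke-tested outside the container; a sketch that assumes the Whisper weights predict.py loads from "./" are present in the repo root and that a sample speech.wav exists:

from predict import Predictor

predictor = Predictor()
predictor.setup()  # loads the ASR pipeline once, as Cog would at startup
srt_path = predictor.predict(
    audio="speech.wav",
    language=None,
    task="transcribe",
    chunk_length=30,
    batch_size=8,
)
print(srt_path)  # e.g. speech.srt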
predict.py
ADDED
@@ -0,0 +1,69 @@
+# Prediction interface for Cog ⚙️
+# https://cog.run/python
+
+from cog import BasePredictor, Input, Path
+import torch
+from transformers import pipeline
+import os
+import time
+
+class Predictor(BasePredictor):
+    def setup(self):
+        """Load the model into memory to make running multiple predictions efficient"""
+        # Check if CUDA is available and set the device accordingly
+        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.pipe = pipeline("automatic-speech-recognition",
+                             model="./",
+                             device=self.device,
+                             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+
+    def predict(
+        self,
+        audio: Path = Input(description="Audio file to transcribe"),
+        language: str = Input(description="Language of the audio (optional)", default=None),
+        task: str = Input(description="Task to perform: transcribe or translate", default="transcribe", choices=["transcribe", "translate"]),
+        chunk_length: int = Input(description="Length of audio chunks to process at once, in seconds", default=30),
+        batch_size: int = Input(description="Batch size for processing", default=8),
+    ) -> Path:
+        """Run a single prediction on the model"""
+        start_time = time.perf_counter()
+
+        # Set the language and task
+        if language:
+            self.pipe.model.config.forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language=language, task=task)
+
+        # Perform ASR
+        outputs = self.pipe(str(audio), chunk_length_s=chunk_length, batch_size=batch_size, return_timestamps=True)
+
+        end_time = time.perf_counter()
+        elapsed_time = end_time - start_time
+        print(f"ASR took {elapsed_time:.2f} seconds.")
+
+        # Save ASR chunks to an SRT file
+        audio_file_name = os.path.splitext(os.path.basename(str(audio)))[0]
+        srt_filename = f"{audio_file_name}.srt"
+        with open(srt_filename, 'w', encoding="utf-8") as srt_file:
+            prev = 0
+            for index, chunk in enumerate(outputs['chunks']):
+                prev, start_time = self.seconds_to_srt_time_format(prev, chunk['timestamp'][0])
+                prev, end_time = self.seconds_to_srt_time_format(prev, chunk['timestamp'][1])
+                srt_file.write(f"{index + 1}\n")
+                srt_file.write(f"{start_time} --> {end_time}\n")
+                srt_file.write(f"{chunk['text'].strip()}\n\n")
+
+        return Path(srt_filename)
+
+    def seconds_to_srt_time_format(self, prev, seconds):
+        if not (isinstance(seconds, int) or isinstance(seconds, float)):
+            seconds = prev
+        else:
+            prev = seconds
+        hours = seconds // 3600
+        seconds %= 3600
+        minutes = seconds // 60
+        seconds %= 60
+        milliseconds = int((seconds - int(seconds)) * 1000)
+        hours = int(hours)
+        minutes = int(minutes)
+        seconds = int(seconds)
+        return (prev, f"{hours:02d}:{minutes:02d}:{int(seconds):02d},{milliseconds:03d}")
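One detail worth noting in seconds_to_srt_time_format: Whisper's chunk timestamps can come back as None (typically at the end of the audio), so the loop threads a prev value through each call and the helper falls back to it when a timestamp is missing, while formatting seconds into SRT's HH:MM:SS,mmm layout. A worked sketch of just that helper, no model needed:

from predict import Predictor

p = Predictor.__new__(Predictor)  # skip setup(); the helper touches no model state
prev = 0
prev, ts = p.seconds_to_srt_time_format(prev, 3661.5)
print(ts)  # 01:01:01,500
prev, ts = p.seconds_to_srt_time_format(prev, None)
print(ts)  # 01:01:01,500 again: a missing timestamp reuses the previous one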