erkhem-gantulga committed on
Commit 88fddec · 1 Parent(s): 1899cc9

Added cog container image file for deployment to Replicate

.cog/openapi_schema.json ADDED
@@ -0,0 +1 @@
+ {"components":{"schemas":{"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"title":"Detail","type":"array"}},"title":"HTTPValidationError","type":"object"},"Input":{"properties":{"audio":{"description":"Audio file to transcribe","format":"uri","title":"Audio","type":"string","x-order":0},"batch_size":{"default":8,"description":"Batch size for processing","title":"Batch Size","type":"integer","x-order":4},"chunk_length":{"default":30,"description":"Length of audio chunks to process at once, in seconds","title":"Chunk Length","type":"integer","x-order":3},"language":{"description":"Language of the audio (optional)","title":"Language","type":"string","x-order":1},"task":{"allOf":[{"$ref":"#/components/schemas/task"}],"default":"transcribe","description":"Task to perform: transcribe or translate","x-order":2}},"required":["audio"],"title":"Input","type":"object"},"Output":{"format":"uri","title":"Output","type":"string"},"PredictionRequest":{"properties":{"created_at":{"format":"date-time","title":"Created At","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"output_file_prefix":{"title":"Output File Prefix","type":"string"},"webhook":{"format":"uri","maxLength":65536,"minLength":1,"title":"Webhook","type":"string"},"webhook_events_filter":{"default":["start","output","logs","completed"],"items":{"$ref":"#/components/schemas/WebhookEvent"},"type":"array"}},"title":"PredictionRequest","type":"object"},"PredictionResponse":{"properties":{"completed_at":{"format":"date-time","title":"Completed At","type":"string"},"created_at":{"format":"date-time","title":"Created At","type":"string"},"error":{"title":"Error","type":"string"},"id":{"title":"Id","type":"string"},"input":{"$ref":"#/components/schemas/Input"},"logs":{"default":"","title":"Logs","type":"string"},"metrics":{"title":"Metrics","type":"object"},"output":{"$ref":"#/components/schemas/Output"},"started_at":{"format":"date-time","title":"Started At","type":"string"},"status":{"$ref":"#/components/schemas/Status"},"version":{"title":"Version","type":"string"}},"title":"PredictionResponse","type":"object"},"Status":{"description":"An enumeration.","enum":["starting","processing","succeeded","canceled","failed"],"title":"Status","type":"string"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"title":"Location","type":"array"},"msg":{"title":"Message","type":"string"},"type":{"title":"Error Type","type":"string"}},"required":["loc","msg","type"],"title":"ValidationError","type":"object"},"WebhookEvent":{"description":"An enumeration.","enum":["start","output","logs","completed"],"title":"WebhookEvent","type":"string"},"task":{"description":"An enumeration.","enum":["transcribe","translate"],"title":"task","type":"string"}}},"info":{"title":"Cog","version":"0.1.0"},"openapi":"3.0.2","paths":{"/":{"get":{"operationId":"root__get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Root Get"}}},"description":"Successful Response"}},"summary":"Root"}},"/health-check":{"get":{"operationId":"healthcheck_health_check_get","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Healthcheck Health Check Get"}}},"description":"Successful Response"}},"summary":"Healthcheck"}},"/predictions":{"post":{"description":"Run a single prediction on the model","operationId":"predict_predictions_post","parameters":[{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionRequest"}}}},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict"}},"/predictions/{prediction_id}":{"put":{"description":"Run a single prediction on the model (idempotent creation).","operationId":"predict_idempotent_predictions__prediction_id__put","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}},{"in":"header","name":"prefer","required":false,"schema":{"title":"Prefer","type":"string"}}],"requestBody":{"content":{"application/json":{"schema":{"allOf":[{"$ref":"#/components/schemas/PredictionRequest"}],"title":"Prediction Request"}}},"required":true},"responses":{"200":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/PredictionResponse"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Predict Idempotent"}},"/predictions/{prediction_id}/cancel":{"post":{"description":"Cancel a running prediction","operationId":"cancel_predictions__prediction_id__cancel_post","parameters":[{"in":"path","name":"prediction_id","required":true,"schema":{"title":"Prediction ID","type":"string"}}],"responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Cancel Predictions Prediction Id Cancel Post"}}},"description":"Successful Response"},"422":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}},"description":"Validation Error"}},"summary":"Cancel"}},"/shutdown":{"post":{"operationId":"start_shutdown_shutdown_post","responses":{"200":{"content":{"application/json":{"schema":{"title":"Response Start Shutdown Shutdown Post"}}},"description":"Successful Response"}},"summary":"Start Shutdown"}}}}
.cog/tmp/build20240912112156.1729871670561658/requirements.txt ADDED
@@ -0,0 +1,5 @@
+ --extra-index-url https://download.pytorch.org/whl/cu118
+ numpy==1.26.4
+ torch==2.0.1
+ transformers==4.30.2
+ librosa==0.10.0
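A quick sanity check that the pinned stack resolved correctly (a sketch; run inside the built image):

import numpy, torch, transformers, librosa

# Versions should match the pins above; CUDA should be visible in a GPU build.
print(numpy.__version__, torch.__version__, transformers.__version__, librosa.__version__)
print("CUDA available:", torch.cuda.is_available())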
.dockerignore ADDED
@@ -0,0 +1,17 @@
+ # The .dockerignore file excludes files from the container build process.
+ #
+ # https://docs.docker.com/engine/reference/builder/#dockerignore-file
+
+ # Exclude Git files
+ **/.git
+ **/.github
+ **/.gitignore
+
+ # Exclude Python cache files
+ __pycache__
+ .mypy_cache
+ .pytest_cache
+ .ruff_cache
+
+ # Exclude Python virtual environment
+ /venv
.github/workflows/push.yaml ADDED
@@ -0,0 +1,54 @@
+ name: Push to Replicate
+
+ on:
+   # Workflow dispatch allows you to manually trigger the workflow from GitHub.com
+   # Go to your repo, click "Actions", click "Push to Replicate", click "Run workflow"
+   workflow_dispatch:
+     inputs:
+       model_name:
+         description: 'Enter the model name, like "alice/bunny-detector". If unset, this will default to the value of `image` in cog.yaml.'
+   # # Uncomment these lines to trigger the workflow on every push to the main branch
+   # push:
+   #   branches:
+   #     - main
+
+ jobs:
+   push_to_replicate:
+     name: Push to Replicate
+
+     # If your model is large, the default GitHub Actions runner may not
+     # have enough disk space. If you need more space you can set up a
+     # bigger runner on GitHub.
+     runs-on: ubuntu-latest
+
+     steps:
+       # This action cleans up disk space to make more room for your
+       # model code, weights, etc.
+       - name: Free disk space
+         uses: jlumbroso/free-disk-space@main
+         with:
+           tool-cache: false
+           docker-images: false
+
+       - name: Checkout
+         uses: actions/checkout@v4
+
+       # This action installs Docker buildx and Cog (and optionally CUDA)
+       - name: Setup Cog
+         uses: replicate/setup-cog@v2
+         with:
+           # If you set REPLICATE_API_TOKEN in your GitHub repository secrets,
+           # the action will authenticate with Replicate automatically so you
+           # can push your model
+           token: ${{ secrets.REPLICATE_API_TOKEN }}
+
+       # If you trigger the workflow manually, you can specify the model name.
+       # If you leave it blank (or if the workflow is triggered by a push), the
+       # model name will be derived from the `image` value in cog.yaml.
+       - name: Push to Replicate
+         run: |
+           if [ -n "${{ inputs.model_name }}" ]; then
+             cog push r8.im/${{ inputs.model_name }}
+           else
+             cog push
+           fi
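Once pushed, the model can be invoked from Python with the `replicate` client; a minimal sketch (the model identifier is a placeholder for whatever `cog push` targeted, and REPLICATE_API_TOKEN must be set in the environment):

import replicate

# Placeholder identifier -- substitute the owner/name from your Replicate model page.
output = replicate.run(
    "owner/whisper-transcriber",
    input={"audio": open("sample.wav", "rb"), "task": "transcribe"},
)
print(output)  # URI of the generated .srt file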
cog.yaml ADDED
@@ -0,0 +1,29 @@
+ # Configuration for Cog ⚙️
+ # Reference: https://cog.run/yaml
+
+ build:
+   # set to true if your model requires a GPU
+   gpu: true
+
+   # a list of ubuntu apt packages to install
+   # system_packages:
+   #   - "libgl1-mesa-glx"
+   #   - "libglib2.0-0"
+
+   # python version in the form '3.11' or '3.11.4'
+   python_version: "3.11"
+
+   # a list of packages in the format <package-name>==<version>
+   python_packages:
+     - "numpy==1.26.4"
+     - "torch==2.0.1"
+     - "transformers==4.30.2"
+     - "librosa==0.10.0"
+
+   # commands run after the environment is setup
+   # run:
+   #   - "echo env is ready!"
+   #   - "echo another command if needed"
+
+ # predict.py defines how predictions are run on your model
+ predict: "predict.py:Predictor"
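Before pushing, this configuration can be exercised locally with `cog predict -i audio=@sample.wav` (the file name is a placeholder); Cog builds the image from this file and runs `Predictor.predict` once against the given input.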
predict.py ADDED
@@ -0,0 +1,69 @@
+ # Prediction interface for Cog ⚙️
+ # https://cog.run/python
+
+ from cog import BasePredictor, Input, Path
+ import torch
+ from transformers import pipeline
+ import os
+ import time
+
+ class Predictor(BasePredictor):
+     def setup(self):
+         """Load the model into memory to make running multiple predictions efficient"""
+         # Check if CUDA is available and set the device accordingly
+         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         self.pipe = pipeline("automatic-speech-recognition",
+                              model="./",
+                              device=self.device,
+                              torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+
+     def predict(
+         self,
+         audio: Path = Input(description="Audio file to transcribe"),
+         language: str = Input(description="Language of the audio (optional)", default=None),
+         task: str = Input(description="Task to perform: transcribe or translate", default="transcribe", choices=["transcribe", "translate"]),
+         chunk_length: int = Input(description="Length of audio chunks to process at once, in seconds", default=30),
+         batch_size: int = Input(description="Batch size for processing", default=8),
+     ) -> Path:
+         """Run a single prediction on the model"""
+         start_time = time.perf_counter()
+
+         # Set the language and task (forced_decoder_ids steers Whisper's decoder)
+         if language:
+             self.pipe.model.config.forced_decoder_ids = self.pipe.tokenizer.get_decoder_prompt_ids(language=language, task=task)
+
+         # Perform ASR
+         outputs = self.pipe(str(audio), chunk_length_s=chunk_length, batch_size=batch_size, return_timestamps=True)
+
+         elapsed_time = time.perf_counter() - start_time
+         print(f"ASR took {elapsed_time:.2f} seconds.")
+
+         # Save ASR chunks to an SRT file
+         audio_file_name = os.path.splitext(os.path.basename(str(audio)))[0]
+         srt_filename = f"{audio_file_name}.srt"
+         with open(srt_filename, 'w', encoding="utf-8") as srt_file:
+             prev = 0
+             for index, chunk in enumerate(outputs['chunks']):
+                 prev, srt_start = self.seconds_to_srt_time_format(prev, chunk['timestamp'][0])
+                 prev, srt_end = self.seconds_to_srt_time_format(prev, chunk['timestamp'][1])
+                 srt_file.write(f"{index + 1}\n")
+                 srt_file.write(f"{srt_start} --> {srt_end}\n")
+                 srt_file.write(f"{chunk['text'].strip()}\n\n")
+
+         return Path(srt_filename)
+
+     def seconds_to_srt_time_format(self, prev, seconds):
+         # Whisper can return None for a chunk's end timestamp (typically the
+         # final chunk); fall back to the previous known time so the SRT stays valid.
+         if not isinstance(seconds, (int, float)):
+             seconds = prev
+         else:
+             prev = seconds
+         hours = seconds // 3600
+         seconds %= 3600
+         minutes = seconds // 60
+         seconds %= 60
+         milliseconds = int((seconds - int(seconds)) * 1000)
+         hours = int(hours)
+         minutes = int(minutes)
+         seconds = int(seconds)
+         return (prev, f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}")
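To make the timestamp handling concrete, here is a standalone check of the same arithmetic (a sketch; `to_srt_time` is a hypothetical free-function equivalent of the method above):

# Hypothetical standalone equivalent of seconds_to_srt_time_format.
def to_srt_time(prev, seconds):
    # None (a missing Whisper timestamp) falls back to the previous time
    if not isinstance(seconds, (int, float)):
        seconds = prev
    else:
        prev = seconds
    h, rem = divmod(int(seconds), 3600)
    m, s = divmod(rem, 60)
    ms = int((seconds - int(seconds)) * 1000)
    return prev, f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

print(to_srt_time(0, 3661.5))     # (3661.5, '01:01:01,500')
print(to_srt_time(3661.5, None))  # (3661.5, '01:01:01,500') -- fallback path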