A bitsandbytes quantization
of the model located at:
https://huggingface.co/allenai/Molmo-7B-D-0924
NOTE:
The sample script below requires an Nvidia GPU and the following libraries installed in your virtual environment. Alternatively, you can rely on a system-wide installation of CUDA, in which case just remove the set_cuda_paths
function and the call to it.
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cufft-cu12==11.0.2.54
Also, I have only tested it with torch==2.2.2,
although I plan to test newer versions. For Windows users, install torch with these commands:
pip install https://download.pytorch.org/whl/cu121/torch-2.2.2%2Bcu121-cp311-cp311-win_amd64.whl#sha256=efbcfdd4399197d06b32f7c0e1711c615188cdd65427b933648c7478fb880b3f
pip install https://download.pytorch.org/whl/cu121/torchvision-0.17.2%2Bcu121-cp311-cp311-win_amd64.whl#sha256=10ad542aab6b47dbe73c441381986d50a7ed5021cbe01d593a14477ec1f067a0
pip install https://download.pytorch.org/whl/cu121/torchaudio-2.2.2%2Bcu121-cp311-cp311-win_amd64.whl#sha256=c7dee68cd3d2b889bab71d4a0c345bdc3ea2fe79a62b921a6b49292c605b6071
Example script (process single image):
import sys
import os
from pathlib import Path
def set_cuda_paths():
    """Put the pip-installed NVIDIA library folders first in the CUDA variables.

    The folders are ``<venv>/Lib/site-packages/nvidia/<package>/bin`` for the
    ``cuda_runtime``, ``cublas``, ``cudnn`` and ``cuda_nvrtc`` packages, where
    ``<venv>`` is two levels above ``sys.executable``. This matches the layout
    of a Windows virtual environment.

    Each of ``CUDA_PATH``, ``CUDA_PATH_V12_1`` and ``PATH`` is rewritten so
    that these folders come first, followed by whatever the variable already
    held. The function is idempotent: folders already present in a variable
    are moved to the front rather than added a second time, so repeated calls
    do not keep growing the variables.

    Returns:
        None. The function only mutates ``os.environ``.
    """
    venv_base = Path(sys.executable).parent.parent
    nvidia_base_path = venv_base / 'Lib' / 'site-packages' / 'nvidia'
    paths_to_add = [
        str(nvidia_base_path / package / 'bin')
        for package in ('cuda_runtime', 'cublas', 'cudnn', 'cuda_nvrtc')
    ]

    for env_var in ('CUDA_PATH', 'CUDA_PATH_V12_1', 'PATH'):
        current_value = os.environ.get(env_var, '')
        if current_value:
            # Keep every pre-existing entry except the ones about to be
            # prepended, so a second call yields exactly the same value.
            kept_entries = [
                entry
                for entry in current_value.split(os.pathsep)
                if entry not in paths_to_add
            ]
            entries = paths_to_add + kept_entries
        else:
            # Unset or empty variable: avoid a trailing separator.
            entries = paths_to_add
        os.environ[env_var] = os.pathsep.join(entries)
# Called before torch is imported below — presumably so the pip-installed CUDA
# libraries are discoverable when torch loads (TODO confirm). Remove this call
# together with the function if you rely on a system-wide CUDA installation.
set_cuda_paths()
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
# Folder containing the model files; replace the placeholder before running.
# A raw string is used so Windows backslashes need no escaping.
model_path = r"[INSERT THE PATH TO THE FOLDER HOLDING THE MODEL FILES HERE]"
class VisionModel:
    """Load the checkpoint at ``model_path`` and describe single images.

    Usage: construct an instance, call ``initialize_model_and_processor()``
    once, then call ``process_single_image()`` for each image.
    """

    def __init__(self):
        # Both are populated by initialize_model_and_processor().
        self.model = None
        self.processor = None
        # Fallback device: CUDA when available, otherwise CPU. Once a model is
        # loaded, inputs follow the model's own device instead.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def initialize_model_and_processor(self):
        """Load the processor and the model from the module-level ``model_path``.

        ``trust_remote_code=True`` is passed to both loaders; the model
        repository ships custom code. Both are loaded with
        ``torch_dtype='auto'`` and ``device_map='auto'``.
        """
        self.processor = AutoProcessor.from_pretrained(
            model_path,
            trust_remote_code=True,
            torch_dtype='auto',
            device_map='auto'
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            trust_remote_code=True,
            torch_dtype='auto',
            device_map='auto'
        )

    def process_single_image(self, image_path):
        """Generate, print and return a description of one image.

        Args:
            image_path: Path of the image file to describe.

        Returns:
            The generated description as a string (it is also printed).

        Raises:
            RuntimeError: If ``initialize_model_and_processor()`` has not been
                called yet.
        """
        if self.model is None or self.processor is None:
            raise RuntimeError(
                "Model and processor are not loaded; call "
                "initialize_model_and_processor() first."
            )

        # The context manager releases the file handle deterministically.
        # convert() returns a new, fully loaded image, so it stays usable
        # after the file is closed.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        text = "Describe this image in detail as possible but be succinct and don't repeat yourself."
        inputs = self.processor.process(images=[image], text=text)

        # Send the inputs to wherever device_map='auto' actually placed the
        # model; fall back to the constructor's guess if the model exposes no
        # ``device`` attribute. unsqueeze(0) adds the leading batch dimension.
        target_device = getattr(self.model, 'device', self.device)
        inputs = {k: v.to(target_device).unsqueeze(0) for k, v in inputs.items()}

        output = self.model.generate_from_batch(
            inputs,
            GenerationConfig(max_new_tokens=500, stop_strings=["<|endoftext|>"]),
            tokenizer=self.processor.tokenizer
        )

        # Decode only the tokens after the prompt.
        prompt_length = inputs['input_ids'].size(1)
        generated_text = self.processor.tokenizer.decode(
            output[0, prompt_length:],
            skip_special_tokens=True
        )
        print(f"\nGenerated Text:\n{generated_text}\n")
        return generated_text
if __name__ == "__main__":
    # Build the wrapper and load the checkpoint first.
    vision_model = VisionModel()
    vision_model.initialize_model_and_processor()

    # Replace the placeholder with the path of the image to describe.
    image_path = r"[INSERT THE PATH TO THE IMAGE YOU WANT TO PROCESS HERE]"
    vision_model.process_single_image(image_path)
- Downloads last month
- 26
Inference API (serverless) does not yet support model repos that contain custom code.