CamiloVega committed
Update app.py
app.py
CHANGED
@@ -44,7 +44,7 @@ class ModelManager:
 
     @spaces.GPU(duration=120)
     def initialize_models(self):
-        """Initialize models with
+        """Initialize models with ZeroGPU compatible settings"""
         try:
             import torch
             from transformers import AutoModelForCausalLM, AutoTokenizer
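For context on the hunk above: on a ZeroGPU Space the process normally runs on CPU, and `@spaces.GPU(duration=120)` attaches a GPU only while the decorated call runs (here for at most 120 seconds). A minimal sketch of the pattern, with an illustrative function of my own rather than the app's:

```python
# Sketch of the ZeroGPU pattern this commit adapts to: the `spaces` package
# provisions a GPU only for the duration of the decorated call.
import spaces
import torch

@spaces.GPU(duration=120)  # request a GPU for up to 120 s per call
def run_on_gpu(x: torch.Tensor) -> torch.Tensor:
    # CUDA is only guaranteed to be available inside this function
    return x.to("cuda").sum().cpu()
```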
@@ -56,7 +56,7 @@ class ModelManager:
             logger.info("Starting model initialization...")
             model_name = "meta-llama/Llama-2-7b-chat-hf"
 
-            # Load tokenizer
+            # Load tokenizer
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
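The unchanged context just below this hunk sets `pad_token = eos_token`: Llama-2's tokenizer ships without a pad token, and padding-dependent calls fail until one is assigned. A standalone sketch (the access token is a placeholder; the app reads it from `HUGGINGFACE_TOKEN`):

```python
# Sketch: Llama-2 defines no pad token, so the standard workaround is to
# reuse the EOS token. The token string is a placeholder for a gated model.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    token="hf_...",  # placeholder
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
```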
@@ -66,18 +66,22 @@ class ModelManager:
             )
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
-            # Initialize model with
+            # Initialize model with ZeroGPU compatible settings
             logger.info("Loading model...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 token=HUGGINGFACE_TOKEN,
                 device_map="auto",
                 torch_dtype=torch.float16,
-                load_in_8bit=True,
                 low_cpu_mem_usage=True,
+                use_safetensors=True,
+                # ZeroGPU specific settings
+                max_memory={0: "6GB"},
+                offload_folder="offload",
+                offload_state_dict=True
             )
 
-            # Create pipeline
+            # Create pipeline with minimal settings
             logger.info("Creating pipeline...")
             from transformers import pipeline
             self.news_generator = pipeline(
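This hunk carries the substantive change: `load_in_8bit=True` (which requires the bitsandbytes CUDA kernels) is replaced by plain fp16 weights plus accelerate's offloading options. With `device_map="auto"`, `max_memory={0: "6GB"}` caps what GPU 0 may hold; overflow layers stay on CPU or spill to disk under `offload_folder`, and `offload_state_dict=True` streams weights in to avoid holding a full extra copy in RAM while loading. A trimmed sketch of just the loading call (token placeholder as above):

```python
# Sketch of the new strategy: fp16 + accelerate offload instead of 8-bit
# quantization. Kwargs mirror the diff; the token is a placeholder.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    token="hf_...",            # placeholder for HUGGINGFACE_TOKEN
    device_map="auto",         # let accelerate place layers per device
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    max_memory={0: "6GB"},     # cap for GPU 0; overflow is offloaded
    offload_folder="offload",  # disk spill area for offloaded weights
    offload_state_dict=True,   # stream the state dict to limit peak RAM
)
```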
@@ -95,13 +99,12 @@ class ModelManager:
                 early_stopping=True
             )
 
-            # Load Whisper model with
+            # Load Whisper model with minimal settings
             logger.info("Loading Whisper model...")
             self.whisper_model = whisper.load_model(
                 "tiny",
                 device="cuda" if torch.cuda.is_available() else "cpu",
-                download_root="/tmp/whisper",
-                in_memory=True
+                download_root="/tmp/whisper"
             )
 
             logger.info("All models initialized successfully")
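On the Whisper side the commit drops `in_memory=True` (which keeps the raw checkpoint bytes resident in RAM after loading) and keeps only the cache relocation to `/tmp/whisper`. A sketch of the resulting call with a typical transcription (the audio path is illustrative):

```python
# Sketch: load the smallest Whisper checkpoint, caching it under /tmp.
import torch
import whisper

whisper_model = whisper.load_model(
    "tiny",
    device="cuda" if torch.cuda.is_available() else "cpu",
    download_root="/tmp/whisper",
)
result = whisper_model.transcribe("interview.mp3")  # illustrative path
print(result["text"])
```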
@@ -113,7 +116,7 @@ class ModelManager:
             raise
 
     def reset_models(self):
-        """Reset all models and clear
+        """Reset all models and clear memory"""
         try:
             if hasattr(self, 'model') and self.model is not None:
                 self.model.cpu()
@@ -126,7 +129,8 @@ class ModelManager:
                 del self.news_generator
 
             if hasattr(self, 'whisper_model') and self.whisper_model is not None:
-                self.whisper_model.cpu()
+                if hasattr(self.whisper_model, 'cpu'):
+                    self.whisper_model.cpu()
                 del self.whisper_model
 
             self.tokenizer = None
@@ -138,6 +142,7 @@ class ModelManager:
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
 
+            import gc
             gc.collect()
 
         except Exception as e:
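Taken together, the `reset_models` hunks follow the usual CUDA release recipe: move a module to CPU, drop the owning reference, then flush the caching allocator and run the garbage collector; the added local `import gc` makes the method self-contained. A generic sketch of that recipe (the helper is mine, not from the app):

```python
# Sketch of the release recipe used by reset_models: CPU first, drop the
# owning reference, then flush the CUDA cache and collect garbage.
import gc
import torch

def release(owner, attr):
    module = getattr(owner, attr, None)
    if module is None:
        return
    if hasattr(module, "cpu"):
        module.cpu()            # move parameters off the GPU first
    setattr(owner, attr, None)  # drop the owning reference
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # return cached blocks to the driver
        torch.cuda.synchronize()
    gc.collect()
```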
@@ -153,7 +158,7 @@ class ModelManager:
         """Get initialized models, initializing if necessary"""
         self.check_models_initialized()
         return self.tokenizer, self.model, self.news_generator, self.whisper_model
-
+
 # Create global model manager instance
 model_manager = ModelManager()
 
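The unchanged tail shows the module-level `model_manager = ModelManager()`: one process-wide instance whose `get_models()` initializes lazily and hands the same objects to every caller. A sketch of the implied call site (handler name and generation kwargs are illustrative, assuming a text-generation pipeline):

```python
# Sketch of the intended usage: all handlers share the one global manager.
def handle_request(prompt: str) -> str:
    tokenizer, model, news_generator, whisper_model = model_manager.get_models()
    output = news_generator(prompt, max_new_tokens=200)  # illustrative kwargs
    return output[0]["generated_text"]
```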