CamiloVega committed on
Commit
7cdb936
·
verified ·
1 Parent(s): f2f9165

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -44,7 +44,7 @@ class ModelManager:
44
 
45
  @spaces.GPU(duration=120)
46
  def initialize_models(self):
47
- """Initialize models with optimized settings"""
48
  try:
49
  import torch
50
  from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -56,7 +56,7 @@ class ModelManager:
56
  logger.info("Starting model initialization...")
57
  model_name = "meta-llama/Llama-2-7b-chat-hf"
58
 
59
- # Load tokenizer with optimized settings
60
  logger.info("Loading tokenizer...")
61
  self.tokenizer = AutoTokenizer.from_pretrained(
62
  model_name,
@@ -66,18 +66,22 @@ class ModelManager:
66
  )
67
  self.tokenizer.pad_token = self.tokenizer.eos_token
68
 
69
- # Initialize model with basic settings
70
  logger.info("Loading model...")
71
  self.model = AutoModelForCausalLM.from_pretrained(
72
  model_name,
73
  token=HUGGINGFACE_TOKEN,
74
  device_map="auto",
75
  torch_dtype=torch.float16,
76
- load_in_8bit=True,
77
  low_cpu_mem_usage=True,
 
 
 
 
 
78
  )
79
 
80
- # Create pipeline
81
  logger.info("Creating pipeline...")
82
  from transformers import pipeline
83
  self.news_generator = pipeline(
@@ -95,13 +99,12 @@ class ModelManager:
95
  early_stopping=True
96
  )
97
 
98
- # Load Whisper model with basic settings
99
  logger.info("Loading Whisper model...")
100
  self.whisper_model = whisper.load_model(
101
  "tiny",
102
  device="cuda" if torch.cuda.is_available() else "cpu",
103
- download_root="/tmp/whisper",
104
- in_memory=True
105
  )
106
 
107
  logger.info("All models initialized successfully")
@@ -113,7 +116,7 @@ class ModelManager:
113
  raise
114
 
115
  def reset_models(self):
116
- """Reset all models and clear GPU memory"""
117
  try:
118
  if hasattr(self, 'model') and self.model is not None:
119
  self.model.cpu()
@@ -126,7 +129,8 @@ class ModelManager:
126
  del self.news_generator
127
 
128
  if hasattr(self, 'whisper_model') and self.whisper_model is not None:
129
- self.whisper_model.cpu()
 
130
  del self.whisper_model
131
 
132
  self.tokenizer = None
@@ -138,6 +142,7 @@ class ModelManager:
138
  torch.cuda.empty_cache()
139
  torch.cuda.synchronize()
140
 
 
141
  gc.collect()
142
 
143
  except Exception as e:
@@ -153,7 +158,7 @@ class ModelManager:
153
  """Get initialized models, initializing if necessary"""
154
  self.check_models_initialized()
155
  return self.tokenizer, self.model, self.news_generator, self.whisper_model
156
-
157
  # Create global model manager instance
158
  model_manager = ModelManager()
159
 
 
44
 
45
  @spaces.GPU(duration=120)
46
  def initialize_models(self):
47
+ """Initialize models with ZeroGPU compatible settings"""
48
  try:
49
  import torch
50
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
56
  logger.info("Starting model initialization...")
57
  model_name = "meta-llama/Llama-2-7b-chat-hf"
58
 
59
+ # Load tokenizer
60
  logger.info("Loading tokenizer...")
61
  self.tokenizer = AutoTokenizer.from_pretrained(
62
  model_name,
 
66
  )
67
  self.tokenizer.pad_token = self.tokenizer.eos_token
68
 
69
+ # Initialize model with ZeroGPU compatible settings
70
  logger.info("Loading model...")
71
  self.model = AutoModelForCausalLM.from_pretrained(
72
  model_name,
73
  token=HUGGINGFACE_TOKEN,
74
  device_map="auto",
75
  torch_dtype=torch.float16,
 
76
  low_cpu_mem_usage=True,
77
+ use_safetensors=True,
78
+ # ZeroGPU specific settings
79
+ max_memory={0: "6GB"},
80
+ offload_folder="offload",
81
+ offload_state_dict=True
82
  )
83
 
84
+ # Create pipeline with minimal settings
85
  logger.info("Creating pipeline...")
86
  from transformers import pipeline
87
  self.news_generator = pipeline(
 
99
  early_stopping=True
100
  )
101
 
102
+ # Load Whisper model with minimal settings
103
  logger.info("Loading Whisper model...")
104
  self.whisper_model = whisper.load_model(
105
  "tiny",
106
  device="cuda" if torch.cuda.is_available() else "cpu",
107
+ download_root="/tmp/whisper"
 
108
  )
109
 
110
  logger.info("All models initialized successfully")
 
116
  raise
117
 
118
  def reset_models(self):
119
+ """Reset all models and clear memory"""
120
  try:
121
  if hasattr(self, 'model') and self.model is not None:
122
  self.model.cpu()
 
129
  del self.news_generator
130
 
131
  if hasattr(self, 'whisper_model') and self.whisper_model is not None:
132
+ if hasattr(self.whisper_model, 'cpu'):
133
+ self.whisper_model.cpu()
134
  del self.whisper_model
135
 
136
  self.tokenizer = None
 
142
  torch.cuda.empty_cache()
143
  torch.cuda.synchronize()
144
 
145
+ import gc
146
  gc.collect()
147
 
148
  except Exception as e:
 
158
  """Get initialized models, initializing if necessary"""
159
  self.check_models_initialized()
160
  return self.tokenizer, self.model, self.news_generator, self.whisper_model
161
+
162
  # Create global model manager instance
163
  model_manager = ModelManager()
164