Spaces:

CamiloVega
/

NewsIA

Sleeping

App Files Files Community

CamiloVega committed on Nov 3, 2024

Commit

7cdb936

verified ·

1 Parent(s): f2f9165

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -11

app.py CHANGED Viewed

@@ -44,7 +44,7 @@ class ModelManager:
     @spaces.GPU(duration=120)
     def initialize_models(self):
-        """Initialize models with optimized settings"""
         try:
             import torch
             from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -56,7 +56,7 @@ class ModelManager:
             logger.info("Starting model initialization...")
             model_name = "meta-llama/Llama-2-7b-chat-hf"
-            # Load tokenizer with optimized settings
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
@@ -66,18 +66,22 @@ class ModelManager:
             )
             self.tokenizer.pad_token = self.tokenizer.eos_token
-            # Initialize model with basic settings
             logger.info("Loading model...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 token=HUGGINGFACE_TOKEN,
                 device_map="auto",
                 torch_dtype=torch.float16,
-                load_in_8bit=True,
                 low_cpu_mem_usage=True,
             )
-            # Create pipeline
             logger.info("Creating pipeline...")
             from transformers import pipeline
             self.news_generator = pipeline(
@@ -95,13 +99,12 @@ class ModelManager:
                 early_stopping=True
             )
-            # Load Whisper model with basic settings
             logger.info("Loading Whisper model...")
             self.whisper_model = whisper.load_model(
                 "tiny",
                 device="cuda" if torch.cuda.is_available() else "cpu",
-                download_root="/tmp/whisper",
-                in_memory=True
             )
             logger.info("All models initialized successfully")
@@ -113,7 +116,7 @@ class ModelManager:
             raise
     def reset_models(self):
-        """Reset all models and clear GPU memory"""
         try:
             if hasattr(self, 'model') and self.model is not None:
                 self.model.cpu()
@@ -126,7 +129,8 @@ class ModelManager:
                 del self.news_generator
             if hasattr(self, 'whisper_model') and self.whisper_model is not None:
-                self.whisper_model.cpu()
                 del self.whisper_model
             self.tokenizer = None
@@ -138,6 +142,7 @@ class ModelManager:
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
             gc.collect()
         except Exception as e:
@@ -153,7 +158,7 @@ class ModelManager:
         """Get initialized models, initializing if necessary"""
         self.check_models_initialized()
         return self.tokenizer, self.model, self.news_generator, self.whisper_model
 # Create global model manager instance
 model_manager = ModelManager()

     @spaces.GPU(duration=120)
     def initialize_models(self):
+        """Initialize models with ZeroGPU compatible settings"""
         try:
             import torch
             from transformers import AutoModelForCausalLM, AutoTokenizer
             logger.info("Starting model initialization...")
             model_name = "meta-llama/Llama-2-7b-chat-hf"
+            # Load tokenizer
             logger.info("Loading tokenizer...")
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
             )
             self.tokenizer.pad_token = self.tokenizer.eos_token
+            # Initialize model with ZeroGPU compatible settings
             logger.info("Loading model...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 token=HUGGINGFACE_TOKEN,
                 device_map="auto",
                 torch_dtype=torch.float16,
                 low_cpu_mem_usage=True,
+                use_safetensors=True,
+                # ZeroGPU specific settings
+                max_memory={0: "6GB"},
+                offload_folder="offload",
+                offload_state_dict=True
             )
+            # Create pipeline with minimal settings
             logger.info("Creating pipeline...")
             from transformers import pipeline
             self.news_generator = pipeline(
                 early_stopping=True
             )
+            # Load Whisper model with minimal settings
             logger.info("Loading Whisper model...")
             self.whisper_model = whisper.load_model(
                 "tiny",
                 device="cuda" if torch.cuda.is_available() else "cpu",
+                download_root="/tmp/whisper"
             )
             logger.info("All models initialized successfully")
             raise
     def reset_models(self):
+        """Reset all models and clear memory"""
         try:
             if hasattr(self, 'model') and self.model is not None:
                 self.model.cpu()
                 del self.news_generator
             if hasattr(self, 'whisper_model') and self.whisper_model is not None:
+                if hasattr(self.whisper_model, 'cpu'):
+                    self.whisper_model.cpu()
                 del self.whisper_model
             self.tokenizer = None
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
+            import gc
             gc.collect()
         except Exception as e:
         """Get initialized models, initializing if necessary"""
         self.check_models_initialized()
         return self.tokenizer, self.model, self.news_generator, self.whisper_model
 # Create global model manager instance
 model_manager = ModelManager()