fullstuckdev committed
Commit ce875c8 · 1 Parent(s): 93374aa

update script

Files changed (1): app.py (+133, -49)
app.py CHANGED
@@ -93,48 +93,77 @@ async def root():
 async def generate_text(request: GenerateRequest):
     """
     Generate medical text based on input prompt
-
-    Parameters:
-    - text: Input text prompt
-    - max_length: Maximum length of generated text
-    - temperature: Sampling temperature (0.0 to 1.0)
-    - num_return_sequences: Number of sequences to generate
-
-    Returns:
-    - List of generated text sequences
     """
     try:
+        # Check if model is loaded
         if model is None or tokenizer is None:
-            raise HTTPException(status_code=500, detail="Model not loaded")
+            logger.error("Model or tokenizer not initialized")
+            raise HTTPException(
+                status_code=500,
+                detail="Model not loaded. Please check if model was initialized correctly."
+            )

-        inputs = tokenizer(
-            request.text,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=request.max_length
-        ).to(model.device)
+        logger.info(f"Generating text for input: {request.text[:50]}...")
+
+        # Log device information
+        device_info = f"Using device: {model.device}"
+        logger.info(device_info)

-        with torch.no_grad():
-            generated_ids = model.generate(
-                inputs.input_ids,
-                max_length=request.max_length,
-                num_return_sequences=request.num_return_sequences,
-                temperature=request.temperature,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
+        # Tokenize input
+        try:
+            inputs = tokenizer(
+                request.text,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=request.max_length
             )
+            logger.info("Input tokenized successfully")
+
+            # Move inputs to correct device
+            inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+        except Exception as e:
+            logger.error(f"Tokenization error: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Tokenization failed: {str(e)}")
+
+        # Generate text
+        try:
+            with torch.no_grad():
+                generated_ids = model.generate(
+                    inputs["input_ids"],
+                    max_length=request.max_length,
+                    num_return_sequences=request.num_return_sequences,
+                    temperature=request.temperature,
+                    pad_token_id=tokenizer.pad_token_id,
+                    eos_token_id=tokenizer.eos_token_id,
+                )
+            logger.info("Text generated successfully")
+        except Exception as e:
+            logger.error(f"Generation error: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Text generation failed: {str(e)}")

-        generated_texts = [
-            tokenizer.decode(g, skip_special_tokens=True)
-            for g in generated_ids
-        ]
+        # Decode generated text
+        try:
+            generated_texts = [
+                tokenizer.decode(g, skip_special_tokens=True)
+                for g in generated_ids
+            ]
+            logger.info("Text decoded successfully")
+        except Exception as e:
+            logger.error(f"Decoding error: {str(e)}")
+            raise HTTPException(status_code=500, detail=f"Text decoding failed: {str(e)}")

         return GenerateResponse(generated_text=generated_texts)

+    except HTTPException as he:
+        raise he
     except Exception as e:
-        logger.error(f"Generation error: {str(e)}")
-        raise HTTPException(status_code=500, detail=str(e))
+        logger.error(f"Unexpected error: {str(e)}")
+        raise HTTPException(
+            status_code=500,
+            detail=f"An unexpected error occurred: {str(e)}"
+        )

 @app.get("/health", tags=["Health"])
 async def health_check():
@@ -297,25 +326,80 @@ def init_model():
         device = "cuda" if torch.cuda.is_available() else "cpu"
         logger.info(f"Loading model on device: {device}")

-        # Try to load fine-tuned model if it exists
-        if os.path.exists(model_output_path):
-            tokenizer = AutoTokenizer.from_pretrained(model_output_path)
-            model = AutoModelForCausalLM.from_pretrained(
-                model_output_path,
-                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-                device_map="auto"
-            )
-        else:
-            # Load base model if no fine-tuned model exists
-            model_name = "nvidia/Meta-Llama-3.2-3B-Instruct-ONNX-INT4"
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
-            model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-                device_map="auto"
-            )
+        model_name = "nvidia/Meta-Llama-3.2-3B-Instruct-ONNX-INT4"
+
+        # Load tokenizer
+        logger.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir="/app/cache",
+            trust_remote_code=True
+        )
+
+        # Add padding token if not present
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        logger.info("Loading model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+            device_map="auto",
+            cache_dir="/app/cache",
+            trust_remote_code=True
+        )

+        logger.info(f"Model loaded successfully on {device}")
         return tokenizer, model
+
     except Exception as e:
         logger.error(f"Model initialization error: {str(e)}")
-        raise
+        raise
+
+@app.get("/model-status", tags=["Health"])
+async def model_status():
+    """
+    Get detailed model status
+    """
+    try:
+        model_info = {
+            "model_loaded": model is not None,
+            "tokenizer_loaded": tokenizer is not None,
+            "model_device": str(model.device) if model else None,
+            "gpu_available": torch.cuda.is_available(),
+            "cuda_device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+            "cuda_device_name": torch.cuda.get_device_name(0) if torch.cuda.is_available() else None,
+            "model_type": type(model).__name__ if model else None,
+            "tokenizer_type": type(tokenizer).__name__ if tokenizer else None,
+        }
+
+        if model is not None:
+            try:
+                # Test tokenizer
+                test_input = tokenizer("test", return_tensors="pt")
+                model_info["tokenizer_working"] = True
+            except Exception as e:
+                model_info["tokenizer_working"] = False
+                model_info["tokenizer_error"] = str(e)
+
+            try:
+                # Test model forward pass
+                with torch.no_grad():
+                    test_output = model.generate(
+                        test_input.input_ids.to(model.device),
+                        max_length=10
+                    )
+                model_info["model_working"] = True
+            except Exception as e:
+                model_info["model_working"] = False
+                model_info["model_error"] = str(e)
+
+        return model_info
+
+    except Exception as e:
+        logger.error(f"Error checking model status: {str(e)}")
+        return {
+            "error": str(e),
+            "model_loaded": model is not None,
+            "tokenizer_loaded": tokenizer is not None
+        }
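
For quick verification after this change, a minimal client sketch against the updated service. This is a sketch under assumptions, not code from the repo: the base URL (a locally running Space on port 7860) and the POST route /generate are guesses, since the route decorator for generate_text sits outside this diff; the payload fields mirror GenerateRequest as used above, and /model-status is the endpoint added in this commit.

import requests

BASE_URL = "http://localhost:7860"  # assumption: local Space; adjust to your deployment

# Check that the model loaded before generating (endpoint added in this commit)
status = requests.get(f"{BASE_URL}/model-status", timeout=30).json()
print("model_loaded:", status.get("model_loaded"), "model_working:", status.get("model_working"))

# Assumed route name; fields mirror GenerateRequest as used in generate_text above
payload = {
    "text": "List common symptoms of type 2 diabetes.",
    "max_length": 200,
    "temperature": 0.7,
    "num_return_sequences": 1,
}
resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["generated_text"])

One caveat visible in the diff itself: generate() receives max_length=request.max_length, the same limit used to truncate the prompt, so a long prompt can leave no room for new tokens; passing max_new_tokens instead is the usual remedy.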