Spaces:

mlwong
/

npc-bert-demo

Running on Zero

App Files Files Community

mlwong committed on Oct 12, 2024

Commit

8e24969

1 Parent(s): 6ece534

Fall back to use CPU

Browse files

Files changed (3) hide show

npc_bert_models/cls_module.py +4 -8
npc_bert_models/mlm_module.py +2 -7
npc_bert_models/summary_module.py +5 -21

npc_bert_models/cls_module.py CHANGED Viewed

@@ -48,14 +48,8 @@ class NpcBertCLS():
         self.model = AutoModelForSequenceClassification.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
-        try:
-            self.pipeline = hf_pipeline("text-classification", model=self.model, tokenizer=self.tokenizer, device='cuda')
-            self.pipeline.model.to('cuda')
-        except Exception as e:
-            self.pipeline = hf_pipeline("text-classification", model=self.model, tokenizer=self.tokenizer, device='cpu')
-            self.logger.warning("No GPU!")
-            self.logger.exception(e)
     @spaces.GPU
     def __call__(self, *args: Any) -> Any:
         """Performs classification on the given reports.
@@ -82,6 +76,8 @@ class NpcBertCLS():
         if len(args[0]) < 10:
             return "Not enough text for classification!"
         pipe_out = self.pipeline(*args)
         pipe_out = {o['label']: o['score'] for o in pipe_out}
         return pipe_out

         self.model = AutoModelForSequenceClassification.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
+        self.pipeline = hf_pipeline("text-classification", model=self.model, tokenizer=self.tokenizer, device_map='auto')
     @spaces.GPU
     def __call__(self, *args: Any) -> Any:
         """Performs classification on the given reports.
         if len(args[0]) < 10:
             return "Not enough text for classification!"
+        self.logger.info(f"{self.pipeline.model.device = }")
         pipe_out = self.pipeline(*args)
         pipe_out = {o['label']: o['score'] for o in pipe_out}
         return pipe_out

npc_bert_models/mlm_module.py CHANGED Viewed

@@ -47,13 +47,7 @@ class NpcBertMLM():
         self.model = AutoModelForMaskedLM.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
-        try:
-            self.pipeline = hf_pipeline("fill-mask", model=self.model, tokenizer=self.tokenizer, device='cuda')
-            self.pipeline.model.to('cuda')
-        except Exception as e:
-            self.pipeline = hf_pipeline("fill-mask", model=self.model, tokenizer=self.tokenizer, device='cpu')
-            self.logger.warning("No GPU")
-            self.logger.exception(e)
     @spaces.GPU
     def __call__(self, *args):
@@ -77,6 +71,7 @@ class NpcBertMLM():
             msg = "Model was not initialized, have you run load()?"
             raise BrokenPipeError(msg)
         pipe_out = self.pipeline(*args)
         # Just use the first output
         if not isinstance(pipe_out[0], dict):

         self.model = AutoModelForMaskedLM.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
+        self.pipeline = hf_pipeline("fill-mask", model=self.model, tokenizer=self.tokenizer, device_map='auto')
     @spaces.GPU
     def __call__(self, *args):
             msg = "Model was not initialized, have you run load()?"
             raise BrokenPipeError(msg)
+        self.logger.info(f"{self.pipeline.model.device = }")
         pipe_out = self.pipeline(*args)
         # Just use the first output
         if not isinstance(pipe_out[0], dict):

npc_bert_models/summary_module.py CHANGED Viewed

@@ -30,25 +30,11 @@ class NpcBertGPT2():
         self.model = EncoderDecoderModel.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
-        try:
-            self.pipeline = hf_pipeline("text2text-generation",
-                                        model=self.model,
-                                        tokenizer=self.tokenizer,
-                                        device='cuda',
-                                        num_beams=4,
-                                        do_sample=True,
-                                        top_k = 5,
-                                        temperature=.95,
-                                        early_stopping=True,
-                                        no_repeat_ngram_size=5,
-                                        max_new_tokens=60)
-            self.pipeline.model.to('cuda')
-        except Exception as e:
-            self.pipeline = hf_pipeline("text2text-generation",
                                     model=self.model,
                                     tokenizer=self.tokenizer,
-                                    device='cpu',
                                     num_beams=4,
                                     do_sample=True,
                                     top_k = 5,
@@ -56,8 +42,7 @@ class NpcBertGPT2():
                                     early_stopping=True,
                                     no_repeat_ngram_size=5,
                                     max_new_tokens=60)
-            self.logger.warning("No GPU!")
-            self.logger.exception(e)
     @spaces.GPU
     def __call__(self, *args):
@@ -80,8 +65,7 @@ class NpcBertGPT2():
             msg = "Model was not initialized, have you run load()?"
             raise BrokenPipeError(msg)
-        self.logger.info(f"Called with arguments {args = }")
-        self.logger.info("Model: {self.pipeline.model}")
         pipe_out, = self.pipeline(*args)
         pipe_out = pipe_out['generated_text']
         self.logger.info(f"Generated text: {pipe_out}")

         self.model = EncoderDecoderModel.from_pretrained(self.pretrained_model)
         self.tokenizer = AutoTokenizer.from_pretrained(self.pretrained_model)
+        self.pipeline = hf_pipeline("text2text-generation",
                                     model=self.model,
                                     tokenizer=self.tokenizer,
+                                    device_map='auto',
                                     num_beams=4,
                                     do_sample=True,
                                     top_k = 5,
                                     early_stopping=True,
                                     no_repeat_ngram_size=5,
                                     max_new_tokens=60)
     @spaces.GPU
     def __call__(self, *args):
             msg = "Model was not initialized, have you run load()?"
             raise BrokenPipeError(msg)
+        self.logger.info(f"Model: {self.pipeline.model.device = }")
         pipe_out, = self.pipeline(*args)
         pipe_out = pipe_out['generated_text']
         self.logger.info(f"Generated text: {pipe_out}")