Prgckwb committed on
Commit 78a5cec · 1 Parent(s): f22dc04
Files changed (4)
  1. .gitignore +0 -0
  2. app.py +22 -112
  3. metric.py +125 -0
  4. requirements.txt +1 -0
.gitignore ADDED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,125 +1,41 @@
-import gc
 import os
-from math import exp
-from typing import List, Union
+from collections import Counter
 
 import gradio as gr
+import polars as pl
 import spaces
 import torch
-import transformers
+
+from metric import PerplexityCalculator
 
 os.environ['OMP_NUM_THREADS'] = '1'
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 PAD_TOKEN_LABEL_ID = torch.nn.CrossEntropyLoss().ignore_index
 DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
+df_sample_submission = pl.read_csv('data/sample_submission.csv')
+text_list = df_sample_submission.get_column('text').to_list()
+text_counters = [Counter(text.split()) for text in text_list]
 
-class PerplexityCalculator:
-    """
-    Calculates perplexity of text using a pre-trained language model.
-
-    Adapted from https://github.com/asahi417/lmppl/blob/main/lmppl/ppl_recurrent_lm.py
-
-    Parameters
-    ----------
-    model_path : str
-        Path to the pre-trained language model
-
-    load_in_8bit : bool, default=False
-        Use 8-bit quantization for the model. Requires CUDA.
-
-    device_map : str, default="auto"
-        Device mapping for the model.
-    """
-
-    def __init__(
-        self,
-        model_path: str,
-        load_in_8bit: bool = False,
-        device_map: str = 'auto',
-    ):
-        self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_path, padding_side="right")
-        # Configure model loading based on quantization setting and device availability
-        if load_in_8bit:
-            if DEVICE.type != 'cuda':
-                raise ValueError('8-bit quantization requires CUDA device')
-            quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
-            self.model = transformers.AutoModelForCausalLM.from_pretrained(
-                model_path,
-                quantization_config=quantization_config,
-                device_map=device_map,
-            )
-        else:
-            self.model = transformers.AutoModelForCausalLM.from_pretrained(
-                model_path,
-                torch_dtype=torch.float16 if DEVICE.type == 'cuda' else torch.float32,
-                device_map=device_map,
-            )
-
-        self.loss_fct = torch.nn.CrossEntropyLoss(reduction='none')
-
-        self.model.eval()
-
-    def get_perplexity(self, input_texts: Union[str, List[str]], batch_size: int = 1) -> Union[float, List[float]]:
-        single_input = isinstance(input_texts, str)
-        input_texts = [input_texts] if single_input else input_texts
-        loss_list = []
-        batches = len(input_texts) // batch_size + (len(input_texts) % batch_size != 0)
-        for j in range(batches):
-            a = j * batch_size
-            b = (j + 1) * batch_size
-            input_batch = input_texts[a:b]
-            with torch.no_grad():
-                text_with_special = [f"{self.tokenizer.bos_token}{text}{self.tokenizer.eos_token}" for text in input_batch]
-                model_inputs = self.tokenizer(text_with_special, return_tensors='pt', add_special_tokens=False, padding=True)
-                if 'token_type_ids' in model_inputs:
-                    model_inputs.pop('token_type_ids')
-                model_inputs = {k: v.to(DEVICE) for k, v in model_inputs.items()}
-                output = self.model(**model_inputs, use_cache=False)
-                logits = output['logits']
-                label = model_inputs['input_ids']
-                label[label == self.tokenizer.pad_token_id] = PAD_TOKEN_LABEL_ID
-                shift_logits = logits[..., :-1, :].contiguous()
-                shift_labels = label[..., 1:].contiguous()
-                loss = self.loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
-                loss = loss.view(len(logits), -1)
-                valid_length = (shift_labels != PAD_TOKEN_LABEL_ID).sum(dim=-1)
-                loss = torch.sum(loss, -1) / valid_length
-                loss_list += loss.cpu().tolist()
-        ppl = [exp(i) for i in loss_list]
-        return ppl[0] if single_input else ppl
-
-    def clear_gpu_memory(self) -> None:
-        """Clears GPU memory by deleting references and emptying caches."""
-        if not torch.cuda.is_available():
-            return
-
-        # Delete model and tokenizer if they exist
-        if hasattr(self, 'model'):
-            del self.model
-        if hasattr(self, 'tokenizer'):
-            del self.tokenizer
-
-        # Run garbage collection
-        gc.collect()
-
-        # Clear CUDA cache and reset memory stats
-        with DEVICE:
-            torch.cuda.empty_cache()
-            torch.cuda.ipc_collect()
-            torch.cuda.reset_peak_memory_stats()
-
-
+# Model Loading
 scorer = PerplexityCalculator('google/gemma-2-9b')
 
 
 @spaces.GPU()
-def inference(text: str):
+def inference(text: str, progress=gr.Progress(track_tqdm=True)):
     score = scorer.get_perplexity(text)
 
-    return score
+    input_counter = Counter(text.split())
+    is_match_list = [input_counter == text_counter for text_counter in text_counters]
+
+    if any(is_match_list):
+        index = is_match_list.index(True)
+        index_text = f'Task #{index}'
+        return score, index_text
+    else:
+        index_text = 'No Match'
+        gr.Warning(index_text)
+        return score, index_text
 
 
 if __name__ == '__main__':
@@ -129,15 +45,9 @@ if __name__ == '__main__':
         outputs=[
             # gr.Number(label='Index'),
            gr.Number(label='Perplexity'),
+            gr.Textbox(label='Index')
         ],
-        examples=[
-            'advent chimney elf family fireplace gingerbread mistletoe ornament reindeer scrooge',
-            'advent chimney elf family fireplace gingerbread mistletoe ornament reindeer scrooge walk give jump drive bake the sleep night laugh and',
-            'yuletide decorations gifts cheer holiday carol magi nutcracker polar grinch sleigh chimney workshop stocking ornament holly jingle beard naughty nice',
-            'yuletide decorations gifts cheer holiday carol magi nutcracker polar grinch sleigh chimney workshop stocking ornament holly jingle beard naughty nice sing cheer and of the is eat visit relax unwrap',
-            'hohoho candle poinsettia snowglobe peppermint eggnog fruitcake chocolate candy puzzle game doll toy workshop wonder believe dream hope peace joy merry season greeting card wrapping paper bow fireplace night cookie milk star wish wreath angel the to of and in that have it not with as you from we kaggle',
-            'advent chimney elf family fireplace gingerbread mistletoe ornament reindeer scrooge walk give jump drive bake the sleep night laugh and yuletide decorations gifts cheer holiday carol magi nutcracker polar grinch sleigh chimney workshop stocking ornament holly jingle beard naughty nice sing cheer and of the is eat visit relax unwrap hohoho candle poinsettia snowglobe peppermint eggnog fruitcake chocolate candy puzzle game doll toy workshop wonder believe dream hope peace joy merry season greeting card wrapping paper bow fireplace night cookie milk star wish wreath angel the to of and in that have it not with as you from we kaggle'
-        ],
+        examples=text_list,
         title='Gemma-2-9b Perplexity Calculator',
     )
     demo.queue().launch()
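
Note: the task lookup added to inference() treats two texts as the same task when their word multisets are equal, so any permutation of a known sample-submission row maps back to its task index. A minimal standalone sketch of that check (the example rows below are hypothetical stand-ins for the text column of data/sample_submission.csv):

    from collections import Counter

    # Hypothetical stand-ins for the rows of data/sample_submission.csv.
    text_list = ['advent chimney elf', 'holly jingle beard naughty nice']
    text_counters = [Counter(t.split()) for t in text_list]

    def find_task(text: str) -> str:
        input_counter = Counter(text.split())  # word counts; ignores word order
        for i, counter in enumerate(text_counters):
            if input_counter == counter:
                return f'Task #{i}'
        return 'No Match'

    print(find_task('elf chimney advent'))  # -> 'Task #0': any reordering matches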
metric.py ADDED
@@ -0,0 +1,125 @@
+import gc
+import os
+from math import exp
+from typing import List, Union
+
+import torch
+import transformers
+
+os.environ["OMP_NUM_THREADS"] = "1"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+PAD_TOKEN_LABEL_ID = torch.nn.CrossEntropyLoss().ignore_index
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class PerplexityCalculator:
+    """
+    Calculates perplexity of text using a pre-trained language model.
+
+    Adapted from https://github.com/asahi417/lmppl/blob/main/lmppl/ppl_recurrent_lm.py
+
+    Parameters
+    ----------
+    model_path : str
+        Path to the pre-trained language model
+
+    load_in_8bit : bool, default=False
+        Use 8-bit quantization for the model. Requires CUDA.
+
+    device_map : str, default="auto"
+        Device mapping for the model.
+    """
+
+    def __init__(
+        self,
+        model_path: str,
+        load_in_8bit: bool = False,
+        device_map: str = "auto",
+        dtype: torch.dtype = torch.float16,
+    ):
+        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
+            model_path, padding_side="right"
+        )
+        # Configure model loading based on quantization setting and device availability
+        if load_in_8bit:
+            if DEVICE.type != "cuda":
+                raise ValueError("8-bit quantization requires CUDA device")
+            quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
+            self.model = transformers.AutoModelForCausalLM.from_pretrained(
+                model_path,
+                quantization_config=quantization_config,
+                device_map=device_map,
+            )
+        else:
+            self.model = transformers.AutoModelForCausalLM.from_pretrained(
+                model_path,
+                torch_dtype=dtype,
+                device_map=device_map,
+            )
+
+        self.loss_fct = torch.nn.CrossEntropyLoss(reduction="none")
+
+        self.model.eval()
+
+    def get_perplexity(
+        self, input_texts: Union[str, List[str]], batch_size: int = 1
+    ) -> Union[float, List[float]]:
+        single_input = isinstance(input_texts, str)
+        input_texts = [input_texts] if single_input else input_texts
+        loss_list = []
+        batches = len(input_texts) // batch_size + (len(input_texts) % batch_size != 0)
+        for j in range(batches):
+            a = j * batch_size
+            b = (j + 1) * batch_size
+            input_batch = input_texts[a:b]
+            with torch.no_grad():
+                text_with_special = [
+                    f"{self.tokenizer.bos_token}{text}{self.tokenizer.eos_token}"
+                    for text in input_batch
+                ]
+                model_inputs = self.tokenizer(
+                    text_with_special,
+                    return_tensors="pt",
+                    add_special_tokens=False,
+                    padding=True,
+                )
+                if "token_type_ids" in model_inputs:
+                    model_inputs.pop("token_type_ids")
+                model_inputs = {k: v.to(DEVICE) for k, v in model_inputs.items()}
+
+                output = self.model(**model_inputs, use_cache=False)
+                logits = output["logits"]
+
+                label = model_inputs["input_ids"]
+                label[label == self.tokenizer.pad_token_id] = PAD_TOKEN_LABEL_ID
+                shift_logits = logits[..., :-1, :].contiguous()
+                shift_labels = label[..., 1:].contiguous()
+                loss = self.loss_fct(
+                    shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)
+                )
+                loss = loss.view(len(logits), -1)
+                valid_length = (shift_labels != PAD_TOKEN_LABEL_ID).sum(dim=-1)
+                loss = torch.sum(loss, -1) / valid_length
+                loss_list += loss.cpu().tolist()
+        ppl = [exp(i) for i in loss_list]
+        return ppl[0] if single_input else ppl
+
+    def clear_gpu_memory(self) -> None:
+        """Clears GPU memory by deleting references and emptying caches."""
+        if not torch.cuda.is_available():
+            return
+
+        # Delete model and tokenizer if they exist
+        if hasattr(self, "model"):
+            del self.model
+        if hasattr(self, "tokenizer"):
+            del self.tokenizer
+
+        # Run garbage collection
+        gc.collect()
+
+        # Clear CUDA cache and reset memory stats
+        with DEVICE:
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            torch.cuda.reset_peak_memory_stats()
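
Note: get_perplexity() returns the exponential of the mean per-token cross-entropy (padding excluded), i.e. PPL(x) = exp(-(1/N) * sum_t log p(x_t | x_<t)). A minimal usage sketch of the class as committed; google/gemma-2-9b is a gated ~9B-parameter model, so this assumes a GPU with enough memory (or load_in_8bit=True on CUDA):

    from metric import PerplexityCalculator

    scorer = PerplexityCalculator('google/gemma-2-9b')

    # A single string returns a float; a list of strings returns a list of floats.
    single = scorer.get_perplexity('peace joy merry season')
    batch = scorer.get_perplexity(['hope peace joy', 'joy peace hope'], batch_size=2)
    print(single, batch)

    scorer.clear_gpu_memory()  # drop the weights and empty the CUDA cache when done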
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 transformers
 safetensors
 accelerate
+polars