Htzhang
/

efficient-splade-VI-BT-large-query

Inference Endpoints

Model card · Files and versions · Community

Htzhang committed on Dec 30, 2023

Commit

18ca2f4

·

1 Parent(s): 5917a9e

Upload handler.py

Files changed (1) hide show

handler.py +52 -0

handler.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from typing import Dict, List, Any, Tuple
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+import torch
+from subprocess import run
# Pick the compute device once at import time: CUDA when available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class EndpointHandler:
    """Custom inference handler that encodes text into sparse term weights.

    The handler wraps a masked-language-model checkpoint and its tokenizer.
    Each call turns the input text(s) into one pooled vector per input and
    returns only the non-zero entries of that vector (positions and values).
    """

    def __init__(self, path=""):
        """Load the masked-LM model and tokenizer stored at ``path``.

        Args:
            path: Directory or identifier passed unchanged to
                ``from_pretrained`` for both the model and the tokenizer.
        """
        self.query_model = AutoModelForMaskedLM.from_pretrained(path).to(device)
        self.query_tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[List[Any]]:
        """Encode the request payload into sparse (index, weight) lists.

        Args:
            data: Request payload. The text to encode is read from the
                ``"inputs"`` key (a string or a list of strings, whatever the
                tokenizer accepts). The key is popped from ``data``; when it
                is absent the whole payload is handed to the tokenizer.

        Returns:
            A two-element list ``[indices, vecs]``. ``indices[i]`` holds the
            positions of the non-zero entries in the pooled vector of the
            i-th input, and ``vecs[i]`` holds the matching values in the same
            order. An input whose pooled vector is all zeros yields two empty
            lists.
        """
        texts = data.pop("inputs", data)

        # Tokenize exactly once and place the batch on the same device as the
        # model, using the module-level ``device`` for both.
        tokens = self.query_tokenizer(
            texts, truncation=True, padding=True, return_tensors="pt"
        )
        tokens = tokens.to(device)

        # Inference only: disabling autograd avoids building a graph that is
        # never used and keeps per-request memory down.
        with torch.no_grad():
            logits = self.query_model(**tokens).logits
            attention_mask = tokens.attention_mask

            # log(1 + relu(x)) saturation, then zero out padded positions so
            # they cannot win the max below.
            relu_log = torch.log(1 + torch.relu(logits))
            weighted_log = relu_log * attention_mask.unsqueeze(-1)

            # Max-pool over dim 1 (assumed to be the token axis of a
            # (batch, tokens, vocab) logits tensor).
            tvecs, _ = torch.max(weighted_log, dim=1)

            # Keep only the non-zero entries of each pooled vector.
            indices = []
            vecs = []
            for row in tvecs:
                nonzero_ids = row.nonzero(as_tuple=True)[0]
                indices.append(nonzero_ids.tolist())
                # Index with the tensor itself so an empty selection is still
                # a well-typed (long) index and yields an empty list.
                vecs.append(row[nonzero_ids].tolist())

        return [indices, vecs]