Kororinpa
/

Eye-Catching-Paper-Title-Detector

Text Classification

Inference Endpoints

Model card Files Files and versions Community

Kororinpa committed on Dec 2, 2024

Commit

8f5c615

·

verified ·

1 Parent(s): 10b2f97

Create handler

Files changed (1) hide show

handler +78 -0

handler ADDED Viewed

	@@ -0,0 +1,78 @@

+from typing import List, Dict
+import torch
+from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizer
+from .modeling import BinaryClassifier  # 你的模型类
class EndpointHandler:
    """Inference Endpoints handler wrapping a binary text classifier.

    The model and tokenizer are loaded once in ``__init__``; each call turns
    the request payload into a list of ``{"label", "score"}`` dictionaries,
    one per input text.
    """

    # Scores greater than or equal to this value are reported as label "1".
    threshold = 0.5

    def __init__(self, path=""):
        """Load the config, model and tokenizer stored under *path*."""
        self.config = AutoConfig.from_pretrained(path)
        self.model = BinaryClassifier.from_pretrained(path)
        # Put the model in evaluation mode; the handler only runs inference.
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # Maximum tokenized length; longer inputs are truncated.
        self.max_length = 512

    def __call__(self, data: Dict[str, object]) -> List[Dict[str, object]]:
        """Classify the text(s) contained in a request payload.

        Args:
            data: The request body. The usual shape is a dictionary such as
                ``{"inputs": "some text"}`` or ``{"inputs": ["a", "b"]}``.
                The list-of-dictionaries shape
                ``[{"inputs": "some text"}, ...]`` is accepted as well.

        Returns:
            One dictionary per input text with a ``"label"`` (``"0"`` or
            ``"1"``) and a ``"score"`` (the model output for that text).
            An empty batch yields an empty list.

        Raises:
            KeyError: If a dictionary payload has no ``"inputs"`` entry.
            TypeError: If the payload is neither a string nor a list/tuple.
            ValueError: If the model does not return one score per text.
        """
        texts = self._extract_texts(data)
        if not texts:
            # Nothing to classify; skip the tokenizer and the model.
            return []
        encoded_inputs = self._encode(texts)
        with torch.no_grad():
            outputs = self.model(**encoded_inputs)
        return self._format_outputs(outputs)

    def preprocess(self, text: str) -> Dict[str, torch.Tensor]:
        """Tokenize *text* with the same settings ``__call__`` uses.

        Optional hook for callers that want to run tokenization separately.
        """
        return self._encode(text)

    def postprocess(self, model_outputs: torch.Tensor) -> Dict:
        """Format the first example of *model_outputs* as a result dictionary.

        Optional hook for callers that want to run formatting separately.

        Raises:
            IndexError: If *model_outputs* contains no examples.
            ValueError: If there is not exactly one score per example.
        """
        return self._format_outputs(model_outputs)[0]

    @staticmethod
    def _extract_texts(data: object) -> List[str]:
        """Normalize a request payload into a flat list of input texts."""
        payload = data["inputs"] if isinstance(data, dict) else data
        if isinstance(payload, str):
            return [payload]
        if not isinstance(payload, (list, tuple)):
            raise TypeError(
                "expected a string or a list of inputs, got "
                f"{type(payload).__name__}"
            )
        # Each element is either the text itself or an {"inputs": text} dict.
        return [
            item["inputs"] if isinstance(item, dict) else item
            for item in payload
        ]

    def _encode(self, texts):
        """Tokenize one text or a batch of texts into padded PyTorch tensors."""
        return self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

    def _format_outputs(self, model_outputs: torch.Tensor) -> List[Dict[str, object]]:
        """Convert raw model scores into ``{"label", "score"}`` dictionaries."""
        scores = model_outputs.detach()
        # A 0-d tensor is treated as a batch of one.
        batch_size = scores.shape[0] if scores.dim() > 0 else 1
        if scores.numel() != batch_size:
            raise ValueError(
                "expected one score per example, got output of shape "
                f"{tuple(scores.shape)}"
            )
        return [
            {"label": str(int(score >= self.threshold)), "score": float(score)}
            for score in scores.reshape(-1).tolist()
        ]