|
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import BertTokenizer, BertModel
|
|
|
|
|
# Checkpoint identifier shared by the tokenizer below and by the encoder that
# IndoBERTMultiTaskClassifier loads, so text is tokenized with the vocabulary
# that belongs to the same pretrained model.
model_name = "indobenchmark/indobert-base-p1"

# Loaded once at import time and reused by every classify() call.
tokenizer = BertTokenizer.from_pretrained(model_name)
|
|
|
class IndoBERTMultiTaskClassifier(torch.nn.Module):
    """BERT encoder feeding two independent linear classification heads.

    Both heads read the same dropout-regularised encoder output, so a single
    forward pass produces logits for two tasks at once.

    Args:
        bert_model_name: Identifier or path handed to
            ``BertModel.from_pretrained``.
        num_labels_task1: Number of output classes of the first head.
        num_labels_task2: Number of output classes of the second head.
        dropout_rate: Dropout probability applied to the encoder output
            before it reaches the heads.
    """

    def __init__(self, bert_model_name: str, num_labels_task1: int,
                 num_labels_task2: int, dropout_rate: float = 0.3):
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = torch.nn.Dropout(dropout_rate)

        # Both heads take the encoder's hidden size as their input width.
        hidden_size = self.bert.config.hidden_size
        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

    def forward(self, input_ids, attention_mask):
        """Run the encoder once and score both tasks.

        Args:
            input_ids: Token-id tensor forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            A ``(logits_task1, logits_task2)`` tuple of raw (un-normalised)
            scores, one tensor per head.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Element 1 of the encoder output is used as the sentence-level
        # feature (for Hugging Face BertModel this is the pooled [CLS] output).
        # Positional indexing is kept so tuple-style outputs keep working too.
        cls_output = self.dropout(outputs[1])

        logits_task1 = self.classifier_task1(cls_output)
        logits_task2 = self.classifier_task2(cls_output)
        return logits_task1, logits_task2
|
|
|
|
|
# Single shared model instance used by classify(). Each head has three
# outputs, matching the three entries of label_mapping_task1 and
# label_mapping_task2.
# NOTE(review): no fine-tuned checkpoint is loaded anywhere in the visible
# code, so the two linear heads keep their random initialisation — confirm
# that trained weights are loaded (e.g. via load_state_dict) before serving.
model = IndoBERTMultiTaskClassifier(
    bert_model_name=model_name,
    num_labels_task1=3,
    num_labels_task2=3,
)

# Evaluation mode: disables dropout so repeated calls give the same output.
model.eval()
|
|
|
|
|
# Class names for each head: position i names class index i, and classify()
# looks the predicted label up by the argmax of that head's probabilities.
# NOTE(review): the order is assumed to match the label ids used when the
# heads were trained — confirm against the training code.
label_mapping_task1 = ["trusted", "fake", "non"]
label_mapping_task2 = ["positive", "negative", "neutral"]
|
|
|
def _format_task_result(logits, labels):
    """Render one head's logits as ``"<top label> (<label>: <pct>%, ...)"``."""
    # Softmax over the class axis; [0] picks the single example in the batch.
    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
    top_label = labels[probs.argmax()]
    breakdown = ", ".join(
        f"{label}: {prob*100:.2f}%" for label, prob in zip(labels, probs)
    )
    return f"{top_label} ({breakdown})"


def classify(text):
    """Classify one review with both heads of the shared model.

    Args:
        text: The review text to classify.

    Returns:
        A ``(result_task1, result_task2)`` tuple of strings. Each string is
        the predicted label followed, in parentheses, by the percentage
        assigned to every label of that task.
    """
    # Inputs longer than 128 tokens are truncated by the tokenizer.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128,
    )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits_task1, logits_task2 = model(
            inputs["input_ids"], inputs["attention_mask"]
        )

    result_task1 = _format_task_result(logits_task1, label_mapping_task1)
    result_task2 = _format_task_result(logits_task2, label_mapping_task2)
    return result_task1, result_task2
|
|
|
|
|
# Web UI: one text box in, two label widgets out. The output widgets are listed
# in the same order as the tuple classify() returns (task 1, then task 2).
iface = gr.Interface(
    fn=classify,
    inputs="text",
    outputs=[
        gr.Label(label="Fake Review Detection"),
        gr.Label(label="Sentiment Classification"),
    ],
    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
)

# Starts the app as soon as the module is executed.
iface.launch()