Spaces:

shobrunjb
/

spiill-fake-review-product-v2

Sleeping

App Files Files Community

spiill-fake-review-product-v2 / app.py

shobrunjb

a2774cf verified 5 months ago

raw

history blame contribute delete

3.35 kB

	import gradio as gr
	import torch
	from transformers import BertTokenizer, BertModel
	import torch.nn.functional as F

	# Checkpoint identifier shared by the tokenizer below and by the BERT encoder
	# that IndoBERTMultiTaskClassifier builds via BertModel.from_pretrained.
	model_name = "indobenchmark/indobert-base-p1"
	# Tokenizer is created once at import time and reused by every classify() call.
	tokenizer = BertTokenizer.from_pretrained(model_name)

	class IndoBERTMultiTaskClassifier(torch.nn.Module):
	    """BERT encoder followed by two independent linear classification heads.

	    Both heads read the same dropout-regularised sentence representation and
	    each produces its own logits, so a single forward pass serves two tasks.

	    Args:
	        bert_model_name: Identifier or path handed to ``BertModel.from_pretrained``.
	        num_labels_task1: Output width of the first head.
	        num_labels_task2: Output width of the second head.
	        dropout_rate: Dropout probability applied before both heads.
	    """

	    def __init__(self, bert_model_name, num_labels_task1, num_labels_task2, dropout_rate=0.3):
	        super().__init__()
	        # Submodules are registered in the same order as before so parameter
	        # ordering and state_dict keys stay unchanged.
	        self.bert = BertModel.from_pretrained(bert_model_name)
	        self.dropout = torch.nn.Dropout(dropout_rate)

	        hidden_size = self.bert.config.hidden_size
	        self.classifier_task1 = torch.nn.Linear(hidden_size, num_labels_task1)
	        self.classifier_task2 = torch.nn.Linear(hidden_size, num_labels_task2)

	    def forward(self, input_ids, attention_mask):
	        """Return ``(logits_task1, logits_task2)`` for a tokenized batch."""
	        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
	        # Element 1 of the encoder output is used as the sentence vector. Per
	        # the transformers BertModel docs this is the pooled output derived
	        # from the [CLS] position, not the raw [CLS] hidden state.
	        sentence_vector = self.dropout(encoded[1])

	        return (
	            self.classifier_task1(sentence_vector),
	            self.classifier_task2(sentence_vector),
	        )

	# Human-readable class names; position i names output logit i of each head.
	label_mapping_task1 = ["trusted", "fake", "non"]  # Adjust with your task1 labels
	label_mapping_task2 = ["positive", "negative", "neutral"]  # Adjust with your task2 labels

	# Build the multitask model. Head widths are derived from the label lists so
	# the two can never drift apart (a mismatch would make classify() either
	# raise IndexError or silently drop classes when zipping labels with probs).
	#
	# NOTE(review): only the BERT encoder is initialised from `model_name` here.
	# Nothing visible in this file loads fine-tuned weights into the two linear
	# heads, so they appear to keep their random initialisation -- confirm
	# whether a `load_state_dict(...)` call for the trained checkpoint is missing.
	model = IndoBERTMultiTaskClassifier(
	    bert_model_name=model_name,
	    num_labels_task1=len(label_mapping_task1),
	    num_labels_task2=len(label_mapping_task2),
	)
	# Inference only: eval() disables dropout.
	model.eval()

	def _describe_prediction(logits, labels):
	    """Format one head's logits as ``"<top label> (<label>: <pct>%, ...)"``."""
	    # Only the first batch item is reported.
	    probs = F.softmax(logits, dim=1).cpu().numpy()[0]
	    top_label = labels[probs.argmax()]
	    breakdown = ", ".join(
	        f"{label}: {prob*100:.2f}%" for label, prob in zip(labels, probs)
	    )
	    return f"{top_label} ({breakdown})"


	def classify(text):
	    """Run both classification heads on ``text`` and describe the results.

	    Args:
	        text: Review text to classify.

	    Returns:
	        A pair of strings ``(task1_result, task2_result)``. Each one names the
	        most probable label followed, in parentheses, by every label with its
	        probability as a percentage rounded to two decimals.
	    """
	    # Tokenize; inputs longer than 128 tokens are truncated.
	    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

	    # Gradients are not needed for inference.
	    with torch.no_grad():
	        logits_task1, logits_task2 = model(encoded['input_ids'], encoded['attention_mask'])

	    return (
	        _describe_prediction(logits_task1, label_mapping_task1),
	        _describe_prediction(logits_task2, label_mapping_task2),
	    )

	# Web UI: one free-text input wired to classify(), whose two returned strings
	# are shown in two labelled output components (one per task).
	iface = gr.Interface(
	    fn=classify,
	    inputs="text",
	    outputs=[
	        gr.Label(label="Fake Review Detection"),
	        gr.Label(label="Sentiment Classification"),
	    ],
	    title="Multitask IndoBERT: Fake Review & Sentiment Classification",
	    description="Enter a skincare product review in Indonesian and the model will classify it as fake or trusted, and determine the sentiment.",
	)

	# Start serving the interface as soon as the module is executed.
	iface.launch()