import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Number of comments sampled from the training set for this evaluation
TEST_SIZE = 2000
# Hugging Face Hub checkpoint of the fine-tuned multi-label classifier
FINE_TUNED_MODEL = "andyqin18/finetuned-bert-uncased"

def analyze(text: str):
    '''
    Input: a text string.
    Output: binary prediction array with one entry per label,
    where an entry is 1 if its sigmoid probability is >= 0.5.
    '''
    # Tokenize and move the input tensors to the model's device.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}
    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**encoding)
    logits = outputs.logits
    # Multi-label setup: an independent sigmoid per label, not a softmax.
    probs = torch.sigmoid(logits.squeeze().cpu())
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= 0.5)] = 1
    return predictions

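# Illustrative usage (input text is hypothetical): analyze("example comment")
# returns a 0/1 vector with one entry per label column, in column order.
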
# Load the training split; label columns start after "id" and "comment_text".
df = pd.read_csv("milestone3/comp/train.csv")
labels = df.columns[2:]
num_label = len(labels)
train_texts = df["comment_text"].values
train_labels = df[labels].values

# Sample row indices once so the sampled texts and labels stay aligned.
np.random.seed(1)
test_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
small_test_texts = train_texts[test_idx]
small_test_labels = train_labels[test_idx, :]

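# Sanity check: the sampled texts and labels must describe the same rows.
assert small_test_texts.shape[0] == small_test_labels.shape[0] == TEST_SIZE
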
model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)
model.eval()  # disable dropout for deterministic inference

total_true = 0     # correct (comment, label) decisions
total_success = 0  # comments with every label predicted correctly
TP, FP, TN, FN = 0, 0, 0, 0

for comment_idx in tqdm(range(TEST_SIZE), desc="Analyzing..."):
    comment = small_test_texts[comment_idx]
    target = small_test_labels[comment_idx]
    # Truncate to the first 500 characters to keep inputs short.
    result = analyze(comment[:500])

    # Accumulate per-label confusion counts.
    for i in range(num_label):
        if result[i] == target[i]:
            if result[i] == 1:
                TP += 1
            else:
                TN += 1
        else:
            if result[i] == 1:
                FP += 1
            else:
                FN += 1

    # A comment counts as a success only if every label is correct.
    num_true = (result == target).sum()
    if num_true == num_label:
        total_success += 1
    total_true += num_true

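# label_accuracy: fraction of correct (comment, label) decisions.
# prediction_accuracy: fraction of comments with all labels correct
# (exact-match / subset accuracy).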
performance = {}
performance["label_accuracy"] = total_true / (num_label * TEST_SIZE)
performance["prediction_accuracy"] = total_success / TEST_SIZE
performance["precision"] = TP / (TP + FP)
performance["recall"] = TP / (TP + FN)
print(performance)
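
# Note: precision and recall are micro-averaged over all (comment, label)
# pairs; if the sample yields no positive predictions (TP + FP == 0) or no
# positive targets (TP + FN == 0), the divisions above raise
# ZeroDivisionError and would need a guard.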