|
from collections import defaultdict |
|
from statistics import mean |
|
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline |
|
|
|
|
|
|
|
class Regard:
    """Score texts with a text-classification pipeline and aggregate by label.

    The classifier is built once in ``__init__`` from the ``sasha/regardv3``
    checkpoint. ``config_name`` selects the mode used by :meth:`compute`:
    ``"compare"`` scores ``data`` against ``references``; any other value
    scores ``data`` alone.
    """

    def __init__(self, config_name):
        """Store the mode and build the classification pipeline.

        Args:
            config_name: Mode selector read by :meth:`compute`. The value
                ``"compare"`` enables the data-vs-references comparison.
        """
        self.config_name = config_name
        # Tokenizer and model must come from the same checkpoint, so the
        # identifier is bound once instead of being repeated.
        checkpoint = "sasha/regardv3"
        regard_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        regard_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        self.regard_classifier = pipeline(
            "text-classification",
            model=regard_model,
            top_k=4,
            tokenizer=regard_tokenizer,
            truncation=True,
        )

    def regard(self, group):
        """Classify ``group`` and collect the scores of each label.

        Args:
            group: Input handed unchanged to ``self.regard_classifier``.

        Returns:
            A ``(raw, scores_by_label)`` tuple. ``raw`` is the classifier
            output as returned; it is consumed here as an iterable of
            per-input iterables of ``{"label": ..., "score": ...}`` mappings.
            ``scores_by_label`` is a plain ``dict`` mapping every label seen
            to the list of its scores, in classifier output order.
        """
        group_regard = self.regard_classifier(group)
        group_scores = defaultdict(list)
        for pred in group_regard:
            for pred_score in pred:
                group_scores[pred_score["label"]].append(pred_score["score"])
        # Return a plain dict so later lookups cannot silently create keys.
        return group_regard, dict(group_scores)

    def compute(
        self,
        data,
        references=None,
        aggregation=None,
    ):
        """Compute regard scores for ``data``, optionally against ``references``.

        Args:
            data: Texts to score.
            references: Texts to compare against. Required when
                ``self.config_name == "compare"``; ignored otherwise.
            aggregation: ``"maximum"`` or ``"average"`` to reduce the scores
                of each label to a single number. Any other value selects
                the default output of the active mode.

        Returns:
            In ``"compare"`` mode:

            * ``"maximum"``: ``{"max_data_regard": ..., "max_references_regard": ...}``
            * ``"average"``: ``{"average_data_regard": ..., "average_references_regard": ...}``
            * otherwise: ``{"regard_difference": {label: data_mean - references_mean}}``

            In every other mode:

            * ``"maximum"``: ``{"max_regard": {label: max_score}}``
            * ``"average"``: ``{"average_regard": {label: mean_score}}``
            * otherwise: ``{"regard": raw_classifier_output}``

        Raises:
            ValueError: If the mode is ``"compare"`` and ``references`` is
                ``None``.
        """
        if self.config_name == "compare":
            if references is None:
                # Fail early with a clear message instead of letting the
                # classifier choke on ``None``.
                raise ValueError(
                    "`references` must be provided when config_name is 'compare'."
                )
            _, data_scores = self.regard(data)
            _, ref_scores = self.regard(references)

            if aggregation == "maximum":
                return {
                    "max_data_regard": {k: max(v) for k, v in data_scores.items()},
                    "max_references_regard": {k: max(v) for k, v in ref_scores.items()},
                }

            data_mean = {k: mean(v) for k, v in data_scores.items()}
            ref_mean = {k: mean(v) for k, v in ref_scores.items()}
            if aggregation == "average":
                return {
                    "average_data_regard": data_mean,
                    "average_references_regard": ref_mean,
                }

            # A label missing on either side contributes 0 on that side, so
            # labels seen only in the references are reported rather than
            # dropped. Data labels keep their original order; reference-only
            # labels follow.
            labels = list(data_mean)
            labels.extend(k for k in ref_mean if k not in data_mean)
            return {
                "regard_difference": {
                    k: data_mean.get(k, 0) - ref_mean.get(k, 0) for k in labels
                }
            }

        raw_output, data_scores = self.regard(data)
        if aggregation == "maximum":
            return {"max_regard": {k: max(v) for k, v in data_scores.items()}}
        if aggregation == "average":
            return {"average_regard": {k: mean(v) for k, v in data_scores.items()}}
        return {"regard": raw_output}