# Job_sentiment_classifier / sentimentorr.py
# sepehr's picture
# Create sentimentorr.py
# e335ec1
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax as softmax
import numpy as np
import torch
# Single source of truth for the checkpoint name, so the tokenizer and the
# classifier are always loaded from the same pretrained model.
_CHECKPOINT = "joeddav/distilbert-base-uncased-go-emotions-student"

# Both objects are created at import time and shared by every call below.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
# Emotion class names. Order matters: sentimentor() pairs these positionally
# with the scores produced by the model.
labels = [
    "admiration",
    "amusement",
    "anger",
    "annoyance",
    "approval",
    "caring",
    "confusion",
    "curiosity",
    "desire",
    "disappointment",
    "disapproval",
    "disgust",
    "embarrassment",
    "excitement",
    "fear",
    "gratitude",
    "grief",
    "joy",
    "love",
    "nervousness",
    "optimism",
    "pride",
    "realization",
    "relief",
    "remorse",
    "sadness",
    "surprise",
    "neutral",
]

# The same names followed by a trailing "larg" entry (presumably the column
# holding the top-scoring label -- confirm). Built as a new list so the two
# lists cannot drift apart and mutating one never affects the other.
labels7larg = labels + ["larg"]
def sentimentor(mmm):
    """Return the most probable emotion label for the first text in ``mmm``.

    ``mmm`` is coerced with ``pd.Series``, so it may be a single string or a
    collection of strings. Only the first text determines the result (the
    previous implementation scored every text, assembled a DataFrame row by
    row and then returned row 0), so only that text is run through the model.

    NOTE(review): if one label per input text was actually intended, that
    needs a separate function; the single-label return is kept here so
    existing callers are unaffected.

    Args:
        mmm: Text, or collection of texts, to classify.

    Returns:
        The entry of ``labels`` whose softmax probability is highest for the
        first text.

    Raises:
        ValueError: If ``mmm`` contains no text at all.
    """
    texts = pd.Series(mmm)
    if texts.empty:
        # Used to surface as an opaque ``KeyError: 0`` from the final lookup;
        # the old length-mismatch fallback branch could never catch it.
        raise ValueError("sentimentor() needs at least one text to classify")

    encoded = tokenizer(texts.iloc[0], return_tensors="pt", padding=True)

    # Inference only: no need to record operations for backpropagation.
    with torch.no_grad():
        # First model output, first (and only) sequence in the batch.
        scores = model(**encoded)[0][0]

    probabilities = softmax(scores, dim=0)

    # argmax reports the first maximal position, matching the tie-breaking of
    # the ``nlargest(1, ...)`` lookup this replaces.
    return labels[int(torch.argmax(probabilities))]