# Geo-GenderStudy / get_region.py
# Last commit: "fix prediction on empty dataframe" (83c2953), by AliMustapha
# File size: 3.76 kB
__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"
import pandas as pd
import numpy as np
import tensorflow as tf
import pickle
class RegionPredictor:
    """Two-stage predictor: a region label first, then a sub-region label.

    Every model lives in ``<models_directory>/<name>/files/`` and consists of
    a saved Keras model (``bestmodel.tf``), a pickled label encoder and two
    pickled threshold objects (``optimal_thresholds_f1.pkl`` and
    ``optimal_thresholds_ROC.pkl``).
    """

    # Regions that always map to one fixed sub-region label; no model is
    # loaded for them.
    _FIXED_SUB_REGIONS = {
        "Oceania": "Australia and New Zealand",
        "Africa": "Africa",
    }
    # Order in which the per-region frames are concatenated in the result.
    # Regions listed here but absent from ``_FIXED_SUB_REGIONS`` get their
    # sub-region from a dedicated model in ``<models_directory>/<region>/files/``.
    _OUTPUT_ORDER = ("Europe", "Asia", "Oceania", "Americas", "Africa")

    def __init__(self, models_directory):
        """Remember the directory that contains the per-region model folders."""
        self.models_directory = models_directory

    @staticmethod
    def _load_pickle(file_path):
        """Return the object unpickled from *file_path*."""
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only point this class at model directories you trust.
        with open(file_path, "rb") as handle:
            return pickle.load(handle)

    def load_model(self, path):
        """Load the model and its companion artefacts stored under *path*.

        Args:
            path: Directory prefix. File names are appended verbatim, so it
                must already end with a path separator.

        Returns:
            Tuple ``(model, label_encoder, optF1, optROC)``.
        """
        model = tf.keras.models.load_model(path + "bestmodel.tf")
        # Attach loss/optimizer/metrics to the loaded model. No training is
        # performed here; the model is only used for inference by this class.
        model.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=["accuracy"],
        )
        label_encoder = self._load_pickle(path + "label_encoder.pkl")
        opt_f1 = self._load_pickle(path + "optimal_thresholds_f1.pkl")
        opt_roc = self._load_pickle(path + "optimal_thresholds_ROC.pkl")
        return model, label_encoder, opt_f1, opt_roc

    @staticmethod
    def _apply_thresholds(probabilities, thresholds, label_encoder):
        """Turn class probabilities into labels using *thresholds*.

        A class is "active" when its probability is >= its threshold; the
        first active class (lowest index) is selected.
        """
        # NOTE(review): when no class reaches its threshold the row is all
        # zeros and argmax returns index 0, i.e. the encoder's first class.
        # Kept as-is to preserve existing predictions - confirm this is the
        # intended fallback.
        active = (probabilities >= thresholds).astype(int)
        class_indices = np.argmax(active, axis=1)
        return label_encoder.inverse_transform(class_indices)

    def model_prediction(self, dataset, model, label_encoder, optF1=None, optROC=None):
        """Run *model* on *dataset* and decode labels with each threshold set.

        Args:
            dataset: Frame providing the ``Author`` and ``Author_Timezone``
                columns, fed to the model as ``input_text`` and
                ``input_offset`` respectively.
            model: Object exposing ``predict``; expected to return one row of
                class probabilities per input row.
            label_encoder: Object exposing ``inverse_transform`` that maps
                class indices back to labels.
            optF1: Thresholds compared against the probabilities, or ``None``
                to skip this decoding.
            optROC: Same as *optF1* for the second threshold set.

        Returns:
            Tuple ``(y_pred_F1, y_pred_ROC)``. Each element is the decoded
            labels, or an empty list when the matching thresholds are ``None``.
        """
        full_names = np.asarray(dataset["Author"]).astype("str")
        offsets = np.asarray(dataset["Author_Timezone"]).astype("float")
        probabilities = model.predict({
            "input_text": full_names,
            "input_offset": offsets,
        })

        y_pred_f1 = []
        if optF1 is not None:
            y_pred_f1 = self._apply_thresholds(probabilities, optF1, label_encoder)
        y_pred_roc = []
        if optROC is not None:
            y_pred_roc = self._apply_thresholds(probabilities, optROC, label_encoder)
        return y_pred_f1, y_pred_roc

    def get_region(self, dataset):
        """Return a copy of *dataset* annotated with region and sub-region.

        The input frame is NOT modified: scaling ``Author_Timezone`` and
        adding columns happen on a copy, so calling this method twice on the
        same frame no longer divides the offset twice.

        Args:
            dataset: Frame with ``Author`` and ``Author_Timezone`` columns.
                ``Author_Timezone`` is divided by 60 before prediction
                (presumably minutes -> hours; confirm against the training
                pipeline).

        Returns:
            A new frame with ``region-prediction`` and
            ``sub-region-prediction`` columns. Rows are grouped by region in
            the order Europe, Asia, Oceania, Americas, Africa, keeping their
            original index labels. Rows whose predicted region is none of
            these five are dropped.
        """
        data = dataset.copy()
        data["Author_Timezone"] = data["Author_Timezone"] / 60

        if data.empty:
            # Nothing to predict: skip model loading entirely but still hand
            # back the full output schema.
            data["region-prediction"] = pd.Series(dtype="object", index=data.index)
            data["sub-region-prediction"] = pd.Series(dtype="object", index=data.index)
            return data

        model, label_encoder, opt_f1, opt_roc = self.load_model(
            self.models_directory + "/region/files/"
        )
        region_labels, _ = self.model_prediction(
            data, model, label_encoder, opt_f1, opt_roc
        )
        data["region-prediction"] = region_labels

        parts = []
        for region in self._OUTPUT_ORDER:
            # .copy() gives an independent frame, so adding a column below is
            # not a chained assignment on a slice of `data`.
            subset = data[data["region-prediction"] == region].copy()
            if subset.empty:
                continue
            if region in self._FIXED_SUB_REGIONS:
                subset["sub-region-prediction"] = self._FIXED_SUB_REGIONS[region]
            else:
                model, label_encoder, opt_f1, opt_roc = self.load_model(
                    self.models_directory + "/" + region + "/files/"
                )
                sub_labels, _ = self.model_prediction(
                    subset, model, label_encoder, opt_f1, opt_roc
                )
                subset["sub-region-prediction"] = sub_labels
            parts.append(subset)

        if not parts:
            # No row fell into a known region: return an empty frame that
            # still carries both prediction columns.
            result = data.iloc[0:0].copy()
            result["sub-region-prediction"] = pd.Series(
                dtype="object", index=result.index
            )
            return result
        return pd.concat(parts)