Spaces:
Runtime error
Runtime error
__copyright__ = "Copyright (C) 2023 Ali Mustapha" | |
__license__ = "GPL-3.0-or-later" | |
import pandas as pd | |
import numpy as np | |
import tensorflow as tf | |
import pickle | |
class RegionPredictor:
    """Predict a region and a sub-region for every row of an author dataset.

    Model artefacts are read from ``models_directory``, using the layout
    ``<models_directory>/<name>/files/`` where ``<name>`` is ``region``,
    ``Europe``, ``Asia`` or ``Americas``.
    """

    # Regions whose sub-region is predicted by a dedicated model.
    _MODELLED_REGIONS = ("Europe", "Asia", "Americas")
    # Regions that map to a single, hard-coded sub-region label.
    _FIXED_SUB_REGIONS = {
        "Oceania": "Australia and New Zealand",
        "Africa": "Africa",
    }
    # Row-group order of the frame returned by ``get_region``.
    _OUTPUT_ORDER = ("Europe", "Asia", "Oceania", "Americas", "Africa")

    def __init__(self, models_directory):
        """Remember the directory that holds the per-region model folders."""
        self.models_directory = models_directory

    @staticmethod
    def _load_pickle(file_path):
        """Return the object pickled at ``file_path``.

        SECURITY: unpickling can execute arbitrary code; only point this at
        artefacts that come from a trusted source.
        """
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    def load_model(self, path):
        """Load a Keras model and its companion artefacts from ``path``.

        ``path`` is used as a plain string prefix, so it must end with a path
        separator. The following entries are read from it:
        ``bestmodel.tf``, ``label_encoder.pkl``,
        ``optimal_thresholds_f1.pkl`` and ``optimal_thresholds_ROC.pkl``.

        Returns:
            A ``(model, label_encoder, optF1, optROC)`` tuple.
        """
        model = tf.keras.models.load_model(path+"bestmodel.tf")
        # Re-compile the loaded model with the loss/optimizer/metric below.
        # No training happens here; callers only use the model for inference.
        model.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy'])
        label_encoder = self._load_pickle(path+'label_encoder.pkl')
        optF1 = self._load_pickle(path+'optimal_thresholds_f1.pkl')
        optROC = self._load_pickle(path+'optimal_thresholds_ROC.pkl')
        return model, label_encoder, optF1, optROC

    @staticmethod
    def _decode_with_thresholds(predictions_proba, thresholds, label_encoder):
        """Turn class probabilities into labels using ``thresholds``.

        Returns an empty list when ``thresholds`` is ``None``.
        """
        if thresholds is None:
            return []
        # NOTE(review): argmax over the 0/1 mask selects the first class whose
        # probability reaches its threshold, and class 0 when none does; that
        # is not necessarily the most probable class. Kept unchanged so that
        # existing predictions stay the same - confirm this is intended.
        above_threshold = (predictions_proba >= thresholds).astype(int)
        class_indices = np.argmax(above_threshold, axis=1)
        return label_encoder.inverse_transform(class_indices)

    def model_prediction(self, dataset, model, label_encoder, optF1=None, optROC=None):
        """Run ``model`` on ``dataset`` and decode its output into labels.

        The ``Author`` column is fed to the model as strings under the key
        ``input_text`` and the ``Author_Timezone`` column as floats under the
        key ``input_offset``.

        Returns:
            A ``(y_pred_F1, y_pred_ROC)`` tuple: the labels obtained with the
            ``optF1`` and ``optROC`` thresholds respectively. Each element is
            an empty list when the corresponding threshold is ``None``.
        """
        input_full_name = np.asarray(dataset['Author']).astype('str')
        input_offset = np.asarray(dataset['Author_Timezone']).astype('float')
        predictions_proba = model.predict({
            "input_text": input_full_name,
            "input_offset": input_offset
        })
        y_pred_F1 = self._decode_with_thresholds(predictions_proba, optF1, label_encoder)
        y_pred_ROC = self._decode_with_thresholds(predictions_proba, optROC, label_encoder)
        return y_pred_F1, y_pred_ROC

    def get_region(self, dataset):
        """Return a copy of ``dataset`` annotated with region predictions.

        ``dataset`` must provide the ``Author`` and ``Author_Timezone``
        columns. ``Author_Timezone`` is divided by 60 before prediction
        (presumably minutes to hours - confirm against the caller), and the
        returned frame carries the divided values.

        The result gains a ``region-prediction`` column and, for every
        non-empty region group, a ``sub-region-prediction`` column. Rows are
        grouped in the order Europe, Asia, Oceania, Americas, Africa; rows
        predicted as any other region are left out.

        The input frame itself is not modified, so the method can safely be
        called more than once on the same data.
        """
        # Work on a private copy: rescaling the caller's frame in place would
        # divide the offsets by 60 again on every subsequent call.
        dataset = dataset.copy()
        dataset["Author_Timezone"] = dataset["Author_Timezone"] / 60

        model, label_encoder, optF1, optROC = self.load_model(
            self.models_directory+"/region/files/")
        y_pred, _ = self.model_prediction(dataset, model, label_encoder, optF1, optROC)
        dataset["region-prediction"] = y_pred

        groups = []
        for region in self._OUTPUT_ORDER:
            # Explicit copy so the column assignment below targets an
            # independent frame rather than a slice of ``dataset``.
            subset = dataset[dataset["region-prediction"] == region].copy()
            if not subset.empty:
                if region in self._FIXED_SUB_REGIONS:
                    subset["sub-region-prediction"] = self._FIXED_SUB_REGIONS[region]
                else:
                    model, label_encoder, optF1, optROC = self.load_model(
                        self.models_directory+"/"+region+"/files/")
                    y_pred, _ = self.model_prediction(
                        subset, model, label_encoder, optF1, optROC)
                    subset["sub-region-prediction"] = y_pred
            groups.append(subset)
        return pd.concat(groups)