__copyright__ = "Copyright (C) 2023 Ali Mustapha"
__license__ = "GPL-3.0-or-later"

import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf


class RegionPredictor:
    """Predict a region and a sub-region for each row of an author dataset.

    Models are read from ``<models_directory>/<name>/files/``, where ``name``
    is ``region`` for the top-level model and ``Europe``, ``Asia`` or
    ``Americas`` for the sub-region models.  Each such directory must contain
    ``bestmodel.tf``, ``label_encoder.pkl``, ``optimal_thresholds_f1.pkl`` and
    ``optimal_thresholds_ROC.pkl``.
    """

    # Order in which the per-region frames are concatenated by get_region().
    _REGION_ORDER = ("Europe", "Asia", "Oceania", "Americas", "Africa")

    # Regions that have no dedicated model: every row gets this constant label.
    _FIXED_SUB_REGIONS = {
        "Oceania": "Australia and New Zealand",
        "Africa": "Africa",
    }

    def __init__(self, models_directory):
        """Remember the root directory that holds the model sub-directories."""
        self.models_directory = models_directory

    @staticmethod
    def _load_pickle(file_path):
        """Return the object unpickled from ``file_path``."""
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only point models_directory at artifacts you trust.
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    def load_model(self, path):
        """Load a saved model and its companion artifacts from ``path``.

        Args:
            path: Directory containing ``bestmodel.tf`` and the three pickle
                files.  A trailing separator is accepted but not required.

        Returns:
            Tuple ``(model, label_encoder, optF1, optROC)``: the compiled
            Keras model followed by the three unpickled objects.
        """
        model = tf.keras.models.load_model(os.path.join(path, "bestmodel.tf"))
        # Re-compile the restored model; nothing is trained here.
        model.compile(
            loss=tf.keras.losses.categorical_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=['accuracy'],
        )
        label_encoder = self._load_pickle(os.path.join(path, 'label_encoder.pkl'))
        optF1 = self._load_pickle(os.path.join(path, 'optimal_thresholds_f1.pkl'))
        optROC = self._load_pickle(os.path.join(path, 'optimal_thresholds_ROC.pkl'))
        return model, label_encoder, optF1, optROC

    @staticmethod
    def _labels_from_thresholds(predictions_proba, thresholds, label_encoder):
        """Turn class probabilities into labels using ``thresholds``.

        np.argmax over the 0/1 mask returns the first (lowest-index) class
        whose probability reaches its threshold.  When no class reaches it the
        mask row is all zeros, so class index 0 is selected.
        """
        # presumably one threshold per class, broadcast over rows -- confirm
        passed = (predictions_proba >= thresholds).astype(int)
        class_indices = np.argmax(passed, axis=1)
        return label_encoder.inverse_transform(class_indices)

    def model_prediction(self, dataset, model, label_encoder, optF1=None, optROC=None):
        """Run ``model`` on ``dataset`` and decode the thresholded predictions.

        Args:
            dataset: Frame with ``Author`` and ``Author_Timezone`` columns.
            model: Object whose ``predict`` accepts a dict with the keys
                ``input_text`` and ``input_offset``.
            label_encoder: Object providing ``inverse_transform``.
            optF1: Thresholds for the first result, or ``None`` to skip it.
            optROC: Thresholds for the second result, or ``None`` to skip it.

        Returns:
            Tuple ``(y_pred_F1, y_pred_ROC)``.  Each element is the decoded
            label array, or an empty list when its thresholds were ``None``.
        """
        input_Full_name = np.asarray(dataset['Author']).astype('str')
        input_offset = np.asarray(dataset['Author_Timezone']).astype('float')

        predictions_proba = model.predict({
            "input_text": input_Full_name,
            "input_offset": input_offset,
        })

        y_pred_F1 = []
        y_pred_ROC = []
        if optF1 is not None:
            y_pred_F1 = self._labels_from_thresholds(
                predictions_proba, optF1, label_encoder)
        if optROC is not None:
            y_pred_ROC = self._labels_from_thresholds(
                predictions_proba, optROC, label_encoder)
        return y_pred_F1, y_pred_ROC

    def _predict_labels(self, dataset, model_name):
        """Load the model stored under ``model_name`` and return F1-threshold labels."""
        model_path = f"{self.models_directory}/{model_name}/files/"
        model, label_encoder, optF1, _ = self.load_model(model_path)
        # Only the F1-threshold labels are used, so the ROC thresholds are not
        # passed on and that decoding step is skipped.
        y_pred, _ = self.model_prediction(dataset, model, label_encoder, optF1)
        return y_pred

    def get_region(self, dataset):
        """Return a copy of ``dataset`` annotated with region predictions.

        The input frame is left untouched.  In the result,
        ``Author_Timezone`` is divided by 60 (presumably minutes to hours --
        confirm against the data source) and two columns are added:
        ``region-prediction`` and ``sub-region-prediction``.

        Rows are grouped by predicted region and concatenated in the order
        Europe, Asia, Oceania, Americas, Africa, so the row order generally
        differs from the input.  Rows whose predicted region is none of these
        five are not part of the result.

        Args:
            dataset: Frame with ``Author`` and ``Author_Timezone`` columns.

        Returns:
            A new ``pandas.DataFrame``.
        """
        # Work on a copy so repeated calls do not rescale the caller's
        # Author_Timezone column again or add columns to the caller's frame.
        data = dataset.copy()
        data["Author_Timezone"] = data["Author_Timezone"] / 60

        if data.empty:
            # Nothing to predict: skip model loading and return the expected
            # columns, both empty.
            data["region-prediction"] = pd.Series(dtype=object, index=data.index)
            data["sub-region-prediction"] = pd.Series(dtype=object, index=data.index)
            return data

        data["region-prediction"] = self._predict_labels(data, "region")

        per_region = []
        for region in self._REGION_ORDER:
            # Explicit copy: the subset gets a new column below, and assigning
            # into the result of boolean indexing is not reliable.
            subset = data[data["region-prediction"] == region].copy()
            if not subset.empty:
                if region in self._FIXED_SUB_REGIONS:
                    subset["sub-region-prediction"] = self._FIXED_SUB_REGIONS[region]
                else:
                    subset["sub-region-prediction"] = self._predict_labels(subset, region)
            per_region.append(subset)

        return pd.concat(per_region)