File size: 4,947 Bytes
c24562e
 
0c396ba
 
 
 
 
c24562e
0c396ba
c24562e
0c396ba
 
 
c24562e
0c396ba
 
 
 
 
c24562e
 
 
 
 
0c396ba
 
 
c24562e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c396ba
 
 
 
 
97615dd
c24562e
 
 
97615dd
 
 
 
c24562e
97615dd
c24562e
 
 
97615dd
 
 
c24562e
97615dd
c24562e
 
 
97615dd
 
c24562e
 
97615dd
c24562e
97615dd
c24562e
 
 
97615dd
 
 
0c396ba
 
 
 
 
 
c24562e
 
0c396ba
c24562e
97615dd
c24562e
97615dd
c24562e
97615dd
c24562e
97615dd
0c396ba
 
97615dd
 
 
 
0c396ba
 
 
 
 
 
 
 
 
 
 
 
97615dd
 
 
 
 
 
 
0c396ba
 
c24562e
 
 
0c396ba
 
c24562e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import numpy as np
from keras.models import load_model
import re
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import tokenizer_from_json
import re
import joblib
import json

# Rebuild the Keras tokenizer from its serialized JSON config so that text
# encoding at inference time matches what the RNN was trained with.
with open('tok.json', 'r') as json_file:
    tokenizer_json = json.load(json_file)

# tokenizer_from_json restores the fitted Tokenizer (word index, filters, ...).
tokenizer = tokenizer_from_json(tokenizer_json)
rnn_model = load_model("rnn_model.h5")

# Classical scikit-learn models, deserialized with joblib.
lr_model = joblib.load("logistic_model.pkl")
svm_model = joblib.load("svm_model.pkl")
nn_model = load_model("dl_model.h5")
mnb_model = joblib.load("mnb_model.pkl")

# Fitted TF-IDF vectorizer shared by all non-RNN models below.
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Pre-compiled pattern matching any HTML/XML tag such as "<br />" or "</p>".
TAG_RE = re.compile(r'<[^>]+>')

def remove_tags(text):
    """Return *text* with every HTML tag stripped out."""
    return re.sub(TAG_RE, '', text)

def preprocess_text(sen):
    """Normalize raw review text for the classical models.

    Drops HTML tags, replaces every non-letter with a space, removes
    stranded single letters, and collapses runs of whitespace.
    """
    cleaned = remove_tags(sen)

    # Apply each (pattern, replacement) step in order; the order matters
    # because the single-letter rule relies on the spaces introduced by
    # the letters-only substitution.
    substitutions = (
        ('[^a-zA-Z]', ' '),        # punctuation and digits -> spaces
        (r"\s+[a-zA-Z]\s+", ' '),  # orphan single characters
        (r'\s+', ' '),             # squeeze repeated whitespace
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned

def preprocess_text_for_rnn(text, tokenizer, maxlen):
    """Clean *text*, encode it with *tokenizer*, and right-pad to *maxlen*.

    Returns the padded integer sequence array expected by the RNN.
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    return pad_sequences(encoded, padding='post', maxlen=maxlen)
# Function to predict sentiment using Logistic Regression
def predict_lr(text):
    """Return the Logistic Regression sentiment class (0 or 1) for *text*."""
    cleaned = preprocess_text(text)
    # TF-IDF output is sparse; densify because the model was fit on dense arrays.
    features = tfidf_vectorizer.transform([cleaned]).toarray()
    return int(lr_model.predict(features)[0])


# Function to predict sentiment using SVM
def predict_svm(text):
    """Return the SVM sentiment class (0 or 1) for *text*."""
    cleaned = preprocess_text(text)
    # TF-IDF output is sparse; densify because the model was fit on dense arrays.
    features = tfidf_vectorizer.transform([cleaned]).toarray()
    return int(svm_model.predict(features)[0])

# Function to predict sentiment using Neural Network
def predict_nn(text):
    """Return the feed-forward network's sentiment class (0 or 1) for *text*."""
    cleaned = preprocess_text(text)
    # TF-IDF output is sparse; densify for the Keras model.
    features = tfidf_vectorizer.transform([cleaned]).toarray()
    # The model emits class probabilities; argmax selects the predicted label.
    probabilities = nn_model.predict(features)[0]
    return int(np.argmax(probabilities))

# Function to predict sentiment using Multinomial Naive Bayes
def predict_mnb(text):
    """Return the Multinomial Naive Bayes sentiment class (0 or 1) for *text*."""
    cleaned = preprocess_text(text)
    # TF-IDF output is sparse; densify because the model was fit on dense arrays.
    features = tfidf_vectorizer.transform([cleaned]).toarray()
    return int(mnb_model.predict(features)[0])

def predict_rnn(text):
    """Run the RNN on *text*.

    Returns ``(label, probabilities)`` where *label* is 0/1 and
    *probabilities* holds the per-class scores for the single input.
    """
    padded = preprocess_text_for_rnn(text, tokenizer, maxlen=170)
    probabilities = rnn_model.predict(padded)[0]
    return int(np.argmax(probabilities)), probabilities

def sentiment_prediction(text, model):
    """Predict the sentiment of *text* with the selected *model*.

    Parameters
    ----------
    text : str
        Raw user input from the Gradio text box.
    model : str
        One of the dropdown choices ("Logistic Regression", "SVM",
        "Neural Network", "Multinomial Naive Bayes",
        "Recurrent Neural Network").

    Returns
    -------
    tuple[str, str, str]
        (sentiment label, per-class percentage text, emoji). Per-class
        percentages are only available for the RNN; every other model
        reports "NOT AVAILABLE" in the second slot.
    """
    labels = ["Negative", "Positive"]
    # Dispatch table for the models that return only a 0/1 class.
    classifiers = {
        "Logistic Regression": predict_lr,
        "SVM": predict_svm,
        "Neural Network": predict_nn,
        "Multinomial Naive Bayes": predict_mnb,
    }

    if model == "Recurrent Neural Network":
        prediction, probs = predict_rnn(text)
        # probs[0] is P(negative), probs[1] is P(positive).
        # Fix: the original message omitted "Percentage" before the
        # negative label, making the two halves inconsistent.
        percentages_text = (
            f"Percentage {labels[0]}: {probs[0]:.2%}, "
            f"Percentage {labels[1]}: {probs[1]:.2%}"
        )
    else:
        # Unknown model names fall back to prediction 0 ("Negative"),
        # preserving the original behavior; the dropdown prevents this
        # case in practice.
        predictor = classifiers.get(model)
        prediction = predictor(text) if predictor else 0
        percentages_text = "NOT AVAILABLE"

    # Emoji mirrors the predicted class for a quick visual cue.
    emoji = "😃" if prediction == 1 else "😢"
    return labels[prediction], percentages_text, emoji




# Create the Gradio interface: a text box plus a model dropdown in,
# three labels out (sentiment, percentage text, emoji) — matching the
# triple returned by sentiment_prediction.
iface = gr.Interface(
    fn=sentiment_prediction,
    inputs=[gr.Textbox(type="text", label="Enter Text"), gr.Dropdown(["Logistic Regression", "SVM", "Neural Network", "Multinomial Naive Bayes", "Recurrent Neural Network"], label="Select Model")],
    outputs=[gr.Label(), gr.Label(), gr.Label()],
    live=True,  # re-run prediction as the user types instead of on submit
    title="Sentiment Analysis with Model Selection",
    description="Enter a text and choose a model for sentiment prediction.",
)

# Launch the Gradio interface (starts the local web server and blocks).
iface.launch()