gridflowai
committed on
Commit
·
0c396ba
1
Parent(s):
c24562e
Upload 12 files
Browse files- .gitattributes +1 -0
- README.md +5 -5
- app.py +70 -44
- classifier.pkl +3 -0
- requirements.txt +2 -0
- rnn_model.h5 +3 -0
- test.py +70 -0
- tok.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tok.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
|
|
1 |
---
|
2 |
+
title: ThreatClassification Oilspill - MNB
|
3 |
+
emoji: 🌖
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.50.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -1,27 +1,35 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
from
|
4 |
-
import re
|
5 |
-
|
|
|
|
|
6 |
import joblib
|
7 |
-
import
|
8 |
|
9 |
-
#
|
10 |
-
|
|
|
11 |
|
|
|
|
|
|
|
|
|
|
|
12 |
lr_model = joblib.load("logistic_model.pkl")
|
13 |
svm_model = joblib.load("svm_model.pkl")
|
14 |
nn_model = load_model("dl_model.h5")
|
15 |
mnb_model = joblib.load("mnb_model.pkl")
|
16 |
|
|
|
|
|
|
|
17 |
TAG_RE = re.compile(r'<[^>]+>')
|
18 |
|
19 |
def remove_tags(text):
|
20 |
return TAG_RE.sub('', text)
|
21 |
|
22 |
-
# Load other necessary files like vectorizers or scalers
|
23 |
-
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")
|
24 |
-
|
25 |
def preprocess_text(sen):
|
26 |
# Removing html tags
|
27 |
sentence = remove_tags(sen)
|
@@ -37,64 +45,85 @@ def preprocess_text(sen):
|
|
37 |
|
38 |
return sentence
|
39 |
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
def predict_lr(text):
|
43 |
preprocessed_text = preprocess_text(text)
|
44 |
vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
|
45 |
-
dense_vectorized_text = vectorized_text.toarray()
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
|
50 |
-
# Function to predict sentiment using SVM
|
51 |
def predict_svm(text):
|
52 |
preprocessed_text = preprocess_text(text)
|
53 |
vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
|
54 |
-
dense_vectorized_text = vectorized_text.toarray()
|
55 |
-
|
56 |
-
|
|
|
57 |
|
58 |
-
# Function to predict sentiment using Neural Network
|
59 |
def predict_nn(text):
|
60 |
preprocessed_text = preprocess_text(text)
|
61 |
vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
|
62 |
-
dense_vectorized_text = vectorized_text.toarray()
|
63 |
-
|
64 |
prediction_probs = nn_model.predict(dense_vectorized_text)[0]
|
65 |
prediction = int(np.argmax(prediction_probs))
|
66 |
-
return prediction
|
67 |
|
68 |
-
# Function to predict sentiment using Multinomial Naive Bayes
|
69 |
def predict_mnb(text):
|
70 |
preprocessed_text = preprocess_text(text)
|
71 |
vectorized_text = tfidf_vectorizer.transform([preprocessed_text])
|
72 |
-
dense_vectorized_text = vectorized_text.toarray()
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
# Set the function based on the selected model
|
77 |
def sentiment_prediction(text, model):
|
|
|
78 |
if model == "Logistic Regression":
|
79 |
-
prediction = predict_lr(text)
|
80 |
elif model == "SVM":
|
81 |
-
prediction = predict_svm(text)
|
82 |
elif model == "Neural Network":
|
83 |
-
prediction = predict_nn(text)
|
84 |
elif model == "Multinomial Naive Bayes":
|
85 |
-
prediction = predict_mnb(text)
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
# Create the Gradio interface
|
94 |
iface = gr.Interface(
|
95 |
fn=sentiment_prediction,
|
96 |
-
inputs=[gr.Textbox(type="text", label="Enter Text"), gr.Dropdown(["Logistic Regression", "SVM", "Neural Network", "Multinomial Naive Bayes"], label="Select Model")],
|
97 |
-
outputs=gr.Label(),
|
98 |
live=True,
|
99 |
title="Sentiment Analysis with Model Selection",
|
100 |
description="Enter a text and choose a model for sentiment prediction.",
|
@@ -102,6 +131,3 @@ iface = gr.Interface(
|
|
102 |
|
103 |
# Launch the Gradio interface
|
104 |
iface.launch()
|
105 |
-
|
106 |
-
|
107 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
+
from keras.models import load_model
|
4 |
+
import re
|
5 |
+
from keras.preprocessing.sequence import pad_sequences
|
6 |
+
from keras.preprocessing.text import tokenizer_from_json
|
7 |
+
import re
|
8 |
import joblib
|
9 |
+
import json
|
10 |
|
11 |
+
# Load the JSON data from the file
|
12 |
+
with open('tok.json', 'r') as json_file:
|
13 |
+
tokenizer_json = json.load(json_file)
|
14 |
|
15 |
+
# Rebuild the Keras tokenizer from its saved JSON configuration
|
16 |
+
tokenizer = tokenizer_from_json(tokenizer_json)
|
17 |
+
rnn_model = load_model("rnn_model.h5")
|
18 |
+
|
19 |
+
# Load the trained classifiers (joblib pickles and a Keras .h5 model)
|
20 |
lr_model = joblib.load("logistic_model.pkl")
|
21 |
svm_model = joblib.load("svm_model.pkl")
|
22 |
nn_model = load_model("dl_model.h5")
|
23 |
mnb_model = joblib.load("mnb_model.pkl")
|
24 |
|
25 |
+
# Load other necessary files like vectorizers or scalers
|
26 |
+
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")
|
27 |
+
|
28 |
TAG_RE = re.compile(r'<[^>]+>')
|
29 |
|
30 |
def remove_tags(text):
    """Return ``text`` with every match of ``TAG_RE`` deleted."""
    stripped = TAG_RE.sub('', text)
    return stripped
|
32 |
|
|
|
|
|
|
|
33 |
def preprocess_text(sen):
|
34 |
# Removing html tags
|
35 |
sentence = remove_tags(sen)
|
|
|
45 |
|
46 |
return sentence
|
47 |
|
48 |
+
def preprocess_text_for_rnn(text, tokenizer, maxlen):
    """Clean ``text`` and encode it as a single padded sequence.

    The text goes through ``preprocess_text``, is converted with
    ``tokenizer.texts_to_sequences`` (as a one-element batch) and is then
    passed to ``pad_sequences`` with ``padding='post'`` and ``maxlen``.

    Returns:
        Whatever ``pad_sequences`` returns for that one-element batch.
    """
    cleaned = preprocess_text(text)
    token_ids = tokenizer.texts_to_sequences([cleaned])
    return pad_sequences(token_ids, padding='post', maxlen=maxlen)
|
53 |
+
|
54 |
def predict_lr(text):
    """Classify ``text`` with ``lr_model``.

    The text is cleaned with ``preprocess_text``, vectorized with
    ``tfidf_vectorizer`` and densified before calling ``predict_proba``.

    Returns:
        ``(prediction, prediction_probs)``: the index of the largest
        probability as an ``int``, and the probability row for this text.
    """
    features = tfidf_vectorizer.transform([preprocess_text(text)]).toarray()
    probabilities = lr_model.predict_proba(features)[0]
    best_index = int(np.argmax(probabilities))
    return best_index, probabilities
|
61 |
|
|
|
62 |
def predict_svm(text):
    """Classify ``text`` with ``svm_model``.

    The text is cleaned with ``preprocess_text``, vectorized with
    ``tfidf_vectorizer`` and densified before calling ``predict_proba``.

    Returns:
        ``(prediction, prediction_probs)``: the index of the largest
        probability as an ``int``, and the probability row for this text.
    """
    features = tfidf_vectorizer.transform([preprocess_text(text)]).toarray()
    probabilities = svm_model.predict_proba(features)[0]
    best_index = int(np.argmax(probabilities))
    return best_index, probabilities
|
69 |
|
|
|
70 |
def predict_nn(text):
    """Classify ``text`` with ``nn_model``.

    The text is cleaned with ``preprocess_text``, vectorized with
    ``tfidf_vectorizer`` and densified before calling ``nn_model.predict``.

    Returns:
        ``(prediction, prediction_probs)``: the index of the largest
        output value as an ``int``, and the model's output row for this text.
    """
    features = tfidf_vectorizer.transform([preprocess_text(text)]).toarray()
    probabilities = nn_model.predict(features)[0]
    best_index = int(np.argmax(probabilities))
    return best_index, probabilities
|
77 |
|
|
|
78 |
def predict_mnb(text):
    """Classify ``text`` with ``mnb_model``.

    The text is cleaned with ``preprocess_text``, vectorized with
    ``tfidf_vectorizer`` and densified before calling ``predict_proba``.

    Returns:
        ``(prediction, prediction_probs)``: the index of the largest
        probability as an ``int``, and the probability row for this text.
    """
    features = tfidf_vectorizer.transform([preprocess_text(text)]).toarray()
    probabilities = mnb_model.predict_proba(features)[0]
    best_index = int(np.argmax(probabilities))
    return best_index, probabilities
|
85 |
+
|
86 |
+
def predict_rnn(text):
    """Classify ``text`` with ``rnn_model``.

    The text is encoded by ``preprocess_text_for_rnn`` using the
    module-level ``tokenizer`` and a fixed ``maxlen`` of 170.

    Returns:
        ``(prediction, prediction_probs)``: the index of the largest
        output value as an ``int``, and the model's output row for this text.
    """
    padded = preprocess_text_for_rnn(text, tokenizer, maxlen=170)
    probabilities = rnn_model.predict(padded)[0]
    best_index = int(np.argmax(probabilities))
    return best_index, probabilities
|
91 |
|
|
|
92 |
def sentiment_prediction(text, model):
    """Classify ``text`` with the selected model and format the result.

    Args:
        text: Raw input text to classify.
        model: Display name of the model to use: "Logistic Regression",
            "SVM", "Neural Network", "Multinomial Naive Bayes" or
            "Recurrent Neural Network".

    Returns:
        A 3-tuple ``(label, summary, emoji)``: the predicted label
        ("Negative" or "Positive"), a string with both class percentages,
        and an emoji matching the prediction. If ``model`` is not one of
        the known names, ``("Please select a model", "", "")`` is returned.
    """
    if model == "Logistic Regression":
        prediction, percentages = predict_lr(text)
    elif model == "SVM":
        prediction, percentages = predict_svm(text)
    elif model == "Neural Network":
        prediction, percentages = predict_nn(text)
    elif model == "Multinomial Naive Bayes":
        prediction, percentages = predict_mnb(text)
    elif model == "Recurrent Neural Network":
        prediction, percentages = predict_rnn(text)
    else:
        # No model (or an unknown one) was chosen. The earlier fallback of
        # an empty probability list raised IndexError at percentages[0].
        # NOTE(review): the interface uses live=True and the dropdown has no
        # default, so this path is presumably hit before a model is picked.
        return "Please select a model", "", ""

    # Index 0 is treated as negative and index 1 as positive.
    labels = ["Negative", "Positive"]
    prediction_label = labels[prediction]

    # Emoji mirrors the predicted class.
    emoji_positive = "😃"
    emoji_negative = "😢"
    emoji = emoji_positive if prediction == 1 else emoji_negative

    percentage_negative = percentages[0]
    percentage_positive = percentages[1]

    return prediction_label, f"{labels[0]}: {percentage_negative:.2%}, Percentage {labels[1]}: {percentage_positive:.2%}", emoji
|
121 |
+
|
122 |
# Create the Gradio interface
|
123 |
iface = gr.Interface(
|
124 |
fn=sentiment_prediction,
|
125 |
+
inputs=[gr.Textbox(type="text", label="Enter Text"), gr.Dropdown(["Logistic Regression", "SVM", "Neural Network", "Multinomial Naive Bayes", "Recurrent Neural Network"], label="Select Model")],
|
126 |
+
outputs=[gr.Label(), gr.Label(), gr.Label()],
|
127 |
live=True,
|
128 |
title="Sentiment Analysis with Model Selection",
|
129 |
description="Enter a text and choose a model for sentiment prediction.",
|
|
|
131 |
|
132 |
# Launch the Gradio interface
|
133 |
iface.launch()
|
|
|
|
|
|
classifier.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9189d53b69f0d9ff501d0a51aa9ab344b2455e03bf347653fb84ed075f788f3b
|
3 |
+
size 386759
|
requirements.txt
CHANGED
@@ -4,3 +4,5 @@ numpy
|
|
4 |
nltk
|
5 |
tensorflow==2.15.0
|
6 |
joblib==1.3.2
|
|
|
|
|
|
4 |
nltk
|
5 |
tensorflow==2.15.0
|
6 |
joblib==1.3.2
|
7 |
+
keras==2.15.0
|
8 |
+
h5py==3.9.0
|
rnn_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08682f8d8b196bfff00560f2c029e4dd57a51b6cfaece9f3fba85fb646b0e977
|
3 |
+
size 47449360
|
test.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from keras.models import load_model
|
2 |
+
from keras.preprocessing.text import tokenizer_from_json
|
3 |
+
from keras.preprocessing.sequence import pad_sequences
|
4 |
+
|
5 |
+
import json
|
6 |
+
import joblib
|
7 |
+
|
8 |
+
import re
|
9 |
+
|
10 |
+
rnn_model = load_model("rnn_model.h5")
|
11 |
+
|
12 |
+
|
13 |
+
# Load the JSON data from the file
|
14 |
+
with open('tok.json', 'r') as json_file:
|
15 |
+
tokenizer_json = json.load(json_file)
|
16 |
+
|
17 |
+
# Rebuild the Keras tokenizer from its saved JSON configuration
|
18 |
+
tokenizer = tokenizer_from_json(tokenizer_json)
|
19 |
+
|
20 |
+
|
21 |
+
TAG_RE = re.compile(r'<[^>]+>')
|
22 |
+
|
23 |
+
|
24 |
+
def remove_tags(text):
    """Return ``text`` with every match of ``TAG_RE`` deleted."""
    stripped = TAG_RE.sub('', text)
    return stripped
|
26 |
+
|
27 |
+
|
28 |
+
# Load other necessary files like vectorizers or scalers
|
29 |
+
tfidf_vectorizer = joblib.load("tfidf_vectorizer.pkl")
|
30 |
+
|
31 |
+
|
32 |
+
def preprocess_text(sen):
    """Normalize a raw sentence before tokenization.

    Steps, applied in order:
      1. strip tags with ``remove_tags``;
      2. replace every character that is not an ASCII letter with a space;
      3. replace a single letter surrounded by whitespace with one space;
      4. collapse each run of whitespace into one space.

    Returns:
        The cleaned sentence string.
    """
    sentence = remove_tags(sen)

    # (pattern, replacement) pairs; the order matters.
    substitutions = (
        ('[^a-zA-Z]', ' '),
        (r"\s+[a-zA-Z]\s+", ' '),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        sentence = re.sub(pattern, replacement, sentence)

    return sentence
|
46 |
+
|
47 |
+
|
48 |
+
def preprocess_text_for_rnn(text, tokenizer, maxlen):
    """Clean ``text`` and encode it as a single padded sequence.

    The text goes through ``preprocess_text``, is converted with
    ``tokenizer.texts_to_sequences`` (as a one-element batch) and is then
    passed to ``pad_sequences`` with ``padding='post'`` and ``maxlen``.

    Returns:
        Whatever ``pad_sequences`` returns for that one-element batch.
    """
    cleaned = preprocess_text(text)
    token_ids = tokenizer.texts_to_sequences([cleaned])
    return pad_sequences(token_ids, padding='post', maxlen=maxlen)
|
56 |
+
|
57 |
+
|
58 |
+
# Function to predict sentiment using the Recurrent Neural Network
|
59 |
+
def predict_rnn(text):
    """Return 0 or 1 for ``text`` based on ``rnn_model``'s two outputs.

    The text is encoded by ``preprocess_text_for_rnn`` using the
    module-level ``tokenizer`` and a fixed ``maxlen`` of 170.

    Returns:
        0 when the first output is strictly greater than the second,
        otherwise 1.
    """
    padded = preprocess_text_for_rnn(text, tokenizer, maxlen=170)
    scores = rnn_model.predict(padded)[0]

    # Assuming class 0 is negative and class 1 is positive
    return 0 if scores[0] > scores[1] else 1
|
68 |
+
|
69 |
+
|
70 |
+
print(predict_rnn('Apart from having the longest reign in British history years Queen Victoria also holds two other distinctions She was apart from our current Queen the oldest ever British monarch living to the age of And she was also the youngest ever British as opposed to English or Scottish monarch coming to the throne as girl of eighteen And yet whenever television or the cinema make programme or film about her they seem far more interested in the older Victoria than they do in the young girl the version of Victoria with which modern audiences will probably be most familiar is Judi Dench in Mrs Brown The Young Victoria tries to redress the balance by showing us the events surrounding her accession and the early years of her reign It has the rare distinction of being produced by former Royal Sarah Duchess of York whose daughter Princess Beatrice makes brief appearance as an extra There are three main strands to the plot The first concerns the intrigues of Victoria mother the Duchess of Kent highly unpopular figure even with her own daughter largely because of the influence of her adviser Sir John Conroy who was widely rumoured to be her lover According to one unfounded rumour he and not the late Duke of Kent was Victoria natural father The second strand concerns the growing romance between Victoria and her German cousin Prince Albert and the attempts of King Leopold of Belgium who was uncle to both of them to influence this romance Leopold hope was to increase the prestige of the House of Saxe Coburg to which both he and Albert belonged The third concerns one of the strangest episodes in British political history the Bedchamber Crisis of when supporters of the Tory Party which had traditionally supported strong monarchy rioted because the young Queen was perceived to favour the Whig Party and their leader Lord Melbourne even though the Whigs had historically supported quasi republican system of government with the monarch reduced to figurehead Scriptwriter 
Julian Fellowes is known for his Conservative views and at times wondered if this may have coloured his treatment of political themes as he seems to lean to the side of the Tories the predecessors of the modern Conservative party Their leader Robert Peel is shown as statesmanlike and dignified whereas Melbourne for all his dash and charm is shown as devious and uninterested in social reform There may be some truth is these characterisations but Fellowes glosses over the fact that only few years earlier the Tories had opposed the Reform Act which ended the corrupt electoral system of rotten boroughs and that they had benefited from William IV unconstitutional dismissal of Whig administration Lessons in dynastic and constitutional history do not always transfer well to the cinema screen and this one contains its share of inaccuracies Prince Albert for example was not injured in Edward Oxford attempt on Victoria life and Melbourne in his late fifties at the time of Victoria accession was not as youthful as he is portrayed here by Paul Bettany King William IV certainly disliked the Duchess of Kent who was his sister in law but doubt if he would have gone so far as to bawl abuse at her during state banquet as he is shown doing here also failed to understand the significance of the scene in which the Duchess and Conroy try to force Victoria to sign Regency Order the Duchess constitutional position was made clear by the Regency Act which provided that she would become Regent if her daughter was still under eighteen at the time of her accession No piece of paper signed by Victoria could have altered the provisions of the Act There are also occasional infelicities In one early scene we see Victoria and Albert playing chess while comparing themselves to pawns being moved around chessboard metaphor so hackneyed that the whole scene should have come complete with Danger Major clich ahead warning Yet in spite of scenes like this came to enjoy the film There were some good 
performances especially from Miranda Richardson as the scheming Duchess and Mark Strong as the obnoxious Conroy It is visually very attractive being shot in sumptuous style we have come to associate with British historical drama Jim Broadbent gives an amusing turn as King William although he does occasionally succumb to the temptation of going over the top Although not as disastrously over the top as he was in Moulin Rouge The main reason for the film success however is the performances of Emily Blunt and Rupert Friend as the two young lovers Victoria and Albert Blunt is probably more attractive than Victoria was in real life but in her delightful portrayal the Queen is no longer the old lady of the popular imagination the black clad Widow of Windsor who was perpetually not amused but determined strong minded and loving young woman Her love for Albert and their happy family life together was one of the main reasons why the monarchy succeeded in reestablishing itself in the affections of the British people With the exception of George III Victoria Hanoverian ancestors had been notoriously lacking in the matrimonial virtues Blunt and Friend make The Young Victoria touching romance and gripping human drama as well as an exploration of key period in British history '))
|
tok.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d5cc0f92c384fa972a7c734c438d63e288aa20a9c34d450389e70cc93cb4d32
|
3 |
+
size 10838553
|