|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import plotly.express as px |
|
from wordcloud import WordCloud, STOPWORDS |
|
import matplotlib.pyplot as plt |
|
import folium |
|
import plotly.express as px |
|
import seaborn as sns |
|
import json |
|
import os |
|
from streamlit_folium import folium_static |
|
|
|
# Turn off Streamlit's "pyplot global use" deprecation notice.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Tweet dataset shared by the whole dashboard; read once at import time.
DATA_ = pd.read_csv("combined-csv-files.csv")

# The same heading and blurb are shown in the main page and in the sidebar.
_APP_TITLE = "Sentiment Analysis of Tweets"
_APP_BLURB = "This application is a streamlit dashboard to analyze the sentiment of Tweets"

st.title(_APP_TITLE)
st.sidebar.title(_APP_TITLE)
st.markdown(_APP_BLURB)
st.sidebar.markdown(_APP_BLURB)
|
|
|
|
|
# Indian states / union territories that get_state() recognises by name.
_INDIAN_STATES = (
    "Andaman and Nicobar Islands", "Andhra Pradesh", "Arunachal Pradesh",
    "Assam", "Bihar", "Chandigarh", "Chhattisgarh", "Dadra and Nagar Haveli",
    "Daman and Diu", "Delhi", "Goa", "Gujarat", "Haryana", "Himachal Pradesh",
    "Jammu and Kashmir", "Jharkhand", "Karnataka", "Kerala", "Ladakh",
    "Lakshadweep", "Madhya Pradesh", "Maharashtra", "Manipur", "Meghalaya",
    "Mizoram", "Nagaland", "Odisha", "Puducherry", "Punjab", "Rajasthan",
    "Sikkim", "Tamil Nadu", "Telangana", "Tripura", "Uttar Pradesh",
    "Uttarakhand", "West Bengal",
)
_INDIAN_STATE_SET = frozenset(_INDIAN_STATES)

# Keyed by CITY, not by state: one state has several cities, and a dict
# keyed by state can only keep a single city per state (duplicate keys in a
# dict literal silently overwrite each other).
_CITY_TO_STATE = {
    "New Delhi": "Delhi",
    "Surat": "Gujarat",
    "Gurgaon": "Haryana",
    "Bangalore": "Karnataka",
    "Bengaluru": "Karnataka",
    "Bengaluru South": "Karnataka",
    "Pune": "Maharashtra",
    "Mumbai": "Maharashtra",
    "Navi Mumbai": "Maharashtra",
    "Hyderabad": "Telangana",
    "Kolkata": "West Bengal",
    "Calcutta": "West Bengal",
    "Kota": "Rajasthan",
    "Jodhpur": "Rajasthan",
    "Lukhnow": "Uttar Pradesh",  # misspelling kept on purpose: it was an explicit entry
    "Lucknow": "Uttar Pradesh",
    "Noida": "Uttar Pradesh",
    "Patna": "Bihar",
    "Dehradun": "Uttarakhand",
    "Indore": "Madhya Pradesh",
    "Bhopal": "Madhya Pradesh",
}

# Every state name resolves to itself; the known cities resolve to their state.
_PLACE_TO_STATE = {**{name: name for name in _INDIAN_STATES}, **_CITY_TO_STATE}


def get_state(x):
    """Map a free-text tweet location to an Indian state name.

    The location is split on commas. If the last component is a known state
    it is returned; otherwise the first component is looked up as a known
    city or state. Matching is case-sensitive and ignores surrounding
    whitespace.

    Args:
        x: Location text such as ``"Mumbai, India"`` or ``"Pune, Maharashtra"``.

    Returns:
        The state name, or ``'Non_India'`` when nothing matches.
    """
    parts = [part.strip() for part in str(x).split(',')]
    last_part, first_part = parts[-1], parts[0]
    if last_part in _INDIAN_STATE_SET:
        return last_part
    return _PLACE_TO_STATE.get(first_part, 'Non_India')


def _render_random_tweet(data):
    """Sidebar: show one random tweet text for the selected sentiment."""
    st.sidebar.subheader("Show random tweet")
    random_tweet = st.sidebar.radio('Sentiment', ('Positive', 'Neutral', 'Negative'))
    candidates = data.loc[data['sentiment_flair'] == random_tweet, 'Text']
    if candidates.empty:
        # sample(n=1) raises on an empty selection, so report it instead.
        st.sidebar.markdown("No tweets found for this sentiment.")
    else:
        st.sidebar.markdown(candidates.sample(n=1).iat[0])


def _render_sentiment_counts(data):
    """Bar or pie chart of the number of tweets per `sentiment_flair` value."""
    st.sidebar.markdown("### Number of tweets by sentiment")
    select = st.sidebar.selectbox('Visualization type', ['Histogram', 'Pie chart'])
    counts = data['sentiment_flair'].value_counts()
    sentiment_count = pd.DataFrame({'Sentiment': counts.index, 'Tweets': counts.values})

    if st.sidebar.checkbox("Hide", True):
        return
    st.markdown("### Number of tweets by sentiment")
    if select == "Histogram":
        fig = px.bar(sentiment_count, x='Sentiment', y='Tweets', color='Tweets', height=500)
    else:
        fig = px.pie(sentiment_count, values='Tweets', names='Sentiment')
    st.plotly_chart(fig)


def _render_hourly_map(data):
    """Map (and optionally the raw rows) of tweets created in the chosen hour."""
    st.sidebar.subheader("When and Where are users tweeting from?")
    hour = st.sidebar.slider("Hour of day", 0, 23)
    modified_data = data[data['tweet_created'].dt.hour == hour]

    if st.sidebar.checkbox("Close", True, key='1'):
        return
    st.markdown("### Tweets locations based on the time of date")
    st.markdown(f"{len(modified_data)} tweets between {hour}:00 and {(hour + 1) % 24}:00")
    st.map(modified_data)
    # NOTE(review): the raw-data toggle is treated as part of this section
    # (only offered while the section is open) -- confirm intended nesting.
    if st.sidebar.checkbox("Show Raw Data", False):
        st.write(modified_data)


def _render_language_breakdown(data):
    """Histogram of tweets per selected language, faceted by sentiment."""
    st.sidebar.subheader("Breakdown language tweets by sentiment")
    choice = st.sidebar.multiselect('Pick language', ('en', 'hi'), key='0')
    if not choice:
        return
    choice_data = data[data['language'].isin(choice)]
    fig_choice = px.histogram(
        choice_data,
        x='language',
        y='sentiment_flair',
        histfunc='count',
        color='sentiment_flair',
        facet_col='sentiment_flair',
        labels={'sentiment_flair': 'tweets'},
        height=600,
        width=800,
    )
    st.plotly_chart(fig_choice)


def _render_word_cloud(data):
    """Word cloud of tweet text for the selected sentiment."""
    st.sidebar.header("Word Cloud")
    word_sentiment = st.sidebar.radio(
        'Display word cloud for what sentiment?', ('Positive', 'Neutral', 'Negative'))

    if st.sidebar.checkbox("Close", True, key='3'):
        return
    st.header(f'Word cloud for {word_sentiment} sentiment')

    # Drop missing texts: str.join() raises on NaN values.
    texts = data.loc[data['sentiment_flair'] == word_sentiment, 'Text'].dropna().astype(str)
    # Strip links, @-mentions and the retweet marker before counting words.
    processed_words = ' '.join(
        word
        for word in ' '.join(texts).split()
        if 'http' not in word and not word.startswith('@') and word != 'RT'
    )
    if not processed_words:
        # WordCloud.generate() raises when there is nothing to plot.
        st.markdown("No words available for this sentiment.")
        return

    wordcloud = WordCloud(stopwords=STOPWORDS,
                          background_color='white', height=640, width=800).generate(processed_words)
    # Draw on an explicit figure rather than pyplot's implicit global one.
    fig, ax = plt.subplots()
    ax.imshow(wordcloud)
    ax.set_xticks([])
    ax.set_yticks([])
    st.pyplot(fig)


def _build_sentiment_map(data):
    """Build a folium choropleth of per-state sentiment.

    Rows with a non-null `location` are assigned a state via get_state();
    per state, `total_sentiment` is the count of `Label == 1` rows minus the
    count of `Label == 0` rows (presumably 1 = positive, 0 = negative --
    confirm against the dataset). Rows resolved to 'Non_India' are excluded.
    """
    with open('india_state.json', encoding='utf-8') as file:
        geojson_data = json.load(file)
    # Choropleth joins on 'feature.id', so expose each state's name as its id.
    for feature in geojson_data['features']:
        feature['id'] = feature['properties']['NAME_1']

    sentiment_map = folium.Map(location=[20.5937, 78.9629], zoom_start=4)

    located = data[data['location'].notna()].copy()
    located['states'] = located['location'].apply(get_state)

    label_counts = located.groupby(['states'])['Label'].value_counts().unstack().fillna(0.0)
    # A label that never occurs has no column; add it so the subtraction
    # below cannot fail with KeyError.
    for label in (0, 1):
        if label not in label_counts.columns:
            label_counts[label] = 0.0
    df_state_sentiment = label_counts.reset_index()
    df_state_sentiment['total_sentiment'] = df_state_sentiment[1] - df_state_sentiment[0]
    dff = df_state_sentiment[df_state_sentiment['states'] != 'Non_India']

    folium.Choropleth(geo_data=geojson_data,
                      data=dff,
                      name='CHOROPLETH',
                      key_on='feature.id',
                      columns=['states', 'total_sentiment'],
                      fill_color='YlOrRd',
                      fill_opacity=0.7,
                      line_opacity=0.4,
                      legend_name='Sentiments',
                      highlight=True).add_to(sentiment_map)
    folium.LayerControl().add_to(sentiment_map)
    return sentiment_map


def _render_sentiment_map(data):
    """Sidebar toggle plus the per-state sentiment choropleth."""
    st.sidebar.header("Map Visualisation")
    if not st.sidebar.checkbox("Close", True, key='4'):
        # Built only while the section is open, so the GeoJSON load and the
        # per-tweet state lookup are skipped when the map is hidden.
        folium_static(_build_sentiment_map(data))


def run():
    """Render the tweet-sentiment dashboard.

    Adds a parsed `tweet_created` column to the module-level `DATA_` frame
    and then draws each section in order: random tweet, sentiment counts,
    hourly map, language breakdown, word cloud and the state choropleth.
    """

    @st.cache(persist=True)
    def load_data():
        # Parse the 'Datetime' column once so sections can filter by hour.
        DATA_['tweet_created'] = pd.to_datetime(DATA_['Datetime'])
        return DATA_

    data = load_data()

    _render_random_tweet(data)
    _render_sentiment_counts(data)
    _render_hourly_map(data)
    _render_language_breakdown(data)
    _render_word_cloud(data)
    _render_sentiment_map(data)
|
|
|
|
|
# Draw the dashboard only when this file is executed as the main script.
if __name__ == "__main__":
    run()
|
|