pankajsingh3012 committed: Upload 21 files
- .gitattributes +3 -0
- 1619771.jpg +0 -0
- 958461.jpg +0 -0
- R (1).jpg +0 -0
- R.jpg +0 -0
- app.py +92 -0
- black.jpg +0 -0
- model.joblib +3 -0
- new.jpg +0 -0
- new2.jpg +0 -0
- pages/TaxiFare.csv +0 -0
- pages/charts.py +103 -0
- pages/data_info.py +97 -0
- pages/matt.jpg +3 -0
- pages/parot.jpg +3 -0
- pages/texi2.jpeg +0 -0
- raw.gif +0 -0
- raw2.gif +3 -0
- requirements.txt +13 -0
- synthesize.mp3 +0 -0
- test.csv +0 -0
- texii.jpg +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+pages/matt.jpg filter=lfs diff=lfs merge=lfs -text
+pages/parot.jpg filter=lfs diff=lfs merge=lfs -text
+raw2.gif filter=lfs diff=lfs merge=lfs -text
1619771.jpg
ADDED
958461.jpg
ADDED
R (1).jpg
ADDED
R.jpg
ADDED
app.py
ADDED
@@ -0,0 +1,92 @@
import joblib
import streamlit as st
from sklearn.ensemble import RandomForestRegressor
import base64


# Widen the sidebar content using custom CSS
st.markdown(
    f'''
    <style>
    .sidebar .sidebar-content {{
        width: 375px;
    }}
    </style>
    ''',
    unsafe_allow_html=True
)

with st.sidebar:
    st.link_button("more_information", "https://www.kaggle.com/code/pankajsinghardh/p-tax-fare")
    st.success("")

model = joblib.load("model.joblib")
titleimg = "new.jpg"

# implement background formatting
def set_bg_hack(main_bg):
    # embed a local image as a base64 data URI and set it as the app background
    main_bg_ext = "jpg"
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95%;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack(titleimg)


st.markdown("<h1><font color='yellow'><center>'Taxi_Fare'</center></font></h1>", unsafe_allow_html=True)

le = r'''
$\textsf{
    \LARGE Hour\
}$
'''
# pandas dt.hour (used to build the training feature) runs 0-23
hour = st.selectbox(f":red[{le}]", range(0, 24))

le = r'''
$\textsf{
    \LARGE Distance\
}$
'''
distance = st.number_input(f":red[{le}]", step=1)

le = r'''
$\textsf{
    \LARGE Weekday\
}$
'''
weekday = st.selectbox(f":red[{le}]", ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"])

# pandas weekday() numbers the days Monday=0 .. Sunday=6, so map the labels the same way
day = {"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3, "Friday": 4, "Saturday": 5, "Sunday": 6}

btn = st.button("predict")
if btn:
    price = model.predict([[hour, distance, day[weekday]]])

    st.snow()

    st.write(":red[Fare_in_USD]", price)

def autoplay_audio(file_path: str):
    # embed an autoplaying <audio> element backed by a base64-encoded local mp3
    with open(file_path, "rb") as f:
        data = f.read()
        b64 = base64.b64encode(data).decode()
        md = f"""
        <audio controls autoplay="true">
            <source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
        </audio>
        """
        st.markdown(
            md,
            unsafe_allow_html=True,
        )

autoplay_audio("synthesize.mp3")
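For context, model.joblib (added below as an LFS pointer) is the regressor that app.py feeds with [hour, distance, week_day]. The actual training code lives in the linked Kaggle notebook and is not part of this commit; the following is only a minimal, hypothetical sketch of how such an artifact could be produced, assuming the column names seen in pages/TaxiFare.csv and the feature engineering used in pages/charts.py.

# Hypothetical training sketch, not the author's script: fits a
# RandomForestRegressor on the hour/distance/weekday features that
# app.py passes to model.predict, then saves it with joblib.
import joblib
import pandas as pd
from math import cos, asin, sqrt, pi
from sklearn.ensemble import RandomForestRegressor

def haversine_km(lon1, lat1, lon2, lat2):
    # same haversine-style distance as pages/charts.py, in km
    r, p = 6371, pi / 180
    a = 0.5 - cos((lat2 - lat1) * p) / 2 + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    return 2 * r * asin(sqrt(a))

df = pd.read_csv("pages/TaxiFare.csv", parse_dates=["date_time_of_pickup"])
df["hour"] = df.date_time_of_pickup.dt.hour
df["week_day"] = df.date_time_of_pickup.dt.weekday
df["distance"] = [
    round(haversine_km(lon1, lat1, lon2, lat2), 2)
    for lon1, lat1, lon2, lat2 in df.iloc[:, 3:7].values
]

X, y = df[["hour", "distance", "week_day"]], df["amount"]
model = RandomForestRegressor(n_estimators=100, random_state=42)  # assumed hyperparameters
model.fit(X, y)
joblib.dump(model, "model.joblib")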
black.jpg
ADDED
model.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47e9c4a83be592d763c7872fe23a10c0b16e29a768b072e22e29fa1a66894cf6
size 4853617
new.jpg
ADDED
new2.jpg
ADDED
pages/TaxiFare.csv
ADDED
The diff for this file is too large to render.
pages/charts.py
ADDED
@@ -0,0 +1,103 @@
import pandas as pd
import streamlit as st
import base64
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from streamlit_folium import st_folium

# use a forward-slash path so it also resolves on Linux (e.g. on Spaces)
titleimg = "pages/matt.jpg"

# implement background formatting
def set_bg_hack(main_bg):
    # embed a local image as a base64 data URI and set it as the app background
    main_bg_ext = "jpg"
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95%;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack(titleimg)

df = pd.read_csv("pages/TaxiFare.csv", parse_dates=["date_time_of_pickup"])

# from the pickup timestamp, extract the hour and add it as a new feature
df["hour"] = df.date_time_of_pickup.dt.hour

# from the pickup timestamp, extract the weekday (Monday=0 .. Sunday=6) and add it as a new feature
df["week_day"] = df.date_time_of_pickup.apply(lambda x: x.weekday())

# helper that converts pickup/drop-off coordinates into a distance in km (haversine formula)
from math import cos, asin, sqrt, pi


def distance(lon1, lat1, lon2, lat2):
    r = 6371  # earth radius in km
    p = pi / 180

    a = 0.5 - cos((lat2 - lat1) * p) / 2 + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    return 2 * r * asin(sqrt(a))


# new distance feature/column for the data frame
data = []
for x in df.iloc[:, 3:7].values:
    data.append(round(distance(x[0], x[1], x[2], x[3]), 2))

df["distance"] = data

# from the pickup timestamp, extract the month and add it as a new feature
df["month"] = df["date_time_of_pickup"].dt.month


# calculating the average amount month-wise
st.markdown("<h3><font color='green'><center>Average_Amount_Each_Month</font></center></h3>", unsafe_allow_html=True)
month_data = df.groupby("month")["amount"].mean().reset_index()
month_data.index = ["january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"]
st.bar_chart(month_data, color=["#ffaa11", "#ffaa33"])


st.markdown("<h3><font color='green'><center>Average_Amount_Each_Day</font></center></h3>", unsafe_allow_html=True)
# calculating the average amount per weekday
week_data = df.groupby("week_day")["amount"].mean()
week_data = week_data.reset_index()
# week_day comes from pandas weekday(), so 0 is Monday and 6 is Sunday
week_data.index = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]

st.bar_chart(week_data)

# grouping the data by hour to find the average amount for each hour
st.markdown("<h3><font color='green'><center>Average_Amount_Each_Hour</font></center></h3>", unsafe_allow_html=True)
hour_data = df.groupby(["hour"])["amount"].mean().reset_index()

st.bar_chart(hour_data)

st.markdown("<h3><font color='green'><center>Correlation matrix</font></center></h3>", unsafe_allow_html=True)
fig, ax = plt.subplots(figsize=(10, 8))
# numeric_only=True keeps the datetime column out of the correlation computation
sns.heatmap(df.iloc[:, 1:].corr(numeric_only=True), ax=ax, annot=True, cmap="hot")
st.write(fig)
st.markdown("""<h4><font color="white">Upon exploring the dataset, no significant correlation was found among the variables. The correlation coefficients are all close to zero,
suggesting a lack of linear relationship between the variables in this dataset. Further analysis using non-linear models or
domain-specific feature engineering may be required to uncover potential dependencies or interactions between the variables.</font></h4>""", unsafe_allow_html=True)


st.markdown("<h3><font color='green'><center>Pickup Location</center></font></h3>", unsafe_allow_html=True)
new_york = folium.Map(location=[40.730610, -73.935242], zoom_start=12)
# plotting the first 100 pickup locations on the map
for i in df.index[:100]:
    folium.CircleMarker(location=[df['latitude_of_pickup'][i], df['longitude_of_pickup'][i]], color='red').add_to(new_york)
st_folium(new_york, width=725)
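A quick sanity check for the haversine-style distance() helper above may be useful; this standalone snippet is not part of the upload. With the (lon1, lat1, lon2, lat2) argument order used in charts.py, identical points should give 0 and one degree of latitude roughly 111 km.

# Hypothetical check of the distance() helper from pages/charts.py.
from math import cos, asin, sqrt, pi

def distance(lon1, lat1, lon2, lat2):
    r = 6371  # earth radius in km
    p = pi / 180
    a = 0.5 - cos((lat2 - lat1) * p) / 2 + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    return 2 * r * asin(sqrt(a))

print(distance(0, 0, 0, 0))  # 0.0 for identical points
print(distance(0, 0, 0, 1))  # ~111.19 km for one degree of latitude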
pages/data_info.py
ADDED
@@ -0,0 +1,97 @@
import pandas as pd
import streamlit as st
import base64


# use a forward-slash path so it also resolves on Linux (e.g. on Spaces)
titleimg = "pages/parot.jpg"

# implement background formatting
def set_bg_hack(main_bg):
    # embed a local image as a base64 data URI and set it as the app background
    main_bg_ext = "jpg"
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
            background-repeat: no-repeat;
            background-position: right 50% bottom 95%;
            background-size: cover;
            background-attachment: scroll;
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )

set_bg_hack(titleimg)

df = pd.read_csv("pages/TaxiFare.csv", parse_dates=["date_time_of_pickup"])

# from the pickup timestamp, extract the hour and add it as a new feature
df["hour"] = df.date_time_of_pickup.dt.hour

# from the pickup timestamp, extract the weekday (Monday=0 .. Sunday=6) and add it as a new feature
df["week_day"] = df.date_time_of_pickup.apply(lambda x: x.weekday())

# helper that converts pickup/drop-off coordinates into a distance in km (haversine formula)
from math import cos, asin, sqrt, pi


def distance(lon1, lat1, lon2, lat2):
    r = 6371  # earth radius in km
    p = pi / 180

    a = 0.5 - cos((lat2 - lat1) * p) / 2 + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    return 2 * r * asin(sqrt(a))


# new distance feature/column for the data frame
data = []
for x in df.iloc[:, 3:7].values:
    data.append(round(distance(x[0], x[1], x[2], x[3]), 2))

df["distance"] = data

# from the pickup timestamp, extract the month and add it as a new feature
df["month"] = df["date_time_of_pickup"].dt.month


def data_load(data=df):
    st.write(":red[Data..]")
    st.write(data)
    # st.write(f"{df.info()}") would only show "None": df.info() prints to stdout and returns None


data_load()


def data_info():
    st.write(":red[Descriptive statistics]")
    st.write(df.describe().T)
    st.write(":red[Observations]")
    st.markdown("""<strong><font color='red'><ul style="list-style-type:disc">
    50,000 entries and 12 columns.
    <li><b>1. unique_id:</b> object type, presumably a unique identifier for each entry.</li>
    <li><b>2. amount:</b> floating-point numbers representing the fare value.</li>
    <li><b>3. date_time_of_pickup:</b> datetime objects in UTC, indicating the date and time of pickup.</li>
    <li><b>4. longitude_of_pickup / latitude_of_pickup:</b> floating-point numbers giving the coordinates of the pickup location.</li>
    <li><b>5. longitude_of_dropoff / latitude_of_dropoff:</b> floating-point numbers giving the coordinates of the drop-off location.</li>
    <li><b>6. no_of_passenger:</b> integer values indicating the number of passengers.</li>
    <li><b>7. hour / week_day / month:</b> integer values derived from the pickup datetime, indicating the hour, day of the week, and month, respectively.</li>
    <li><b>8. distance:</b> floating-point numbers representing the distance traveled, calculated from the coordinates.</li>
    </ul>
    The DataFrame contains trip-level transportation data: fares, locations, timestamps, distances, and passenger counts.

    Memory usage is around 4.2+ MB, which is moderate for 50,000 entries given the data types used in each column.</font></strong>""", unsafe_allow_html=True)


data_info()
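The commented-out st.write(f"{df.info()}") line above would only display "None", since DataFrame.info() prints to stdout and returns nothing. One hypothetical way to surface that summary in Streamlit (not part of the upload, relying on the imports already in this file) is to capture it in a buffer:

# Hypothetical helper: capture df.info() output so Streamlit can display it.
import io

def show_df_info(frame):
    buf = io.StringIO()
    frame.info(buf=buf)      # DataFrame.info accepts a writable buffer
    st.text(buf.getvalue())  # render the captured summary as monospace text

# show_df_info(df)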
pages/matt.jpg
ADDED
Stored with Git LFS.
pages/parot.jpg
ADDED
Stored with Git LFS.
pages/texi2.jpeg
ADDED
raw.gif
ADDED
raw2.gif
ADDED
Stored with Git LFS.
requirements.txt
ADDED
@@ -0,0 +1,13 @@
joblib == 1.3.2
streamlit == 1.31.0
scikit-learn == 1.2.2
folium == 0.15.1
numpy == 1.26.4
pandas == 2.2.0
streamlit-folium == 0.18.0
matplotlib == 3.8.2
seaborn == 0.13.2
synthesize.mp3
ADDED
Binary file (7.87 kB).
test.csv
ADDED
The diff for this file is too large to render.
texii.jpg
ADDED