Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from urllib.parse import urlparse
|
3 |
+
from sklearn.preprocessing import MinMaxScaler
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Load the trained random-forest classifier from disk.
# NOTE(review): pickle.load on a file you did not produce yourself can execute
# arbitrary code — only ship this app with a trusted model artifact.
with open("phishing_rf_model.saved", "rb") as model_file:
    rf_model = pickle.load(model_file)

# Feature scaler used before prediction.
# NOTE(review): this MinMaxScaler is never fitted, so min_scaler.transform(...)
# in predict_phishing will raise NotFittedError at runtime. The scaler fitted on
# the training data should be pickled alongside the model and loaded here —
# confirm and wire that in.
min_scaler = MinMaxScaler()
14 |
+
# Function to extract features from URL
|
15 |
+
def extract_features_from_url(url):
    """Derive the 25 lexical features the classifier expects from *url*.

    Parameters
    ----------
    url : str
        Raw URL string to analyse.

    Returns
    -------
    list
        Feature values (ints and bools) in the fixed order the model was
        trained on.
    """
    parts = urlparse(url)
    host, path, query = parts.netloc, parts.path, parts.query

    return [
        url.count('.'),                   # NumDots
        len(host.split('.')) - 1,         # SubdomainLevel
        len(path.split('/')) - 1,         # PathLevel
        len(url),                         # UrlLength
        url.count('-'),                   # NumDash
        host.count('-'),                  # NumDashInHostname
        '@' in host,                      # AtSymbol
        '~' in host,                      # TildeSymbol
        url.count('_'),                   # NumUnderscore
        url.count('%'),                   # NumPercent
        len(query.split('&')),            # NumQueryComponents
        url.count('&'),                   # NumAmpersand
        url.count('#'),                   # NumHash
        sum(ch.isdigit() for ch in url),  # NumNumericChars
        not url.startswith('https://'),   # NoHttps
        '?' in query,                     # RandomString — urlparse strips the leading '?', so this is almost always False
        host.count('.'),                  # IpAddress — NOTE(review): counts dots, not an actual IP-literal test; confirm against training pipeline
        '.' in host[:-1],                 # DomainInSubdomains
        '.' in path,                      # DomainInPaths
        'https' in host,                  # HttpsInHostname
        len(host),                        # HostnameLength
        len(path),                        # PathLength
        len(query),                       # QueryLength
        '//' in path,                     # DoubleSlashInPath
        0,                                # NumSensitiveWords — placeholder, never computed
    ]
47 |
+
|
48 |
+
def predict_phishing(url):
    """Classify *url* with the loaded random-forest model.

    Returns the raw prediction array; element 0 is 1 for phishing, 0 for safe.

    NOTE(review): min_scaler is constructed at module level but never fitted,
    so the transform call below raises NotFittedError — the scaler fitted on
    the training data must be persisted and loaded for this to work; verify.
    """
    feature_vector = [extract_features_from_url(url)]
    return rf_model.predict(min_scaler.transform(feature_vector))
54 |
+
|
55 |
+
# Streamlit UI
def main():
    """Render the Streamlit page: a URL input and a phishing-check button."""
    st.title("Phishing URL Detector")

    url_input = st.text_input("Enter the URL:")
    if not st.button("Check Phishing"):
        return
    if not url_input:
        st.warning("Please enter a URL")
        return

    prediction = predict_phishing(url_input)
    # Model convention: 1 == phishing, anything else == legitimate.
    if prediction[0] == 1:
        st.error("Phishing URL Detected!")
    else:
        st.success("Safe URL")


# Run the UI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()