mk43275 committed on
Commit
4503905
·
verified ·
1 Parent(s): 983d790

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from urllib.parse import urlparse
3
+ from sklearn.preprocessing import MinMaxScaler
4
+ import pickle
5
+
6
# Load the pickled classifier once at import time.
# NOTE: unpickling executes code embedded in the file, so the model file must
# come from a trusted source.
# The context manager guarantees the handle is closed even if unpickling fails.
with open("phishing_rf_model.saved", "rb") as file:
    rf_model = pickle.load(file)

# Scaler applied to the feature vector before it is passed to the model.
# NOTE(review): this scaler is constructed fresh and nothing visible in this
# file fits it. scikit-learn raises NotFittedError when `transform` is called
# on an unfitted estimator, so prediction is expected to fail until the scaler
# fitted at training time is persisted and loaded here -- TODO confirm.
min_scaler = MinMaxScaler()
13
+
14
+ # Function to extract features from URL
15
def extract_features_from_url(url):
    """Build the 25-element feature list for a URL.

    The URL is split with ``urllib.parse.urlparse``; counts, lengths and
    boolean flags are then derived from the raw string and from its
    network-location, path and query parts.

    Args:
        url: The URL to analyse, as a string.

    Returns:
        A list of 25 values (ints and bools) in the fixed order given by the
        return statement at the end of this function.
    """
    # Function-scope import so this block does not depend on the file header.
    import ipaddress

    parsed_url = urlparse(url)
    netloc = parsed_url.netloc
    path = parsed_url.path
    query = parsed_url.query

    # NOTE(review): several features below are simple heuristics; whether they
    # match the feature definitions used when the model was trained cannot be
    # verified from this file -- confirm against the training pipeline.
    num_dots = url.count('.')
    subdomain_level = len(netloc.split('.')) - 1
    path_level = len(path.split('/')) - 1
    url_length = len(url)
    num_dash = url.count('-')
    num_dash_in_hostname = netloc.count('-')
    at_symbol = '@' in netloc
    tilde_symbol = '~' in netloc
    num_underscore = url.count('_')
    num_percent = url.count('%')
    # ''.split('&') returns [''], so an empty query must be special-cased to
    # report zero components instead of one.
    num_query_components = len(query.split('&')) if query else 0
    num_ampersand = url.count('&')
    num_hash = url.count('#')
    num_numeric_chars = sum(c.isdigit() for c in url)
    no_https = not url.startswith('https://')
    random_string = '?' in query

    # True only when the host is an IPv4/IPv6 literal. `hostname` is the
    # netloc without userinfo, port or IPv6 brackets, and is None when the URL
    # has no network location.
    host = parsed_url.hostname
    ip_address = False
    if host is not None:
        try:
            ipaddress.ip_address(host)
        except ValueError:
            ip_address = False
        else:
            ip_address = True

    domain_in_subdomains = '.' in netloc[:-1]
    domain_in_paths = '.' in path
    https_in_hostname = 'https' in netloc
    hostname_length = len(netloc)
    path_length = len(path)
    query_length = len(query)
    double_slash_in_path = '//' in path
    # TODO: placeholder -- sensitive-word extraction is not implemented yet.
    num_sensitive_words = 0

    return [
        num_dots, subdomain_level, path_level, url_length, num_dash,
        num_dash_in_hostname, at_symbol, tilde_symbol, num_underscore,
        num_percent, num_query_components, num_ampersand, num_hash,
        num_numeric_chars, no_https, random_string, ip_address,
        domain_in_subdomains, domain_in_paths, https_in_hostname,
        hostname_length, path_length, query_length, double_slash_in_path,
        num_sensitive_words,
    ]
47
+
48
+ # Function to predict using the model
49
def predict_phishing(url):
    """Classify a URL with the loaded model.

    The URL is converted to a feature list, wrapped as a single-row batch,
    passed through ``min_scaler.transform`` and then ``rf_model.predict``.

    Args:
        url: The URL to classify, as a string.

    Returns:
        Whatever ``rf_model.predict`` returns for the one-row batch; the
        caller reads the first element as the label.
    """
    # The scaler and model take a 2-D batch, so wrap the single sample.
    feature_batch = [extract_features_from_url(url)]
    return rf_model.predict(min_scaler.transform(feature_batch))
54
+
55
+ # Streamlit UI
56
def main():
    """Render the Streamlit page and report the verdict for the entered URL."""
    st.title("Phishing URL Detector")
    url_input = st.text_input("Enter the URL:")

    # Nothing to do until the user presses the button.
    if not st.button("Check Phishing"):
        return

    # Button pressed with an empty text box: ask for input instead.
    if not url_input:
        st.warning("Please enter a URL")
        return

    # A first-element label of 1 is reported as phishing.
    label = predict_phishing(url_input)[0]
    if label == 1:
        st.error("Phishing URL Detected!")
    else:
        st.success("Safe URL")


# Start the UI only when this file is executed as the main script.
if __name__ == "__main__":
    main()