Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from urllib.parse import urlparse
|
3 |
+
from sklearn.preprocessing import MinMaxScaler
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Load the trained random-forest classifier from disk.
# NOTE(review): pickle.load on a file you did not produce yourself can execute
# arbitrary code — only ship this app with a trusted model artifact.
with open("phishing_rf_model.saved", "rb") as model_file:
    rf_model = pickle.load(model_file)

# Feature scaler used before prediction.
# NOTE(review): this MinMaxScaler is never fitted, so min_scaler.transform(...)
# in predict_phishing will raise NotFittedError at runtime. The scaler fitted on
# the training data should be pickled alongside the model and loaded here —
# confirm and wire that in.
min_scaler = MinMaxScaler()
14 |
+
# Function to extract features from URL
|
15 |
+
def extract_features_from_url(url):
    """Derive the 25 lexical features the classifier expects from *url*.

    Parameters
    ----------
    url : str
        Raw URL string to analyse.

    Returns
    -------
    list
        Feature values (ints and bools) in the fixed order the model was
        trained on.
    """
    parts = urlparse(url)
    host, path, query = parts.netloc, parts.path, parts.query

    return [
        url.count('.'),                   # NumDots
        len(host.split('.')) - 1,         # SubdomainLevel
        len(path.split('/')) - 1,         # PathLevel
        len(url),                         # UrlLength
        url.count('-'),                   # NumDash
        host.count('-'),                  # NumDashInHostname
        '@' in host,                      # AtSymbol
        '~' in host,                      # TildeSymbol
        url.count('_'),                   # NumUnderscore
        url.count('%'),                   # NumPercent
        len(query.split('&')),            # NumQueryComponents
        url.count('&'),                   # NumAmpersand
        url.count('#'),                   # NumHash
        sum(ch.isdigit() for ch in url),  # NumNumericChars
        not url.startswith('https://'),   # NoHttps
        '?' in query,                     # RandomString — urlparse strips the leading '?', so this is almost always False
        host.count('.'),                  # IpAddress — NOTE(review): counts dots, not an actual IP-literal test; confirm against training pipeline
        '.' in host[:-1],                 # DomainInSubdomains
        '.' in path,                      # DomainInPaths
        'https' in host,                  # HttpsInHostname
        len(host),                        # HostnameLength
        len(path),                        # PathLength
        len(query),                       # QueryLength
        '//' in path,                     # DoubleSlashInPath
        0,                                # NumSensitiveWords — placeholder, never computed
    ]
47 |
+
|
48 |
+
def predict_phishing(url):
    """Classify *url* with the loaded random-forest model.

    Returns the raw prediction array; element 0 is 1 for phishing, 0 for safe.

    NOTE(review): min_scaler is constructed at module level but never fitted,
    so the transform call below raises NotFittedError — the scaler fitted on
    the training data must be persisted and loaded for this to work; verify.
    """
    feature_vector = [extract_features_from_url(url)]
    return rf_model.predict(min_scaler.transform(feature_vector))
54 |
+
|
55 |
+
# Streamlit UI
def main():
    """Render the Streamlit page: a URL input and a phishing-check button."""
    st.title("Phishing URL Detector")

    url_input = st.text_input("Enter the URL:")
    if not st.button("Check Phishing"):
        return
    if not url_input:
        st.warning("Please enter a URL")
        return

    prediction = predict_phishing(url_input)
    # Model convention: 1 == phishing, anything else == legitimate.
    if prediction[0] == 1:
        st.error("Phishing URL Detected!")
    else:
        st.success("Safe URL")


# Run the UI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()