Spaces:

kithangw
/

testingphishinglink

Sleeping

App Files Files Community

kithangw committed on Mar 17, 2024

Commit

5f0c212

verified ·

1 Parent(s): 7b73280

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -30

app.py CHANGED Viewed

@@ -1,31 +1,53 @@
 import streamlit as st
-from transformers import pipeline
-# Load the text classification model pipeline
-classifier = pipeline("text-classification", model='kithangw/phishing_link_detection')
-# Streamlit application title
-st.title("Please enter a suspicious link")
-# Text input for user to enter the link to classify
-link = st.text_area("Enter the link to classify", "")
-# Perform text classification when the user clicks the "Classify" button
-if st.button("Classify"):
-    if link:  # Check if link is not empty
-        # Perform text classification on the input link
-        results = classifier(link)
-        # The results variable contains a list with one item, which is a dictionary.
-        # The dictionary has 'label' and 'score' as keys.
-        result = results[0]
-        label = result['label']
-        score = round(result['score'] * 100, 2)  # Convert score to percentage
-        # Check the label and print out the corresponding message
-        if label == "LABEL_1":  # Assuming LABEL_1 indicates phishing
-            st.write(f"The link you entered is {score}% likely to be a phishing link.")
-        else:  # Assuming LABEL_0 indicates not phishing
-            st.write(f"The link you entered is {score}% likely to be not a phishing link.")
-    else:
-        st.error("Please enter an link to classify.")

 import streamlit as st
+import torch
+from PIL import Image
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+# Initialize the image-to-text pipeline and models
+# st.cache is deprecated; st.cache_resource is Streamlit's replacement for
+# caching shared, non-serialisable objects such as ML models.
+@st.cache_resource
+def load_models():
+    """Load the OCR pipeline, the phishing classifier and its tokenizer.
+
+    Returns:
+        A tuple ``(image_pipeline, phishing_model, phishing_tokenizer)``.
+    """
+    image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
+    # NOTE(review): "Phishinglink" is presumably a checkpoint directory shipped
+    # next to app.py rather than a Hub model id - confirm.
+    phishing_model = AutoModelForSequenceClassification.from_pretrained("Phishinglink", num_labels=2)
+    phishing_tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
+    return image_pipeline, phishing_model, phishing_tokenizer
+# Module-level handles read by main(): the OCR pipeline, the classifier and
+# the tokenizer, all obtained from the cached loader.
+image_pipeline, phishing_model, phishing_tokenizer = load_models()
+# Define the main function
+def main(image_input):
+    """Read a URL out of an image and report whether it is classified as phishing.
+
+    Args:
+        image_input: Image passed unchanged to ``image_pipeline``.
+
+    Returns:
+        A sentence naming the recognised URL, the predicted label and the
+        probability of that label (two decimals).
+    """
+    # Step 1 - OCR: take the first generated text, drop spaces, lowercase it,
+    # and echo it to the page before classifying.
+    ocr_results = image_pipeline(image_input)
+    raw_text = ocr_results[0]['generated_text']
+    recognised_url = raw_text.replace(" ", "").lower()
+    st.write(f"Recognized URL: {recognised_url}")
+
+    # Step 2 - tokenise the URL and run the classifier without gradients.
+    encoded = phishing_tokenizer(
+        recognised_url,
+        max_length=512,
+        padding=True,
+        truncation=True,
+        return_tensors='pt',
+    )
+    with torch.no_grad():
+        logits = phishing_model(**encoded).logits
+
+    # Step 3 - turn logits into probabilities and pick the most likely class.
+    class_probs = torch.nn.functional.softmax(logits, dim=-1)
+    best_index = torch.argmax(class_probs, dim=-1).item()
+    best_prob = class_probs[0, best_index].item()
+    # Index 0 maps to 'Not Phishing', index 1 to 'Phishing'.
+    verdict = ('Not Phishing', 'Phishing')[best_index]
+    return f"The URL '{recognised_url}' is classified as '{verdict}' with a probability of {best_prob:.2f}."
+# Streamlit interface
+# Page flow: upload an image, preview it, then run detection on demand.
+st.title("Phishing URL Detection from Image")
+uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])
+if uploaded_image is not None:
+    # Decode the upload once; the same PIL image is used for preview and OCR.
+    image = Image.open(uploaded_image)
+    st.image(image, caption='Uploaded URL Image', use_column_width=True)
+    if st.button('Detect'):
+        # Pass the decoded PIL image, not the raw upload object: the
+        # image-to-text pipeline accepts a path/URL string or a PIL image,
+        # and the upload stream has already been read by Image.open above.
+        result = main(image)
+        st.write(result)