File size: 2,288 Bytes
b11db75
5f0c212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
import torch
from PIL import Image
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# Initialize the image-to-text pipeline and models.
# ``st.cache`` is deprecated in favour of ``st.cache_resource`` for global,
# unserializable resources such as ML models; prefer the new decorator when
# the installed Streamlit provides it and fall back to the legacy one otherwise.
_cache_models = (
    st.cache_resource
    if hasattr(st, "cache_resource")
    else st.cache(allow_output_mutation=True)
)


@_cache_models
def load_models():
    """Load the OCR pipeline, the phishing classifier and its tokenizer.

    The result is cached by Streamlit, so the models are loaded once rather
    than on every script rerun.

    Returns:
        tuple: ``(image_pipeline, phishing_model, phishing_tokenizer)`` where
        ``image_pipeline`` is the ``"image-to-text"`` pipeline backed by
        ``microsoft/trocr-large-printed``, ``phishing_model`` is a two-label
        sequence classifier loaded from ``"Phishinglink"`` (presumably a local
        directory or hub id -- confirm against the deployment), and
        ``phishing_tokenizer`` is the tokenizer loaded from
        ``google/bert_uncased_L-2_H-128_A-2``.
    """
    image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
    phishing_model = AutoModelForSequenceClassification.from_pretrained("Phishinglink", num_labels=2)
    phishing_tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
    return image_pipeline, phishing_model, phishing_tokenizer

# Bind the three shared model objects at module level; the functions below
# read them as globals.
(
    image_pipeline,
    phishing_model,
    phishing_tokenizer,
) = load_models()

# Define the main function
def main(image_input):
    """Recognise the URL shown in an image and classify it as phishing or not.

    Args:
        image_input: A ``PIL.Image.Image``, or anything ``PIL.Image.open``
            accepts (a path or a binary file-like object, e.g. the object
            returned by ``st.file_uploader``).

    Returns:
        str: A sentence stating the recognised URL, the predicted label and
        the probability of that label, or a short notice when the OCR step
        produced no text.
    """

    def as_pil_image(source):
        """Return ``source`` as an RGB PIL image, decoding it if necessary."""
        if isinstance(source, Image.Image):
            pil_image = source
        else:
            # A file-like upload may already have been read (for example by an
            # earlier ``Image.open`` used for the preview), so rewind first.
            if hasattr(source, "seek"):
                source.seek(0)
            pil_image = Image.open(source)
        # The TrOCR usage examples feed RGB images; converting avoids
        # channel-count surprises with RGBA or greyscale uploads.
        return pil_image.convert("RGB")

    # Convert image to URL text
    def image2url(pil_image):
        """Run OCR and normalise the text: strip spaces and lowercase it."""
        url_for_recognise = image_pipeline(pil_image)[0]['generated_text'].replace(" ", "").lower()
        st.write(f"Recognized URL: {url_for_recognise}")
        return url_for_recognise

    # Check if the URL text is a phishing link
    def checkphishing(url_for_recognise):
        """Classify the URL text and format the prediction as a sentence."""
        link_token = phishing_tokenizer(url_for_recognise, max_length=512, padding=True, truncation=True, return_tensors='pt')

        with torch.no_grad():  # Disable gradient calculation for inference
            output = phishing_model(**link_token)

        # Softmax over the label axis; a single input means row 0 is the answer.
        probabilities = torch.nn.functional.softmax(output.logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()
        predicted_prob = probabilities[0, predicted_class].item()

        # Index order is assumed to match the model's label ids
        # (0 = not phishing, 1 = phishing) -- confirm against the training setup.
        labels = ['Not Phishing', 'Phishing']
        prediction_label = labels[predicted_class]
        sentence = f"The URL '{url_for_recognise}' is classified as '{prediction_label}' with a probability of {predicted_prob:.2f}."
        return sentence

    url_text = image2url(as_pil_image(image_input))
    if not url_text:
        # Classifying an empty string would yield a meaningless verdict.
        return "No text could be recognized in the image."
    return checkphishing(url_text)

# Streamlit interface
st.title("Phishing URL Detection from Image")
uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])

if uploaded_image is not None:
    # Decode the upload once; the same PIL image is used for the preview and
    # for the detection step.
    image = Image.open(uploaded_image)
    st.image(image, caption='Uploaded URL Image', use_column_width=True)
    if st.button('Detect'):
        # Hand the decoded PIL image to the OCR step, not the raw upload
        # object: the image-to-text pipeline accepts PIL images (or
        # path/URL strings), and the upload stream was already consumed by
        # Image.open above. RGB conversion guards against RGBA/greyscale files.
        result = main(image.convert("RGB"))
        st.write(result)