kithangw committed on
Commit
5f0c212
·
verified ·
1 Parent(s): 7b73280

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -30
app.py CHANGED
@@ -1,31 +1,53 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
-
4
# Load the text classification model pipeline once per server process.
# Streamlit re-executes this script on every widget interaction, so an
# uncached pipeline(...) call would rebuild the model each time.
# Prefer st.cache_resource; fall back to the legacy decorator on Streamlit
# releases that do not provide it.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_resource
def _load_classifier():
    """Build the phishing-link text-classification pipeline."""
    return pipeline("text-classification", model='kithangw/phishing_link_detection')


classifier = _load_classifier()

# Streamlit application title
st.title("Please enter a suspicious link")

# Text input for user to enter the link to classify
link = st.text_area("Enter the link to classify", "")

# Perform text classification when the user clicks the "Classify" button
if st.button("Classify"):
    # Drop surrounding whitespace/newlines so that blank input is rejected
    # instead of being sent to the model.
    candidate = link.strip()
    if candidate:
        # The pipeline returns a list with one dict holding 'label' and 'score'.
        result = classifier(candidate)[0]
        label = result['label']
        score = round(result['score'] * 100, 2)  # Convert score to percentage

        # Check the label and print out the corresponding message
        if label == "LABEL_1":  # Assuming LABEL_1 indicates phishing
            st.write(f"The link you entered is {score}% likely to be a phishing link.")
        else:  # Assuming LABEL_0 indicates not phishing
            st.write(f"The link you entered is {score}% likely to be not a phishing link.")
    else:
        st.error("Please enter a link to classify.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import torch
3
+ from PIL import Image
4
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
5
+
6
# Initialize the image-to-text pipeline and models.
# st.cache(allow_output_mutation=True) is the deprecated legacy caching API;
# st.cache_resource is its replacement for unserialisable objects such as
# models. Fall back to the legacy decorator only when running on a Streamlit
# release that does not provide st.cache_resource.
_cache_models = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_models
def load_models():
    """Load the OCR pipeline, the phishing classifier and its tokenizer.

    Returns:
        A tuple ``(image_pipeline, phishing_model, phishing_tokenizer)``:
        the "image-to-text" pipeline backed by
        ``microsoft/trocr-large-printed``, the two-label sequence
        classification model loaded from ``"Phishinglink"``, and the
        tokenizer loaded from ``google/bert_uncased_L-2_H-128_A-2``.
    """
    image_pipeline = pipeline("image-to-text", model="microsoft/trocr-large-printed")
    # NOTE(review): "Phishinglink" is presumably a model directory shipped
    # next to app.py; confirm it is not meant to be a Hub repo id.
    phishing_model = AutoModelForSequenceClassification.from_pretrained("Phishinglink", num_labels=2)
    phishing_tokenizer = AutoTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
    return image_pipeline, phishing_model, phishing_tokenizer


image_pipeline, phishing_model, phishing_tokenizer = load_models()
15
+
16
# Run OCR on the image, then classify the recognised URL text
def main(image_input):
    """Recognise the URL shown in an image and classify it.

    Args:
        image_input: Image passed straight to the module-level
            ``image_pipeline``.

    Returns:
        A sentence naming the recognised URL, the predicted label
        ('Not Phishing' or 'Phishing') and the probability of that label
        formatted to two decimals.

    Side effects:
        Writes the recognised URL to the Streamlit page.
    """
    # Step 1: OCR. Spaces are removed and the text lower-cased before the
    # result is shown to the user and classified.
    ocr_text = image_pipeline(image_input)[0]['generated_text']
    url = ocr_text.replace(" ", "").lower()
    st.write(f"Recognized URL: {url}")

    # Step 2: tokenize the URL and run the classifier without gradients.
    encoded = phishing_tokenizer(
        url,
        max_length=512,
        padding=True,
        truncation=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        logits = phishing_model(**encoded).logits

    # Step 3: turn logits into a (label, probability) pair.
    probs = torch.nn.functional.softmax(logits, dim=-1)
    class_index = torch.argmax(probs, dim=-1).item()
    confidence = probs[0, class_index].item()
    label = ('Not Phishing', 'Phishing')[class_index]

    return f"The URL '{url}' is classified as '{label}' with a probability of {confidence:.2f}."
43
+
44
# Streamlit interface
st.title("Phishing URL Detection from Image")
uploaded_image = st.file_uploader("Upload an image of the URL", type=["png", "jpg", "jpeg"])

if uploaded_image is not None:
    # Decode the upload once; the same PIL image is used for the preview
    # and for OCR.
    image = Image.open(uploaded_image)
    st.image(image, caption='Uploaded URL Image', use_column_width=True)
    if st.button('Detect'):
        # Pass the decoded PIL image, not the raw uploaded file object:
        # the image-to-text pipeline accepts a URL/path string or a PIL
        # image and rejects other file-like inputs.
        result = main(image)
        st.write(result)