Spaces:
Sleeping
Sleeping
anvilogic-mikehart
committed on
Commit
·
0feeb57
1
Parent(s):
237f082
Updating text and styling
Browse files
app.py
CHANGED
@@ -19,13 +19,17 @@ def load_model():
|
|
19 |
device='cpu'
|
20 |
model, tokenizer = load_model()
|
21 |
|
22 |
-
st.title("FLAN-T5 Typosquatting Detection")
|
|
|
|
|
|
|
|
|
23 |
st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")
|
24 |
|
25 |
prompt_prefix = "Is the first domain a typosquat of the second:"
|
26 |
|
27 |
-
potential_typosquat = st.text_input("Potential Typosquatted Domain", value="
|
28 |
-
target_domain = st.text_input("Legitimate Domain", value="
|
29 |
|
30 |
full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"
|
31 |
|
@@ -39,7 +43,7 @@ if st.button("Check Typosquatting"):
|
|
39 |
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
40 |
|
41 |
# Display the result
|
42 |
-
st.
|
43 |
-
|
44 |
else:
|
45 |
st.warning("Please enter both domains to perform the check.")
|
|
|
19 |
device='cpu'
|
20 |
model, tokenizer = load_model()
|
21 |
|
22 |
+
st.title("Fine tuned FLAN-T5 Typosquatting Detection")
|
23 |
+
st.markdown("This streamlit demonstrates our fine tuned model for typosquatting detection. We found that using "
|
24 |
+
"SLMs or LLMs and prompt engineering for this task could not achieve the same accuracy as our [cross encoder](https://huggingface.co/Anvilogic/CE-typosquat-detect). "
|
25 |
+
"We found that by fine tuning a FLAN-T5 model, we could get the same accuracy as our cross encoder model. "
|
26 |
+
"Using an SLM like Flan allows you to output the response (here `true` or `false`) directly into another LM. ")
|
27 |
st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")
|
28 |
|
29 |
prompt_prefix = "Is the first domain a typosquat of the second:"
|
30 |
|
31 |
+
potential_typosquat = st.text_input("Potential Typosquatted Domain", value="tiktok-tikto-tibyd-yjdj.com")
|
32 |
+
target_domain = st.text_input("Legitimate Domain", value="tiktok.com")
|
33 |
|
34 |
full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"
|
35 |
|
|
|
43 |
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
44 |
|
45 |
# Display the result
|
46 |
+
st.markdown(f"Is {potential_typosquat} a typosquat of {target_domain}? **{prediction}**")
|
47 |
+
|
48 |
else:
|
49 |
st.warning("Please enter both domains to perform the check.")
|