Spaces:

Anvilogic
/

T5-Typosquat-Detect

Sleeping

anvilogic-mikehart committed on Nov 18, 2024

Commit

0feeb57

1 Parent(s): 237f082

Updating text and styling

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,13 +19,17 @@ def load_model():
 device='cpu'
 model, tokenizer = load_model()
-st.title("FLAN-T5 Typosquatting Detection")
 st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")
 prompt_prefix = "Is the first domain a typosquat of the second:"
-potential_typosquat = st.text_input("Potential Typosquatted Domain", value="lonlonsoft.com")
-target_domain = st.text_input("Legitimate Domain", value="stiltsoft.net")
 full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"
@@ -39,7 +43,7 @@ if st.button("Check Typosquatting"):
         prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # Display the result
-        st.write("**Prediction: **")
-        st.write(prediction)
     else:
         st.warning("Please enter both domains to perform the check.")

 device='cpu'
 model, tokenizer = load_model()
+st.title("Fine tuned FLAN-T5 Typosquatting Detection")
+st.markdown("This streamlit demonstrates our fine tuned model for typosquatting detection.  We found that using "
+            "SLMs or LLMs and prompt engineering for this task could not achieve the same accuracy as our [cross encoder](https://huggingface.co/Anvilogic/CE-typosquat-detect). "
+            "We found that by fine tuning a FLAN-T5 model, we could get the same accuracy as our cross encoder model.  "
+            "Using an SLM like Flan allows you to output the response (here `true` or `false`) directly into another LM.  ")
 st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")
 prompt_prefix = "Is the first domain a typosquat of the second:"
+potential_typosquat = st.text_input("Potential Typosquatted Domain", value="tiktok-tikto-tibyd-yjdj.com")
+target_domain = st.text_input("Legitimate Domain", value="tiktok.com")
 full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"
         prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
         # Display the result
+        st.markdown(f"Is {potential_typosquat} a typosquat of {target_domain}? **{prediction}**")
     else:
         st.warning("Please enter both domains to perform the check.")