Spaces:
Sleeping
Sleeping
import streamlit as st | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
from peft import PeftModel | |
import torch | |
@st.cache_resource
def load_model():
    """Load the FLAN-T5 base model with the typosquat adapter merged in.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, each button click would reload the base
    model and re-merge the adapter, so the result is cached with
    ``st.cache_resource`` and shared across reruns and sessions.

    Returns:
        tuple: ``(model, tokenizer)`` -- the merged model switched to eval
        mode, and the tokenizer loaded from the base model id.
    """
    model_id = "google/flan-t5-large"
    adapter_path = "./Flan-T5-Typosquat-detect"  # Adjust to your saved adapter path

    # Load the tokenizer and the base seq2seq model.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

    # Attach the PEFT adapter, then merge it into the base model so that
    # inference runs on a plain transformers model.
    model = PeftModel.from_pretrained(model, adapter_path)
    model = model.merge_and_unload()

    # Inference only: switch off training-time behaviour such as dropout.
    model.eval()
    return model, tokenizer
# Device the input tensors are moved to before generation.
device = 'cpu'
model, tokenizer = load_model()

# --- Page header and description -------------------------------------------
st.title("Fine tuned FLAN-T5 Typosquatting Detection")
st.markdown("This streamlit demonstrates our fine tuned model for typosquatting detection. We found that using "
            "SLMs or LLMs and prompt engineering for this task could not achieve the same accuracy as our [cross encoder](https://huggingface.co/Anvilogic/CE-typosquat-detect). "
            "We found that by fine tuning a FLAN-T5 model, we could get the same accuracy as our cross encoder model. "
            "Using an SLM like Flan allows you to output the response (here `true` or `false`) directly into another LM. ")
st.write("Enter a potential typosquatted domain and a target domain to check if one is a variant of the other.")

# --- Inputs -----------------------------------------------------------------
prompt_prefix = "Is the first domain a typosquat of the second:"

# Strip surrounding whitespace so that blank-looking input ("   ") is treated
# as missing, and stray spaces from copy/paste do not end up in the prompt.
potential_typosquat = st.text_input("Potential Typosquatted Domain", value="tiktok-tikto-tibyd-yjdj.com").strip()
target_domain = st.text_input("Legitimate Domain", value="tiktok.com").strip()
full_prompt = f"{prompt_prefix} {potential_typosquat} {target_domain}"

# --- Inference --------------------------------------------------------------
if st.button("Check Typosquatting"):
    if potential_typosquat and target_domain:
        # Encode and generate response
        input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
        outputs = model.generate(input_ids, max_new_tokens=20)

        # Decode the response
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Display the result
        st.markdown(f"Is {potential_typosquat} a typosquat of {target_domain}? **{prediction}**")
    else:
        st.warning("Please enter both domains to perform the check.")