Spaces:
Sleeping
Sleeping
File size: 1,248 Bytes
ce3aa8d cc75dc9 0a98cc1 35946c9 cc75dc9 ce3aa8d cc75dc9 fdaa396 ce3aa8d cc75dc9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
import joblib
import torch
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from l3prune import LLMEncoder
# Resolve the model artifacts relative to this file instead of the current
# working directory, so the app also starts when launched from another folder.
_SAVED_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "saved")

# Classifier whose ``predict`` is applied to the encoder output.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
best_clf = joblib.load(os.path.join(_SAVED_DIR, "classifier_llama32.joblib"))

# Encoder restored from the local "pruned_encoder_llama32" directory, placed on
# CPU with bfloat16 weights.
encoder = LLMEncoder.from_pretrained(
    os.path.join(_SAVED_DIR, "pruned_encoder_llama32"),
    device_map="cpu",
    torch_dtype=torch.bfloat16,
)
def classify_prompt(prompt):
    """Label a single prompt as "Harmful" or "Benign".

    The prompt is wrapped in a one-element list and passed to the
    module-level ``encoder``; the encoded output is then handed to
    ``best_clf.predict``. A truthy first prediction maps to "Harmful",
    anything else maps to "Benign".
    """
    features = encoder.encode([prompt])
    predictions = best_clf.predict(features)
    # Only one prompt was encoded, so only the first prediction is relevant.
    if predictions[0]:
        return "Harmful"
    return "Benign"
# Build the input and output widgets first (input before output, matching the
# original construction order), then wire them to the classifier.
_prompt_box = gr.Textbox(lines=3, placeholder="Enter a prompt to classify...")
_result_box = gr.Textbox(label="Classification Result")

# Single-function UI: one text input, one text output.
demo = gr.Interface(
    fn=classify_prompt,
    inputs=_prompt_box,
    outputs=_result_box,
    title="Harmful Prompt Classifier",
    description="This app classifies whether a given prompt is potentially harmful or benign.",
    show_api=False,
    show_response_timing=True,
)
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo.launch()