Spaces:
Sleeping
Sleeping
File size: 6,911 Bytes
188456f 11ad39a 8380741 c31032a 11ad39a 06247a9 8380741 2cca5cf 5099e16 11ad39a f70ad97 11ad39a d6c3fe4 c31032a 11ad39a 8380741 6f95ca2 8380741 11ad39a 188456f c31032a 8380741 188456f 022adff 188456f 5099e16 188456f 11ad39a 2cca5cf 3404ec5 5099e16 11ad39a 188456f 11ad39a c31a467 11ad39a 188456f 11ad39a 5099e16 8380741 6f95ca2 19ef4fe c31032a 19ef4fe 06247a9 19ef4fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import streamlit as st
from joblib import load
from transformers import BertTokenizer, BertForSequenceClassification
import torch
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import GPT2Tokenizer, GPT2LMHeadModel
tok = GPT2Tokenizer.from_pretrained('sberbank-ai/rugpt3small_based_on_gpt2')
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
toxicity_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
toxicity_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
clf = load('my_model_filename.pkl')
vectorizer = load('tfidf_vectorizer.pkl')
scaler = load('scaler.joblib')
tukinazor = load('tokenizer.pkl')
rnn_model = load_model('path_to_my_model.h5')
bert_model = BertForSequenceClassification.from_pretrained('my_bert_model')
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bert_model = bert_model.to(device)
model_finetuned = GPT2LMHeadModel.from_pretrained('GPT_2')
model_finetuned.eval()
labels = ["не токсичный", "оскорбляющий", "непристойный", "угрожающий", "опасный"]
def text2toxicity(text, aggregate=True):
""" Calculate toxicity of a text (if aggregate=True) or a vector of toxicity aspects (if aggregate=False)"""
with torch.no_grad():
inputs = toxicity_tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(toxicity_model.device)
proba = torch.sigmoid(toxicity_model(**inputs).logits).cpu().numpy()
if isinstance(text, str):
proba = proba[0]
if aggregate:
return 1 - proba.T[0] * (1 - proba.T[-1])
else:
result = {}
for label, prob in zip(labels, proba):
result[label] = prob
return result
def predict_text(text):
sequences = tukinazor.texts_to_sequences([text])
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=200, padding='post', truncating='post')
predictions = rnn_model.predict(padded_sequences)
predicted_class = tf.argmax(predictions, axis=-1).numpy()[0]
return predicted_class
def generate_text(model, prompt, max_length=150, temperature=1.0):
input_ids = tok.encode(prompt, return_tensors='pt')
output = sber.generate(
input_ids=input_ids,
max_length=max_length + len(input_ids[0]),
temperature=temperature,
num_return_sequences=1,
pad_token_id=tokenizer.eos_token_id
)
generated_text = tok.decode(output[0], skip_special_tokens=True)
return generated_text
def page_reviews_classification():
st.title("Модель классификации отзывов")
st.image("ramsey.jpg", caption="finally some good food", use_column_width=True)
user_input = st.text_area("Введите текст отзыва:")
if st.button("Классифицировать"):
start_time = time.time()
user_input_vec = vectorizer.transform([user_input])
sentence_vector_scaled = scaler.transform(user_input_vec)
prediction = clf.predict(
sentence_vector_scaled)
elapsed_time = time.time() - start_time
st.write(f"Прогнозируемый класс: {prediction[0]}")
st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
user_input_rnn = st.text_area("Введите текст отзыва для RNN модели:")
if st.button("Классифицировать с RNN"):
start_time = time.time()
prediction_rnn = predict_text(user_input_rnn)
elapsed_time = time.time() - start_time
st.write(f"Прогнозируемый класс с RNN: {prediction_rnn}")
st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
user_input_bert = st.text_area("Введите текст отзыва для BERT:")
if st.button("Классифицировать (BERT)"):
start_time = time.time()
encoding = tokenizer.encode_plus(
user_input_bert,
add_special_tokens=True,
max_length=200,
return_token_type_ids=False,
padding='max_length',
truncation=True,
return_attention_mask=True,
return_tensors='pt'
)
input_ids = encoding['input_ids'].to(device)
attention_mask = encoding['attention_mask'].to(device)
with torch.no_grad():
outputs = bert_model(input_ids=input_ids, attention_mask=attention_mask)
predictions = torch.argmax(outputs.logits, dim=1)
elapsed_time = time.time() - start_time
st.write(f"Прогнозируемый класс (BERT): {predictions.item() + 1}")
st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
def page_toxicity_analysis():
user_input_toxicity = st.text_area("Введите текст для оценки токсичности:")
if st.button("Оценить токсичность"):
start_time = time.time()
probs = text2toxicity(user_input_toxicity, aggregate=False)
elapsed_time = time.time() - start_time
for label, prob in probs.items():
st.write(f"Вероятность того что комментарий {label}: {prob:.4f}")
def page_gpt_generation():
st.title("Генерация текста с помощью GPT-модели")
user_prompt = st.text_area("Введите ваш текст:")
sequence_length = st.slider("Длина последовательности:", min_value=10, max_value=1000, value=150, step=10)
num_generations = st.slider("Число генераций:", min_value=1, max_value=10, value=1)
temperature = st.slider("Температура:", min_value=0.1, max_value=3.0, value=1.0, step=0.1)
if st.button("Генерировать"):
for _ in range(num_generations):
generated_text = generate_text(model_finetuned, user_prompt, sequence_length, temperature)
st.text(generated_text)
def main():
page_selection = st.sidebar.selectbox("Выберите страницу:", ["Классификация отзывов", "Анализ токсичности"])
if page_selection == "Классификация отзывов":
page_reviews_classification()
elif page_selection == "Анализ токсичности":
page_toxicity_analysis()
elif page_selection == "Генерация текста Noize MC":
page_gpt_generation()
if __name__ == "__main__":
main() |