"""Streamlit page that classifies Russian-language reviews of medical facilities.

The pipeline is: text clean-up -> lemmatization with Mystem -> TF-IDF
vectorization -> CatBoost prediction (label ``1`` is shown as positive,
anything else as negative).
"""
import functools
import re
import string
import threading

import nltk
import pandas as pd
import streamlit as st
from catboost import CatBoostClassifier
from joblib import load
from nltk.corpus import stopwords
from pymystem3 import Mystem

nltk.download('stopwords')

# Patterns applied, in this order, by ``data_preprocessing``.
_HTML_TAG_RE = re.compile("<.*?>")
_URL_RE = re.compile(r'http\S+')
_MENTION_RE = re.compile(r'@\w+')
_HASHTAG_RE = re.compile(r'#\w+')
_DIGITS_RE = re.compile(r'\d+')

# Deletes every character listed in ``string.punctuation`` in a single pass.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

# The Mystem instance is shared between calls, so access to it is serialized.
_MYSTEM_LOCK = threading.Lock()


@functools.lru_cache(maxsize=None)
def _russian_stop_words() -> frozenset:
    """Return the NLTK Russian stop-word list as a set, loaded only once."""
    return frozenset(stopwords.words('russian'))


@functools.lru_cache(maxsize=None)
def _get_mystem() -> Mystem:
    """Return a single shared ``Mystem`` instance instead of one per call."""
    return Mystem()


def data_preprocessing(text: str) -> str:
    """Normalize raw review text before lemmatization.

    Steps, in order: lowercase; drop ``<...>`` tags; replace URLs,
    ``@mentions``, ``#hashtags`` and digit runs with a space; delete ASCII
    punctuation; drop Russian stop words.

    Args:
        text: Raw review text.

    Returns:
        The remaining words joined by single spaces.
    """
    text = text.lower()
    # Tags are removed outright; the other patterns leave a space behind so
    # that neighbouring words are not glued together.
    text = _HTML_TAG_RE.sub("", text)
    for pattern in (_URL_RE, _MENTION_RE, _HASHTAG_RE, _DIGITS_RE):
        text = pattern.sub(" ", text)
    text = text.translate(_PUNCTUATION_TABLE)

    stop_words = _russian_stop_words()
    return " ".join(word for word in text.split() if word not in stop_words)


def lemmatize_text(text: str) -> str:
    """Lemmatize ``text`` with Mystem and join the returned tokens with spaces.

    Args:
        text: Text to lemmatize (typically the output of
            ``data_preprocessing``).

    Returns:
        Every token returned by ``Mystem.lemmatize`` joined with ``' '``.
    """
    # Reuse one Mystem instance: constructing it for every review is wasteful.
    with _MYSTEM_LOCK:
        lemmas = _get_mystem().lemmatize(text)
    return ' '.join(lemmas)


# Loaded at import time; both files are resolved relative to the working
# directory of the running app.
model = CatBoostClassifier()
model.load_model('cat_model4.cbm')
tfidf_vectorizer = load('tfidf_vectorizer.joblib')


def classic_ml_page():
    """Render the review-classification page.

    Shows a text area and a button. When the button is pressed, the review is
    preprocessed, lemmatized, vectorized and classified, and the verdict is
    written to the page. Empty or whitespace-only input produces a prompt to
    enter a review instead of a prediction.
    """
    st.title("Классификация отзывов о медицинских учреждениях")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    if not st.button("Классифицировать"):
        return

    # Whitespace-only input carries no content, so treat it like empty input.
    if not user_review or not user_review.strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")