KevSun committed on
Commit
b2d22df
·
verified ·
1 Parent(s): 1c22cc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -27
app.py CHANGED
@@ -2,11 +2,7 @@ import streamlit as st
2
  from sentence_transformers import SentenceTransformer, util
3
  from sklearn.decomposition import LatentDirichletAllocation
4
  from sklearn.feature_extraction.text import CountVectorizer
5
- from sklearn.manifold import TSNE
6
  from langdetect import detect, DetectorFactory
7
- import numpy as np
8
- import matplotlib.pyplot as plt
9
- import pandas as pd
10
 
11
  st.set_page_config(page_title="Multilingual Text Analysis System", layout="wide")
12
 
@@ -56,18 +52,6 @@ def detect_language(text):
56
  except:
57
  return "unknown"
58
 
59
- @st.cache_data
60
- def tsne_visualization(embeddings, words):
61
- if len(words) < 3: # Not enough words for t-SNE
62
- return pd.DataFrame({'word': words})
63
-
64
- perplexity = min(30, len(words) - 1)
65
- tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
66
- embeddings_2d = tsne.fit_transform(embeddings)
67
- df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
68
- df['word'] = words
69
- return df
70
-
71
  st.title("Multilingual Text Analysis System")
72
  user_input = st.text_area("Enter your text here:")
73
 
@@ -86,17 +70,8 @@ if st.button("Analyze"):
86
  embeddings = embedding_agent.get_embeddings(words)
87
  st.success("Word Embeddings Generated.")
88
 
89
- with st.spinner("Visualizing words..."):
90
- tsne_df = tsne_visualization(embeddings, words)
91
- if 'x' in tsne_df.columns:
92
- fig, ax = plt.subplots()
93
- ax.scatter(tsne_df['x'], tsne_df['y'])
94
- for i, word in enumerate(tsne_df['word']):
95
- ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
96
- st.pyplot(fig)
97
- else:
98
- st.write("Word list (not enough words for t-SNE visualization):")
99
- st.write(", ".join(words))
100
 
101
  if len(words) > 1:
102
  with st.spinner("Extracting topics..."):
 
2
  from sentence_transformers import SentenceTransformer, util
3
  from sklearn.decomposition import LatentDirichletAllocation
4
  from sklearn.feature_extraction.text import CountVectorizer
 
5
  from langdetect import detect, DetectorFactory
 
 
 
6
 
7
  st.set_page_config(page_title="Multilingual Text Analysis System", layout="wide")
8
 
 
52
  except:
53
  return "unknown"
54
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  st.title("Multilingual Text Analysis System")
56
  user_input = st.text_area("Enter your text here:")
57
 
 
70
  embeddings = embedding_agent.get_embeddings(words)
71
  st.success("Word Embeddings Generated.")
72
 
73
+ st.write("Words in the input:")
74
+ st.write(", ".join(words))
 
 
 
 
 
 
 
 
 
75
 
76
  if len(words) > 1:
77
  with st.spinner("Extracting topics..."):