Spaces:
Sleeping
Sleeping
bintangyosua
committed on
Upload 8 files
Browse files
- Dockerfile +16 -16
- README.md +36 -4
- app.py +78 -1
- requirements.txt +16 -17
Dockerfile
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
-
FROM python:3.11.4
|
2 |
-
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv
|
3 |
-
|
4 |
-
RUN useradd -m -u 1000 user
|
5 |
-
ENV PATH="/home/user/.local/bin:$PATH"
|
6 |
-
ENV UV_SYSTEM_PYTHON=1
|
7 |
-
|
8 |
-
WORKDIR /app
|
9 |
-
|
10 |
-
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
-
RUN uv pip install -r requirements.txt
|
12 |
-
|
13 |
-
COPY --chown=user . /app
|
14 |
-
USER user
|
15 |
-
|
16 |
-
CMD ["marimo", "run", "app.py", "--include-code", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
1 |
+
FROM python:3.11.4
|
2 |
+
COPY --from=ghcr.io/astral-sh/uv:0.4.20 /uv /bin/uv
|
3 |
+
|
4 |
+
RUN useradd -m -u 1000 user
|
5 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
6 |
+
ENV UV_SYSTEM_PYTHON=1
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
+
RUN uv pip install -r requirements.txt
|
12 |
+
|
13 |
+
COPY --chown=user . /app
|
14 |
+
USER user
|
15 |
+
|
16 |
+
CMD ["marimo", "run", "app.py", "--include-code", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
@@ -1,13 +1,45 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
emoji: 🍃
|
4 |
colorFrom: indigo
|
5 |
colorTo: purple
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
-
short_description:
|
10 |
---
|
11 |
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Political Ideologies Analysis and Classification
|
3 |
emoji: 🍃
|
4 |
colorFrom: indigo
|
5 |
colorTo: purple
|
6 |
sdk: docker
|
7 |
pinned: true
|
8 |
license: mit
|
9 |
+
short_description: Analysis and Classification
|
10 |
---
|
11 |
|
12 |
+
# Political Ideologies Analysis
|
13 |
+
|
14 |
+
This project provides a comprehensive analysis of political ideologies using data from the Hugging Face Political Ideologies dataset. The analysis involves data preprocessing, mapping ideological labels, and visualizing political statements through Word2Vec embeddings and t-SNE projections. It also includes an interactive tool for exploring political ideologies and their related issue types in a 2D space.
|
15 |
+
|
16 |
+
## Project Overview
|
17 |
+
|
18 |
+
The goal of this project is to analyze the political ideologies dataset to understand the distribution of political ideologies (conservative vs liberal) and their association with various issue types. The analysis involves:
|
19 |
+
|
20 |
+
- **Data Loading and Cleaning**: Loading, cleaning, and mapping data from the Hugging Face dataset.
|
21 |
+
- **Label Mapping**: Mapping ideological labels (conservative and liberal) and issue types to numerical values.
|
22 |
+
- **Word2Vec Embeddings**: Generating word embeddings for political statements to create vector representations.
|
23 |
+
- **Dimensionality Reduction**: Using t-SNE to reduce the dimensionality of embeddings and visualize them in 2D.
|
24 |
+
- **Interactive Visualizations**: Visualizing the data using Altair with interactive charts to explore ideology and issue type distributions.
|
25 |
+
|
26 |
+
## Dataset
|
27 |
+
|
28 |
+
The dataset used in this project is the [Political Ideologies dataset](https://huggingface.co/datasets/JyotiNayak/political_ideologies) from Hugging Face, which contains political statements along with their corresponding labels (conservative or liberal) and issue types (economic, environmental, social, etc.).
|
29 |
+
|
30 |
+
## Requirements
|
31 |
+
|
32 |
+
- Python 3.x
|
33 |
+
- TensorFlow
|
34 |
+
- Gensim
|
35 |
+
- Pandas
|
36 |
+
- NumPy
|
37 |
+
- Matplotlib
|
38 |
+
- Seaborn
|
39 |
+
- Altair
|
40 |
+
|
41 |
+
You can install the necessary dependencies with:
|
42 |
+
|
43 |
+
```bash
|
44 |
+
pip install -r requirements.txt
|
45 |
+
```
|
app.py
CHANGED
@@ -33,12 +33,31 @@ def __():
|
|
33 |
from gensim.models import Word2Vec
|
34 |
from sklearn.manifold import TSNE
|
35 |
|
|
|
|
|
|
|
|
|
36 |
mo.md("""
|
37 |
## 1. Import all libraries needed
|
38 |
|
39 |
The initial cells import the necessary libraries for data handling, visualization, and word embedding.
|
40 |
""")
|
41 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
|
44 |
@app.cell(hide_code=True)
|
@@ -325,5 +344,63 @@ def __(mo):
|
|
325 |
return
|
326 |
|
327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
if __name__ == "__main__":
|
329 |
app.run()
|
|
|
33 |
from gensim.models import Word2Vec
|
34 |
from sklearn.manifold import TSNE
|
35 |
|
36 |
+
import tensorflow as tf
|
37 |
+
from tensorflow.keras.models import Sequential
|
38 |
+
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense
|
39 |
+
|
40 |
mo.md("""
|
41 |
## 1. Import all libraries needed
|
42 |
|
43 |
The initial cells import the necessary libraries for data handling, visualization, and word embedding.
|
44 |
""")
|
45 |
+
return (
|
46 |
+
Bidirectional,
|
47 |
+
Dense,
|
48 |
+
Embedding,
|
49 |
+
LSTM,
|
50 |
+
Sequential,
|
51 |
+
TSNE,
|
52 |
+
Word2Vec,
|
53 |
+
alt,
|
54 |
+
mo,
|
55 |
+
np,
|
56 |
+
pd,
|
57 |
+
plt,
|
58 |
+
sns,
|
59 |
+
tf,
|
60 |
+
)
|
61 |
|
62 |
|
63 |
@app.cell(hide_code=True)
|
|
|
344 |
return
|
345 |
|
346 |
|
347 |
+
@app.cell
|
348 |
+
def __(mo):
|
349 |
+
mo.md(r"""## Building Bidirection LSTM Model""")
|
350 |
+
return
|
351 |
+
|
352 |
+
|
353 |
+
@app.cell
|
354 |
+
def __():
|
355 |
+
max_length = 100
|
356 |
+
embedding_dim = 100
|
357 |
+
num_classes = 2
|
358 |
+
return embedding_dim, max_length, num_classes
|
359 |
+
|
360 |
+
|
361 |
+
@app.cell
|
362 |
+
def __(
|
363 |
+
Bidirectional,
|
364 |
+
Dense,
|
365 |
+
Embedding,
|
366 |
+
LSTM,
|
367 |
+
Sequential,
|
368 |
+
embedding_dim,
|
369 |
+
max_length,
|
370 |
+
num_classes,
|
371 |
+
word2vec_model,
|
372 |
+
):
|
373 |
+
model = Sequential()
|
374 |
+
model.add(Embedding(input_dim=len(word2vec_model.wv.index_to_key), output_dim=embedding_dim, input_length=max_length))
|
375 |
+
model.add(Bidirectional(LSTM(64, return_sequences=False)))
|
376 |
+
model.add(Dense(num_classes, activation='softmax'))
|
377 |
+
return (model,)
|
378 |
+
|
379 |
+
|
380 |
+
@app.cell
|
381 |
+
def __(model):
|
382 |
+
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
|
383 |
+
model.summary()
|
384 |
+
return
|
385 |
+
|
386 |
+
|
387 |
+
@app.cell
|
388 |
+
def __(df, np):
|
389 |
+
X = np.vstack(df['embedding'].values)
|
390 |
+
y = df['label'].values
|
391 |
+
return X, y
|
392 |
+
|
393 |
+
|
394 |
+
@app.cell
|
395 |
+
def __(X, model, y):
|
396 |
+
model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)
|
397 |
+
return
|
398 |
+
|
399 |
+
|
400 |
+
@app.cell
|
401 |
+
def __():
|
402 |
+
return
|
403 |
+
|
404 |
+
|
405 |
if __name__ == "__main__":
|
406 |
app.run()
|
requirements.txt
CHANGED
@@ -1,17 +1,16 @@
|
|
1 |
-
marimo
|
2 |
-
pandas
|
3 |
-
numpy
|
4 |
-
scipy==1.10.1
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
# Add other dependencies as needed
|
|
|
1 |
+
marimo
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
scipy==1.10.1
|
5 |
+
|
6 |
+
matplotlib
|
7 |
+
seaborn
|
8 |
+
altair
|
9 |
+
|
10 |
+
gensim
|
11 |
+
scikit-learn
|
12 |
+
|
13 |
+
# Or a specific version
|
14 |
+
# marimo>=0.9.0
|
15 |
+
|
16 |
+
# Add other dependencies as needed
|
|