snaramirez872 committed on
Commit
56f9921
·
1 Parent(s): b41d622

initial commit

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +38 -0
  3. requirements.txt +5 -0
  4. test.csv +3 -0
  5. train.csv +3 -0
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ test.csv filter=lfs diff=lfs merge=lfs -text
36
+ train.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import pandas as pd
5
+ from transformers import AutoTokenizer as AT, AutoModelForSequenceClassifcation as AMFSC
6
+
7
+ # TODO choose model for use
8
+ modName = "distilbert-base-uncased-finetuned-sst-2-english"
9
+ mod = AMFSC.from_pretrained(modName)
10
+ tokenizer = AT.from_pretrained(modName)
11
+
12
+ # TODO set up training data
13
+ train = pd.read_csv('./train.csv')
14
+ train_texts = train['text'].values
15
+ train_labels = train['label'].values
16
+
17
+ # TODO set up test data
18
+ test = pd.read_csv('./test.csv')
19
+ test_texts = test['text'].values
20
+ test_labels = test['label'].values
21
+
22
+ # TODO working with the model
23
+ batch = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt")
24
+
25
+ # For App
26
+ st.title("Finetuning Toxicity Model")
27
+ with torch.no_grad():
28
+ outs = mod(**batch, labels=torch.tensor([1, 0]))
29
+ st.write(outs)
30
+ predicts = F.softmax(outs.logits, dim=1)
31
+ st.write(predicts)
32
+ labels = torch.argmax(predicts, dim=1)
33
+ st.write(labels)
34
+ labels = [mod.config.id2label[label_id] for label_id in labels.tolist()]
35
+ st.write(labels)
36
+
37
+ # TODO deploy app to HuggingFace Streamlit Space
38
+ # TODO add link to readme file
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ # torch.nn.functional ships inside the torch package; it is not a separate PyPI requirement
4
+ pandas
5
+ streamlit
test.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2513ce4abb98c4d1d216e3ca0d4377d57589a0989aa8c06a840509a16c786e8
3
+ size 60354593
train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4084611bd27c939ba98e5e63bc3e5a2c1a4e99477dcba46c829e4c986c429d
3
+ size 68802655