RuudVelo committed on
Commit
5e56153
·
1 Parent(s): 617ca6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -48
app.py CHANGED
@@ -3,32 +3,10 @@ from transformers import pipeline
3
  import torch
4
  import matplotlib.pyplot as plt
5
  import numpy as np
6
- #from PIL import Image
7
-
8
-
9
- #pipe = pipeline(model="RuudVelo/dutch_news_classifier_bert_finetuned")
10
- #text = st.text_area('Please type/copy/paste the Dutch article')
11
-
12
- #labels = ['Binnenland' 'Buitenland' 'Cultuur & Media' 'Economie' 'Koningshuis'
13
- # 'Opmerkelijk' 'Politiek' 'Regionaal nieuws' 'Tech']
14
-
15
- #if text:
16
- # out = pipe(text)
17
- # st.json(out)
18
-
19
-
20
- # load tokenizer and model, create trainer
21
- #model_name = "RuudVelo/dutch_news_classifier_bert_finetuned"
22
- #tokenizer = AutoTokenizer.from_pretrained(model_name)
23
- #model = AutoModelForSequenceClassification.from_pretrained(model_name)
24
- #trainer = Trainer(model=model)
25
- #print(filename, type(filename))
26
- #print(filename.name)
27
 
28
  from transformers import BertForSequenceClassification, BertTokenizer
29
 
30
  model = BertForSequenceClassification.from_pretrained("RuudVelo/dutch_news_clf_bert_finetuned")
31
- #from transformers import BertTokenizer
32
 
33
  tokenizer = BertTokenizer.from_pretrained("RuudVelo/dutch_news_clf_bert_finetuned")
34
 
@@ -38,29 +16,10 @@ st.title("Dutch news article classification")
38
 
39
  st.write("This app classifies a Dutch news article into one of 9 pre-defined* article categories")
40
 
41
- #image = Image.open('dataset-cover_articles.jpg')
42
  st.image('dataset-cover_articles.jpeg', width=150)
43
 
44
  text = st.text_area('Please type/copy/paste text of the Dutch article and click Submit')
45
 
46
- #if text:
47
- # encoding = tokenizer(text, return_tensors="pt")
48
- # outputs = model(**encoding)
49
- # predictions = outputs.logits.argmax(-1)
50
- # probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
51
-
52
- ## fig = plt.figure()
53
- # ax = fig.add_axes([0,0,1,1])
54
- # labels_plot = ['Binnenland', 'Buitenland' ,'Cultuur & Media' ,'Economie' ,'Koningshuis',
55
- # 'Opmerkelijk' ,'Politiek', 'Regionaal nieuws', 'Tech']
56
- # probs_plot = probabilities[0].cpu().detach().numpy()
57
-
58
- # ax.barh(labels_plot,probs_plot )
59
- # st.pyplot(fig)
60
-
61
-
62
- #input = st.text_input('Context')
63
-
64
  if st.button('Submit'):
65
  with st.spinner('Generating a response...'):
66
  encoding = tokenizer(text, return_tensors="pt")
@@ -87,12 +46,8 @@ if st.button('Submit'):
87
  st.pyplot(fig)
88
 
89
  st.write('The predicted category is: **{}** with a probability of: **{:.1f}%**'.format(labels_plot[number],(probs_plot[predictions])*1))
90
- # output = genQuestion(option, input)
91
- # print(output)
92
- # st.write(output)
93
- #encoding = tokenizer(text, return_tensors="pt")
94
- #import numpy as np
95
  st.write("The pre-defined categories are Binnenland, Buitenland, Cultuur & Media, Economie , Koningshuis, Opmerkelijk, Politiek, 'Regionaal nieuws en Tech")
96
- st.write("The model for this app has been trained using data from Dutch news articles published by NOS. For more information regarding the dataset can be found at https://www.kaggle.com/maxscheijen/dutch-news-articles")
97
  #st.write('\n')
98
- st.write('The model performance details can be found at https://huggingface.co/RuudVelo/dutch_news_clf_bert_finetuned')
 
3
  import torch
4
  import matplotlib.pyplot as plt
5
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  from transformers import BertForSequenceClassification, BertTokenizer
8
 
9
  model = BertForSequenceClassification.from_pretrained("RuudVelo/dutch_news_clf_bert_finetuned")
 
10
 
11
  tokenizer = BertTokenizer.from_pretrained("RuudVelo/dutch_news_clf_bert_finetuned")
12
 
 
16
 
17
  st.write("This app classifies a Dutch news article into one of 9 pre-defined* article categories")
18
 
 
19
  st.image('dataset-cover_articles.jpeg', width=150)
20
 
21
  text = st.text_area('Please type/copy/paste text of the Dutch article and click Submit')
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  if st.button('Submit'):
24
  with st.spinner('Generating a response...'):
25
  encoding = tokenizer(text, return_tensors="pt")
 
46
  st.pyplot(fig)
47
 
48
  st.write('The predicted category is: **{}** with a probability of: **{:.1f}%**'.format(labels_plot[number],(probs_plot[predictions])*1))
49
+
 
 
 
 
50
  st.write("The pre-defined categories are Binnenland, Buitenland, Cultuur & Media, Economie , Koningshuis, Opmerkelijk, Politiek, 'Regionaal nieuws en Tech")
51
+ st.write("The model for this app has been trained using data from Dutch news articles published by NOS. More information regarding the dataset can be found at https://www.kaggle.com/maxscheijen/dutch-news-articles")
52
  #st.write('\n')
53
+ st.write('Model performance details can be found at https://huggingface.co/RuudVelo/dutch_news_clf_bert_finetuned')