Seetha committed on
Commit
6a2ebdf
·
1 Parent(s): d083073

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -123,14 +123,14 @@ def main():
123
  result2 = re.sub(r'[^\w\s]','',result1)
124
  result.append(result2)
125
 
126
- st.write("--- %s seconds ---" % (time.time() - start_time))
127
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
128
 
129
  model_path = "checkpoint-2850"
130
 
131
  model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
132
 
133
- st.write('base sequence classification loaded')
134
  pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
135
  for sent in result:
136
  pred = pipe1(sent)
@@ -138,34 +138,31 @@ def main():
138
  if lab['label'] == 'causal': #causal
139
  causal_sents.append(sent)
140
 
141
- st.write('causal sentence classification finished')
142
- st.write("--- %s seconds ---" % (time.time() - start_time))
143
 
144
  model_name = "distilbert-base-cased"
145
  tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
146
 
147
-
148
-
149
  model_path1 = "DistilBertforTokenclassification"
150
 
151
  model = DistilBertForTokenClassification.from_pretrained(model_path1) #len(unique_tags),, num_labels= 7, , id2label={0:'CT',1:'E',2:'C',3:'O'}
152
  pipe = pipeline('ner', model=model, tokenizer=tokenizer,aggregation_strategy='simple') #grouped_entities=True
153
- st.write('DistilBERT loaded')
154
  sentence_pred = []
155
  class_list = []
156
  entity_list = []
157
  for k in causal_sents:
158
  pred= pipe(k)
159
  #st.write(pred)
160
- st.write('preds')
161
- for i in pred:
162
-
163
  sentence_pred.append(k)
164
  class_list.append(i['word'])
165
  entity_list.append(i['entity_group'])
166
 
167
- st.write('causality extraction finished')
168
- st.write("--- %s seconds ---" % (time.time() - start_time))
169
 
170
  # filename = 'Checkpoint-classification.sav'
171
  # loaded_model = pickle.load(open(filename, 'rb'))
@@ -193,8 +190,8 @@ def main():
193
  predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
194
  predicted = np.argmax(predictions,axis=1)
195
 
196
- st.write('stakeholder taxonomy finished')
197
- st.write("--- %s seconds ---" % (time.time() - start_time))
198
  pred1 = predicted
199
  level0 = []
200
  count =0
 
123
  result2 = re.sub(r'[^\w\s]','',result1)
124
  result.append(result2)
125
 
126
+ #st.write("--- %s seconds ---" % (time.time() - start_time))
127
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
128
 
129
  model_path = "checkpoint-2850"
130
 
131
  model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
132
 
133
+ #st.write('base sequence classification loaded')
134
  pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
135
  for sent in result:
136
  pred = pipe1(sent)
 
138
  if lab['label'] == 'causal': #causal
139
  causal_sents.append(sent)
140
 
141
+ #st.write('causal sentence classification finished')
142
+ #st.write("--- %s seconds ---" % (time.time() - start_time))
143
 
144
  model_name = "distilbert-base-cased"
145
  tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
146
 
 
 
147
  model_path1 = "DistilBertforTokenclassification"
148
 
149
  model = DistilBertForTokenClassification.from_pretrained(model_path1) #len(unique_tags),, num_labels= 7, , id2label={0:'CT',1:'E',2:'C',3:'O'}
150
  pipe = pipeline('ner', model=model, tokenizer=tokenizer,aggregation_strategy='simple') #grouped_entities=True
151
+ #st.write('DistilBERT loaded')
152
  sentence_pred = []
153
  class_list = []
154
  entity_list = []
155
  for k in causal_sents:
156
  pred= pipe(k)
157
  #st.write(pred)
158
+ #st.write('preds')
159
+ for i in pred:
 
160
  sentence_pred.append(k)
161
  class_list.append(i['word'])
162
  entity_list.append(i['entity_group'])
163
 
164
+ # st.write('causality extraction finished')
165
+ # st.write("--- %s seconds ---" % (time.time() - start_time))
166
 
167
  # filename = 'Checkpoint-classification.sav'
168
  # loaded_model = pickle.load(open(filename, 'rb'))
 
190
  predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
191
  predicted = np.argmax(predictions,axis=1)
192
 
193
+ # st.write('stakeholder taxonomy finished')
194
+ # st.write("--- %s seconds ---" % (time.time() - start_time))
195
  pred1 = predicted
196
  level0 = []
197
  count =0