Update app.py
app.py CHANGED
@@ -123,14 +123,14 @@ def main():
             result2 = re.sub(r'[^\w\s]','',result1)
             result.append(result2)
 
-        st.write("--- %s seconds ---" % (time.time() - start_time))
+        #st.write("--- %s seconds ---" % (time.time() - start_time))
         tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
 
         model_path = "checkpoint-2850"
 
         model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
 
-        st.write('base sequence classification loaded')
+        #st.write('base sequence classification loaded')
         pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
         for sent in result:
             pred = pipe1(sent)
@@ -138,34 +138,31 @@ def main():
             if lab['label'] == 'causal': #causal
                 causal_sents.append(sent)
 
-        st.write('causal sentence classification finished')
-        st.write("--- %s seconds ---" % (time.time() - start_time))
+        #st.write('causal sentence classification finished')
+        #st.write("--- %s seconds ---" % (time.time() - start_time))
 
         model_name = "distilbert-base-cased"
         tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
 
-
-
         model_path1 = "DistilBertforTokenclassification"
 
         model = DistilBertForTokenClassification.from_pretrained(model_path1) #len(unique_tags),, num_labels= 7, , id2label={0:'CT',1:'E',2:'C',3:'O'}
         pipe = pipeline('ner', model=model, tokenizer=tokenizer,aggregation_strategy='simple') #grouped_entities=True
-        st.write('DistilBERT loaded')
+        #st.write('DistilBERT loaded')
         sentence_pred = []
         class_list = []
         entity_list = []
         for k in causal_sents:
             pred= pipe(k)
             #st.write(pred)
-            st.write('preds')
-            for i in pred:
-
+            #st.write('preds')
+            for i in pred:
                 sentence_pred.append(k)
                 class_list.append(i['word'])
                 entity_list.append(i['entity_group'])
 
-        st.write('causality extraction finished')
-        st.write("--- %s seconds ---" % (time.time() - start_time))
+        # st.write('causality extraction finished')
+        # st.write("--- %s seconds ---" % (time.time() - start_time))
 
         # filename = 'Checkpoint-classification.sav'
         # loaded_model = pickle.load(open(filename, 'rb'))
@@ -193,8 +190,8 @@ def main():
         predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
         predicted = np.argmax(predictions,axis=1)
 
-        st.write('stakeholder taxonomy finished')
-        st.write("--- %s seconds ---" % (time.time() - start_time))
+        # st.write('stakeholder taxonomy finished')
+        # st.write("--- %s seconds ---" % (time.time() - start_time))
         pred1 = predicted
         level0 = []
         count =0
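For readers following the change: the edits in this commit comment out `st.write` progress and timing calls (and drop a couple of blank lines); the Space's two-stage flow itself is untouched. Below is a minimal sketch of that flow, assuming the same local checkpoints referenced in the diff (`checkpoint-2850` for the causal/non-causal sentence classifier, `DistilBertforTokenclassification` for the token tagger) and an already-cleaned list of sentences; the function name `extract_causal_spans` is introduced here for illustration only.

    from transformers import (
        AutoTokenizer,
        AutoModelForSequenceClassification,
        DistilBertTokenizerFast,
        DistilBertForTokenClassification,
        pipeline,
    )

    def extract_causal_spans(sentences):
        # Stage 1: binary causal/non-causal sentence classification
        # using the fine-tuned checkpoint from the Space (local path).
        tok = AutoTokenizer.from_pretrained("bert-base-uncased")
        clf_model = AutoModelForSequenceClassification.from_pretrained(
            "checkpoint-2850", id2label={0: "non-causal", 1: "causal"}
        )
        clf = pipeline("text-classification", model=clf_model, tokenizer=tok)
        causal_sents = [s for s in sentences if clf(s)[0]["label"] == "causal"]

        # Stage 2: token classification over the causal sentences;
        # aggregation_strategy='simple' merges word pieces into spans,
        # so each prediction carries 'word' and 'entity_group' keys.
        ner_tok = DistilBertTokenizerFast.from_pretrained("distilbert-base-cased")
        ner_model = DistilBertForTokenClassification.from_pretrained(
            "DistilBertforTokenclassification"
        )
        ner = pipeline("ner", model=ner_model, tokenizer=ner_tok,
                       aggregation_strategy="simple")

        # One row per extracted span: (sentence, span text, span label).
        return [(s, ent["word"], ent["entity_group"])
                for s in causal_sents
                for ent in ner(s)]

The app accumulates the same triples into `sentence_pred`, `class_list`, and `entity_list` inside its loop; the commented-out `st.write` lines were the only instrumentation around these steps.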