Shredder committed on
Commit
75d092f
·
1 Parent(s): 15c29c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -4
app.py CHANGED
@@ -11,7 +11,10 @@ from nltk.tokenize import sent_tokenize
11
  from fin_readability_sustainability import BERTClass, do_predict
12
  import pandas as pd
13
  import en_core_web_sm
14
-
 
 
 
15
 
16
  nlp = en_core_web_sm.load()
17
  nltk.download('punkt')
@@ -37,6 +40,35 @@ def get_sustainability(text):
37
  return highlight
38
  #SUSTAINABILITY ENDS
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  ##Summarization
41
  summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
42
  def summarize_text(text):
@@ -93,8 +125,8 @@ def quad(query,file):
93
  #resp = summarizer(answer)
94
  #stext = resp[0]['summary_text']
95
 
96
-
97
- return answer,summarize_text(answer),fin_ner(answer),get_sustainability(answer),fls(answer)
98
 
99
 
100
  # b6 = gr.Button("Get Sustainability")
@@ -104,7 +136,7 @@ def quad(query,file):
104
  #iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
105
  #iface.launch()
106
 
107
- iface = gr.Interface(fn=quad, inputs=[gr.inputs.Textbox(label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="SUSTAINABILITY TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
108
 
109
 
110
  iface.launch()
 
11
  from fin_readability_sustainability import BERTClass, do_predict
12
  import pandas as pd
13
  import en_core_web_sm
14
+ from fincat_utils import extract_context_words
15
+ from fincat_utils import bert_embedding_extract
16
+ import pickle
17
# Load the pickled classifier that score_fincat queries via predict / predict_proba.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only ever load a trusted model artifact shipped with the app.
# The context manager closes the file handle as soon as the model is read.
with open("lr_clf_FiNCAT.pickle", "rb") as model_file:
    lr_clf = pickle.load(model_file)
18
 
19
  nlp = en_core_web_sm.load()
20
  nltk.download('punkt')
 
40
  return highlight
41
  #SUSTAINABILITY ENDS
42
 
43
+ #CLAIM STARTS
44
def score_fincat(txt):
    """Label every numeral-bearing token of *txt* as in-claim or out-of-claim.

    The text is split on whitespace. Tokens containing at least one digit are
    passed (with their character offsets in the space-padded text) to
    ``extract_context_words`` and ``bert_embedding_extract``; the resulting
    feature vector is classified by the module-level ``lr_clf``. All other
    tokens are emitted with a blank label.

    Args:
        txt: The text to analyse.

    Returns:
        A ``(highlight, dff)`` tuple. ``highlight`` is a list of
        ``(token, label)`` pairs in reading order. ``dff`` is a DataFrame
        with columns ``numeral``, ``prediction`` and ``probability`` holding
        one row per numeral-bearing token; it is empty (but still has those
        columns) when the text contains no numerals.
    """
    trailing_punctuation = ('.', ',', ';', ":", "-", "!", "?", ")", '"', "'")
    headers = ['numeral', 'prediction', 'probability']

    li = []
    highlight = []
    # Offsets handed to extract_context_words are relative to this padded text.
    txt = " " + txt + " "

    # Walk a cursor forward through the text so each token resolves to its own
    # position. Searching for " token " from the start would always hit the
    # first occurrence of a repeated numeral and would fail outright when the
    # token is bounded by a newline or tab instead of a space.
    cursor = 0
    for token in txt.split():
        st = txt.index(token, cursor)
        cursor = st + len(token)

        if not any(char.isdigit() for char in token):
            highlight.append((token, ' '))
            continue

        # Drop a single trailing punctuation mark so it is neither scored nor
        # counted in the end offset.
        word = token[:-1] if token[-1] in trailing_punctuation else token
        ed = st + len(word)

        x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)
        # Reshape once to a single-row batch of 768 features and reuse it for
        # both classifier calls.
        sample = features.reshape(1, 768)
        prediction = lr_clf.predict(sample)
        prediction_probability = '{:.4f}'.format(
            round(lr_clf.predict_proba(sample)[:, 1][0], 4)
        )
        label = ' In-claim' if prediction == 1 else 'Out-of-Claim'

        highlight.append((word, label))
        li.append([word, label, prediction_probability])

    # Passing the columns to the constructor keeps this valid when no numeral
    # was found; assigning three names to a column-less frame raises ValueError.
    dff = pd.DataFrame(li, columns=headers)
    return highlight, dff
70
+
71
+
72
  ##Summarization
73
  summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
74
  def summarize_text(text):
 
125
  #resp = summarizer(answer)
126
  #stext = resp[0]['summary_text']
127
 
128
+ highlight,dff=score_fincat(answer)
129
+ return answer,summarize_text(answer),highlight,dff,fin_ner(answer),get_sustainability(answer),fls(answer)
130
 
131
 
132
  # b6 = gr.Button("Get Sustainability")
 
136
  #iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
137
  #iface.launch()
138
 
139
+ iface = gr.Interface(fn=quad, inputs=[gr.inputs.Textbox(label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="SUSTAINABILITY TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),"highlight","data frame",gr.HighlightedText(label='NER'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
140
 
141
 
142
  iface.launch()