Spaces:
Runtime error
Runtime error
Adding NLTK POS tagger
Browse files
app.py
CHANGED
@@ -4,7 +4,8 @@ from transformers import AutoTokenizer, AutoModel
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
import pickle
|
6 |
import nltk
|
7 |
-
nltk.download('punkt')
|
|
|
8 |
|
9 |
from input_format import *
|
10 |
from score import *
|
@@ -27,6 +28,7 @@ def get_similar_paper(
|
|
27 |
author_id_input,
|
28 |
num_papers_show=10
|
29 |
):
|
|
|
30 |
input_sentences = sent_tokenize(abstract_text_input)
|
31 |
|
32 |
# TODO handle pdf file input
|
@@ -39,6 +41,7 @@ def get_similar_paper(
|
|
39 |
name, papers = get_text_from_author_id(author_id_input)
|
40 |
|
41 |
# Compute Doc-level affinity scores for the Papers
|
|
|
42 |
titles, abstracts, doc_scores = compute_overall_score(
|
43 |
doc_model,
|
44 |
tokenizer,
|
@@ -60,6 +63,7 @@ def get_similar_paper(
|
|
60 |
doc_scores = doc_scores[:num_papers_show]
|
61 |
|
62 |
display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
|
|
|
63 |
|
64 |
return gr.update(choices=display_title, interactive=True, visible=True), gr.update(choices=input_sentences, interactive=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
65 |
|
@@ -69,6 +73,7 @@ def get_highlights(
|
|
69 |
abstract,
|
70 |
K=2
|
71 |
):
|
|
|
72 |
# Compute sentence-level and phrase-level affinity scores for each paper
|
73 |
sent_ids, sent_scores, info = get_highlight_info(
|
74 |
sent_model,
|
|
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
import pickle
|
6 |
import nltk
|
7 |
+
nltk.download('punkt') # tokenizer
|
8 |
+
nltk.download('averaged_perceptron_tagger') # postagger
|
9 |
|
10 |
from input_format import *
|
11 |
from score import *
|
|
|
28 |
author_id_input,
|
29 |
num_papers_show=10
|
30 |
):
|
31 |
+
print('-- retrieving similar papers')
|
32 |
input_sentences = sent_tokenize(abstract_text_input)
|
33 |
|
34 |
# TODO handle pdf file input
|
|
|
41 |
name, papers = get_text_from_author_id(author_id_input)
|
42 |
|
43 |
# Compute Doc-level affinity scores for the Papers
|
44 |
+
print('---- computing scores')
|
45 |
titles, abstracts, doc_scores = compute_overall_score(
|
46 |
doc_model,
|
47 |
tokenizer,
|
|
|
63 |
doc_scores = doc_scores[:num_papers_show]
|
64 |
|
65 |
display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
|
66 |
+
print('----- done')
|
67 |
|
68 |
return gr.update(choices=display_title, interactive=True, visible=True), gr.update(choices=input_sentences, interactive=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
69 |
|
|
|
73 |
abstract,
|
74 |
K=2
|
75 |
):
|
76 |
+
print('-- obtaining highlights')
|
77 |
# Compute sentence-level and phrase-level affinity scores for each paper
|
78 |
sent_ids, sent_scores, info = get_highlight_info(
|
79 |
sent_model,
|