jskim committed on
Commit
b1499f3
·
1 Parent(s): e16ae7e

adding nltk postagger

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -4,7 +4,8 @@ from transformers import AutoTokenizer, AutoModel
4
  from sentence_transformers import SentenceTransformer
5
  import pickle
6
  import nltk
7
- nltk.download('punkt')
 
8
 
9
  from input_format import *
10
  from score import *
@@ -27,6 +28,7 @@ def get_similar_paper(
27
  author_id_input,
28
  num_papers_show=10
29
  ):
 
30
  input_sentences = sent_tokenize(abstract_text_input)
31
 
32
  # TODO handle pdf file input
@@ -39,6 +41,7 @@ def get_similar_paper(
39
  name, papers = get_text_from_author_id(author_id_input)
40
 
41
  # Compute Doc-level affinity scores for the Papers
 
42
  titles, abstracts, doc_scores = compute_overall_score(
43
  doc_model,
44
  tokenizer,
@@ -60,6 +63,7 @@ def get_similar_paper(
60
  doc_scores = doc_scores[:num_papers_show]
61
 
62
  display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
 
63
 
64
  return gr.update(choices=display_title, interactive=True, visible=True), gr.update(choices=input_sentences, interactive=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
65
 
@@ -69,6 +73,7 @@ def get_highlights(
69
  abstract,
70
  K=2
71
  ):
 
72
  # Compute sent-level and phrase-level affinity scores for each papers
73
  sent_ids, sent_scores, info = get_highlight_info(
74
  sent_model,
 
4
  from sentence_transformers import SentenceTransformer
5
  import pickle
6
  import nltk
7
+ nltk.download('punkt') # tokenizer
8
+ nltk.download('averaged_perceptron_tagger') # postagger
9
 
10
  from input_format import *
11
  from score import *
 
28
  author_id_input,
29
  num_papers_show=10
30
  ):
31
+ print('-- retrieving similar papers')
32
  input_sentences = sent_tokenize(abstract_text_input)
33
 
34
  # TODO handle pdf file input
 
41
  name, papers = get_text_from_author_id(author_id_input)
42
 
43
  # Compute Doc-level affinity scores for the Papers
44
+ print('---- computing scores')
45
  titles, abstracts, doc_scores = compute_overall_score(
46
  doc_model,
47
  tokenizer,
 
63
  doc_scores = doc_scores[:num_papers_show]
64
 
65
  display_title = ['[ %0.3f ] %s'%(s, t) for t, s in zip(titles, doc_scores)]
66
+ print('----- done')
67
 
68
  return gr.update(choices=display_title, interactive=True, visible=True), gr.update(choices=input_sentences, interactive=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
69
 
 
73
  abstract,
74
  K=2
75
  ):
76
+ print('-- obtaining highlights')
77
  # Compute sent-level and phrase-level affinity scores for each papers
78
  sent_ids, sent_scores, info = get_highlight_info(
79
  sent_model,