jskim committed on
Commit
e7933f3
·
1 Parent(s): 573dc49

increasing figure size. adding names to the publication. adding submission title and using it for scores.

Browse files
Files changed (3) hide show
  1. app.py +31 -8
  2. details.html +1 -1
  3. score.py +6 -4
app.py CHANGED
@@ -24,14 +24,16 @@ sent_model = SentenceTransformer('sentence-transformers/gtr-t5-base')
24
  sent_model.to(device)
25
 
26
  def get_similar_paper(
 
27
  abstract_text_input,
28
  author_id_input,
29
  results={}, # this state variable will be updated and returned
30
  #progress=gr.Progress()
31
- ):
32
-
33
  progress = gr.Progress()
34
  num_papers_show = 10 # number of top papers to show from the reviewer
 
 
35
  print('retrieving similar papers...')
36
  start = time.time()
37
  input_sentences = sent_tokenize(abstract_text_input)
@@ -47,6 +49,7 @@ def get_similar_paper(
47
  titles, abstracts, paper_urls, doc_scores = compute_document_score(
48
  doc_model,
49
  tokenizer,
 
50
  abstract_text_input,
51
  papers,
52
  batch=10
@@ -256,9 +259,9 @@ with gr.Blocks(css='style.css') as demo:
256
  # R2P2: Reviewer TO Paper in Peer review
257
 
258
  #### Who is it for?
259
- It is for meta-reviewers, area chairs, program chairs, or anyone who oversees the submission-reviewer matching process in peer review for acadmeic conferences, journals, and grants.
260
 
261
- <center><img src="file/tool.gif" width="50%" alt="general workflow"></center>
262
 
263
  #### How does it help?
264
  A typical meta-reviewer workflow lacks supportive information on **what makes the pre-selected candidate reviewers a good fit** for the submission. Only affinity scores between the reviewer and the paper are shown, without additional detail.
@@ -267,7 +270,7 @@ R2P2 provides more information about each reviewer. It searches for the most rel
267
  """
268
  # TODO add instruction video link
269
  # More details (video, addendum)
270
- more_details_instruction = """Check out <a href="", target="_blank">this video</a> for a quick demo of what R2P2 is and how it can help. For more details (e.g., relevant work, privacy policy, disclaimer), refer to <a href="file/details.html", target="_blank">here</a>."""
271
 
272
  gr.Markdown(general_instruction)
273
  gr.HTML(more_details_instruction)
@@ -275,20 +278,37 @@ R2P2 provides more information about each reviewer. It searches for the most rel
275
 
276
  ### INPUT
277
  with gr.Row() as input_row:
278
- with gr.Column():
279
- abstract_text_input = gr.Textbox(label='Submission Abstract', info='Paste in the abstract of the submission.')
280
- with gr.Column():
 
 
 
281
  with gr.Row():
282
  author_id_input = gr.Textbox(label='Reviewer Profile Link (Semantic Scholar)', info="Paste in the reviewer's Semantic Scholar link")
283
  with gr.Row():
284
  name = gr.Textbox(label='Confirm Reviewer Name', info='This will be automatically updated based on the reviewer profile link above', interactive=False)
285
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
 
 
 
 
 
 
 
 
 
 
 
 
286
  with gr.Row():
287
  compute_btn = gr.Button('What Makes This a Good Match?')
288
 
289
  with gr.Row():
290
  search_status = gr.Textbox(label='Search Status', interactive=False, visible=False)
291
 
 
 
292
  ### OVERVIEW
293
  # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
294
  ## ONE BLOCK OF INFO FOR A SINGLE PAPER
@@ -425,6 +445,8 @@ R2P2 provides more information about each reviewer. It searches for the most rel
425
  with gr.Row(visible=False) as hl_row:
426
  # highlighted text from paper
427
  highlight = gr.components.Interpretation(paper_abstract)
 
 
428
 
429
  ### EVENT LISTENERS
430
 
@@ -438,6 +460,7 @@ R2P2 provides more information about each reviewer. It searches for the most rel
438
  compute_btn.click(
439
  fn=get_similar_paper,
440
  inputs=[
 
441
  abstract_text_input,
442
  author_id_input,
443
  info
 
24
  sent_model.to(device)
25
 
26
  def get_similar_paper(
27
+ title_input,
28
  abstract_text_input,
29
  author_id_input,
30
  results={}, # this state variable will be updated and returned
31
  #progress=gr.Progress()
32
+ ):
 
33
  progress = gr.Progress()
34
  num_papers_show = 10 # number of top papers to show from the reviewer
35
+ if title_input == None:
36
+ title_input = '' # if no title is given, just focus on abstract.
37
  print('retrieving similar papers...')
38
  start = time.time()
39
  input_sentences = sent_tokenize(abstract_text_input)
 
49
  titles, abstracts, paper_urls, doc_scores = compute_document_score(
50
  doc_model,
51
  tokenizer,
52
+ title_input,
53
  abstract_text_input,
54
  papers,
55
  batch=10
 
259
  # R2P2: Reviewer TO Paper in Peer review
260
 
261
  #### Who is it for?
262
+ It is for meta-reviewers, area chairs, program chairs, or anyone who oversees the submission-reviewer matching process in peer review for academic conferences, journals, and grants.
263
 
264
+ <center><img src="file/tool.gif" width="70%" alt="general workflow"></center>
265
 
266
  #### How does it help?
267
  A typical meta-reviewer workflow lacks supportive information on **what makes the pre-selected candidate reviewers a good fit** for the submission. Only affinity scores between the reviewer and the paper are shown, without additional detail.
 
270
  """
271
  # TODO add instruction video link
272
  # More details (video, addendum)
273
+ more_details_instruction = """Check out <a href="", target="_blank">this video</a> for a quick demo of what R2P2 is and how it can help. You can find more details <a href="file/details.html", target="_blank">here</a> about R2P2, along with our privacy policy and disclaimer."""
274
 
275
  gr.Markdown(general_instruction)
276
  gr.HTML(more_details_instruction)
 
278
 
279
  ### INPUT
280
  with gr.Row() as input_row:
281
+ with gr.Column(scale=3):
282
+ with gr.Row():
283
+ title_input = gr.Textbox(label='Submission Title', info='Paste in the title of the submission.')
284
+ with gr.Row():
285
+ abstract_text_input = gr.Textbox(label='Submission Abstract', info='Paste in the abstract of the submission.')
286
+ with gr.Column(scale=2):
287
  with gr.Row():
288
  author_id_input = gr.Textbox(label='Reviewer Profile Link (Semantic Scholar)', info="Paste in the reviewer's Semantic Scholar link")
289
  with gr.Row():
290
  name = gr.Textbox(label='Confirm Reviewer Name', info='This will be automatically updated based on the reviewer profile link above', interactive=False)
291
  author_id_input.change(fn=update_name, inputs=author_id_input, outputs=name)
292
+
293
+ # Add examples
294
+ example_title ="The Toronto Paper Matching System: An automated paper-reviewer assignment system"
295
+ example_submission = """One of the most important tasks of conference organizers is the assignment of papers to reviewers. Reviewers' assessments of papers is a crucial step in determining the conference program, and in a certain sense to shape the direction of a field. However this is not a simple task: large conferences typically have to assign hundreds of papers to hundreds of reviewers, and time constraints make the task impossible for one person to accomplish. Furthermore other constraints, such as reviewer load have to be taken into account, preventing the process from being completely distributed. We built the first version of a system to suggest reviewer assignments for the NIPS 2010 conference, followed, in 2012, by a release that better integrated our system with Microsoft's popular Conference Management Toolkit (CMT). Since then our system has been widely adopted by the leading conferences in both the machine learning and computer vision communities. This paper provides an overview of the system, a summary of learning models and methods of evaluation that we have been using, as well as some of the recent progress and open issues."""
296
+ example_reviewer = "https://www.semanticscholar.org/author/Nihar-B.-Shah/1737249"
297
+ gr.Examples(
298
+ examples=[[example_title, example_submission, example_reviewer]],
299
+ inputs=[title_input, abstract_text_input, author_id_input],
300
+ cache_examples=False,
301
+ label="Click to try out the example input."
302
+ )
303
+
304
  with gr.Row():
305
  compute_btn = gr.Button('What Makes This a Good Match?')
306
 
307
  with gr.Row():
308
  search_status = gr.Textbox(label='Search Status', interactive=False, visible=False)
309
 
310
+
311
+
312
  ### OVERVIEW
313
  # Paper title, score, and top-ranking sentence pairs -- two sentence pairs per paper, three papers
314
  ## ONE BLOCK OF INFO FOR A SINGLE PAPER
 
445
  with gr.Row(visible=False) as hl_row:
446
  # highlighted text from paper
447
  highlight = gr.components.Interpretation(paper_abstract)
448
+
449
+
450
 
451
  ### EVENT LISTENERS
452
 
 
460
  compute_btn.click(
461
  fn=get_similar_paper,
462
  inputs=[
463
+ title_input,
464
  abstract_text_input,
465
  author_id_input,
466
  info
details.html CHANGED
@@ -4,7 +4,7 @@
4
 
5
  <h1>Relevant Work</h1>
6
 
7
- The tool is based on the findings from the paper <a href="https://arxiv.org/abs/2302.08450", target="_blank">Assisting Human Decisions in Document Matching</a>.
8
 
9
  <h1>What Happens Behind the Scenes</h1>
10
  <ul>
 
4
 
5
  <h1>Relevant Work</h1>
6
 
7
+ The tool is developed by <a href="https://wnstlr.github.io", target="_blank">Joon Sik Kim</a>, based on the findings from our paper <a href="https://arxiv.org/abs/2302.08450", target="_blank">Assisting Human Decisions in Document Matching</a>, with <a href="https://valeriechen.github.io/", target="_blank">Valerie Chen</a>, <a href="https://danishpruthi.com/", target="_blank">Danish Pruthi</a>, <a href="https://www.cs.cmu.edu/~nihars/", target="_blank">Nihar B. Shah</a>, and <a href="https://www.cs.cmu.edu/~atalwalk/", target="_blank">Ameet Talwalkar</a>.
8
 
9
  <h1>What Happens Behind the Scenes</h1>
10
  <ul>
score.py CHANGED
@@ -251,7 +251,6 @@ def get_highlight_info(model, text1, text2, K=None):
251
  return sent_ids, sent_scores, info, top_pairs_info
252
 
253
  ### Document-level operations
254
- # TODO Use specter_MFR
255
  def predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=20):
256
  # compute document scores for each papers
257
 
@@ -259,8 +258,7 @@ def predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=20):
259
  title_abs = []
260
  for t, a in zip(titles, abstracts):
261
  if t is not None and a is not None:
262
- # title_abs.append(t + ' [SEP] ' + a)
263
- title_abs.append(a)
264
 
265
  num_docs = len(title_abs)
266
  no_iter = int(np.ceil(num_docs / batch))
@@ -292,7 +290,7 @@ def predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=20):
292
 
293
  return scores
294
 
295
- def compute_document_score(doc_model, tokenizer, query, papers, batch=5):
296
  scores = []
297
  titles = []
298
  abstracts = []
@@ -302,6 +300,10 @@ def compute_document_score(doc_model, tokenizer, query, papers, batch=5):
302
  titles.append(p['title'])
303
  abstracts.append(p['abstract'])
304
  urls.append(p['url'])
 
 
 
 
305
  scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
306
  assert(len(scores) == len(abstracts))
307
  idx_sorted = np.argsort(scores)[::-1]
 
251
  return sent_ids, sent_scores, info, top_pairs_info
252
 
253
  ### Document-level operations
 
254
  def predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=20):
255
  # compute document scores for each papers
256
 
 
258
  title_abs = []
259
  for t, a in zip(titles, abstracts):
260
  if t is not None and a is not None:
261
+ title_abs.append(t + ' [SEP] ' + a) # title + abstract
 
262
 
263
  num_docs = len(title_abs)
264
  no_iter = int(np.ceil(num_docs / batch))
 
290
 
291
  return scores
292
 
293
+ def compute_document_score(doc_model, tokenizer, query_title, query_abs, papers, batch=5):
294
  scores = []
295
  titles = []
296
  abstracts = []
 
300
  titles.append(p['title'])
301
  abstracts.append(p['abstract'])
302
  urls.append(p['url'])
303
+ if query_title == '':
304
+ query = query_abs
305
+ else:
306
+ query = query_title + ' [SEP] ' + query_abs # feed in submission title and abstract
307
  scores = predict_docscore(doc_model, tokenizer, query, titles, abstracts, batch=batch)
308
  assert(len(scores) == len(abstracts))
309
  idx_sorted = np.argsort(scores)[::-1]