arxiv-cards

Runtime error

App Files Files Community

eliolio

eliolio committed on Dec 5, 2022

Commit

5ba4f18

•

0 Parent(s):

Duplicate from EuroSciPy2022/arxiv-cards

Browse files

Co-authored-by: eliolio <[email protected]>

Files changed (8) hide show

.gitattributes +31 -0
README.md +13 -0
app.py +126 -0
arxiv_util.py +58 -0
csscard.css +107 -0
get_paperinfo_fromurls.py +20 -0
htmlcard.html +36 -0
requirements.txt +2 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,31 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: arXiv cards
+emoji: 📄
+colorFrom: red
+colorTo: yellow
+sdk: gradio
+sdk_version: 3.1.7
+app_file: app.py
+pinned: false
+duplicated_from: EuroSciPy2022/arxiv-cards
+---
+arXiv card generator for easily sharing scientific papers on websites and in presentations.

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+from get_paperinfo_fromurls import get_paperinfo_fromurls
+import gradio as gr
+class CARDS_TEMPLATE(object):
+    """Load a Jinja2 card template and render paper details into HTML.
+
+    Attributes:
+        path_to_template: Directory handed to the ``FileSystemLoader``.
+        template_filename: Name of the template file inside that directory.
+        template: The loaded Jinja2 template object.
+        rendered_html: Output of the most recent ``render`` call, or
+            ``None`` if nothing has been rendered yet.
+    """
+
+    def __init__(self, path_to_template, template_filename):
+        self.path_to_template = path_to_template
+        self.template_filename = template_filename
+        self.template = self._get_template()
+        self.rendered_html = None
+
+    def _get_template(self):
+        """Build the Jinja2 environment and return the loaded template."""
+        env = Environment(
+            autoescape=select_autoescape(
+                # Must be a real tuple: a bare 'html' string is iterated
+                # character by character, so ".html" templates never match
+                # and paper titles/abstracts are emitted unescaped.
+                enabled_extensions=('html',),
+                default_for_string=True,
+            ),
+            loader=FileSystemLoader(self.path_to_template),
+        )
+        return env.get_template(self.template_filename)
+
+    def render(self, paper_details_iterator):
+        """Render the template and store the result in ``rendered_html``.
+
+        Args:
+            paper_details_iterator: Object exposed to the template as
+                ``paper_details``.
+
+        Returns:
+            The rendered HTML string (also kept in ``self.rendered_html``).
+        """
+        self.rendered_html = self.template.render(paper_details=paper_details_iterator)
+        return self.rendered_html
+
+    def save_html(self, output_dir=None, output_htmlfile=None):
+        """Write the most recently rendered HTML to a file.
+
+        Args:
+            output_dir: Target directory; ``None`` means the current
+                working directory.
+            output_htmlfile: Name of the file to write (required).
+
+        Raises:
+            TypeError: If ``output_htmlfile`` is missing or ``render`` has
+                not been called yet.
+        """
+        # Validate before opening: opening in "w" mode first would truncate
+        # an existing file and only then fail.
+        if output_htmlfile is None:
+            raise TypeError("save_html() requires output_htmlfile")
+        if self.rendered_html is None:
+            raise TypeError("nothing to save: call render() before save_html()")
+        target = os.path.join(output_dir or "", output_htmlfile)
+        # Abstracts routinely contain non-ASCII characters, so do not rely
+        # on the platform's default encoding.
+        with open(target, "w", encoding="utf-8") as f:
+            f.write(self.rendered_html)
+# Template file name and the search directory handed to CARDS_TEMPLATE.
+# NOTE(review): an empty search path presumably resolves relative to the
+# current working directory - confirm against the deployment layout.
+template_file = "htmlcard.html"
+template_path = ""
+# Shared template instance used by create_html_card for every request.
+card_template = CARDS_TEMPLATE(
+                path_to_template = template_path,
+                template_filename = template_file,
+                )
+# Custom CSS passed to gr.Blocks. The id selectors match the elem_id values
+# given to the components below (url-textbox, ctr, htel).
+CSS = """
+#url-textbox {
+    padding: 0 !important;
+    font-size: 16px;
+}
+.gradio-container {
+    background-color: transparent;
+}
+.gradio-container .gr-button-primary {
+    background: #b31b1b;
+    border: 1px solid #b31b1b;
+    border-radius: 8px;
+    color: white;
+    font-weight: bold;
+    font-size: 16px;
+}
+#ctr {
+    text-align: center;
+}
+#htel {
+    justify-content: center;
+    text-align: center;
+}
+"""
+# Example inputs offered through gr.Examples: one row per example, one
+# value per input component.
+examples = [
+    [
+        "https://arxiv.org/abs/2208.14178v1",
+    ]
+]
+def create_html_card(arxiv_link):
+    """Return the HTML card for the paper behind ``arxiv_link``.
+
+    Looks up the paper details for the link, renders them through the
+    shared ``card_template`` and hands back the resulting HTML string.
+    """
+    card_template.render(
+        paper_details_iterator=get_paperinfo_fromurls(arxiv_link),
+    )
+    return card_template.rendered_html
+# Build the Gradio interface: intro text, url input + button, the rendered
+# card, example inputs, and a credits section.
+demo = gr.Blocks(css=CSS)
+with demo:
+    # Header and short description.
+    with gr.Column():
+        gr.Markdown("# arXiv Cards Generator ⚙️", elem_id="ctr")
+        gr.Markdown(
+            """
+            Need a simple and visual way to share arXiv papers on presentations, blogposts, messages?
+            This gradio demo allows for creating arXiv cards including arXiv identifier, title, authors, abstract
+            Simply paste the url link of the arXiv paper and generate!
+            """
+        )
+    # Input row, output card and examples.
+    with gr.Column():
+        with gr.Row():
+            text = gr.Textbox(
+                show_label=False,
+                # Both abstract and pdf links are accepted; the url
+                # sanitizer rewrites pdf links to abstract links.
+                placeholder="Paste arXiv link (abs or pdf)",
+                lines=1,
+                max_lines=1,
+                elem_id="url-textbox",
+            )
+            button = gr.Button("Generate", variant="primary")
+        with gr.Row():
+            card = gr.HTML(elem_id="htel")
+        with gr.Row():
+            gr.Examples(
+                examples=examples,
+                inputs=[text],
+            )
+    # Credits.
+    with gr.Column():
+        gr.Markdown("### Resources and inspirations", elem_id="ctr")
+        gr.Markdown(
+            """
+            - The code for retrieving the information using arXiv API is mainly taken from [github.com/kunalghosh/Conference-Grok](https://github.com/kunalghosh/Conference-Grok).
+            - The [pdf2preview](https://huggingface.co/spaces/chuanenlin/pdf2preview) space is also a great way to share academic publications on slides.
+            **Author**: [eliolio](https://huggingface.co/eliolio)
+            """)
+    # Clicking the button renders the card for the pasted link.
+    button.click(
+        fn=create_html_card,
+        inputs=[text],
+        outputs=[card]
+    )
+if __name__ == "__main__":
+    demo.launch()

arxiv_util.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import pdb
+import urllib
+import urllib.error
+import urllib.request
+from collections import namedtuple # later use py3.7 dataclasses
+
+import feedparser
+# Immutable record describing one arXiv paper, as returned by get_paper_info.
+# "authors" is a list of dicts carrying a "name" key (the card template
+# reads author["name"]); the remaining fields are filled from the feed entry.
+ArxivPaper = namedtuple("ArxivPaper", ["title", "authors", "abstract", "linktopdf", "linktoabs", "arxiv_id"])
+def arxiv_url_sanitizer(url):
+    """
+    Rewrite an arXiv pdf link into the matching abstract link.
+
+    A url containing "pdf" has every "/pdf" turned into "/abs" and
+    every ".pdf" removed; any other url is returned unchanged.
+    """
+    if "pdf" not in url:
+        return url
+    return url.replace("/pdf", "/abs").replace(".pdf", "")
+def get_paper_info(url):
+    """
+    Fetch the metadata of one paper from the arXiv API.
+
+    The arXiv id is the last path segment of ``url``; it is looked up
+    through the export.arxiv.org Atom API and the first feed entry is used.
+
+    Returns a ArxivPaper object with fields
+        title : str
+        authors : list of dicts with a "name" key; at most five authors
+                  followed by an "and others..." placeholder
+        abstract : str
+        linktopdf : str or None
+        linktoabs : str or None
+        arxiv_id : str
+
+    Raises RuntimeError if the request fails or the feed contains no
+    entry for the id.
+    """
+    # Tolerate a trailing slash so ".../abs/<id>/" still yields the id.
+    arxiv_id = url.rstrip("/").split("/")[-1]
+    arxiv_searchurl = "http://export.arxiv.org/api/query?id_list={}".format(arxiv_id)
+    try:
+        # The context manager closes the HTTP response once it is parsed.
+        with urllib.request.urlopen(arxiv_searchurl) as atom_feed:
+            parsed_feed = feedparser.parse(atom_feed)
+    except urllib.error.URLError as e:
+        # URLError is the base of HTTPError, so this also covers DNS and
+        # connection failures, not just HTTP error statuses.
+        raise RuntimeError("Trouble fetching ArXiv Id : {}".format(arxiv_id)) from e
+    entries = parsed_feed["entries"]
+    if not entries:
+        # Report a missing paper the same way as a failed request so
+        # callers only have one exception type to handle.
+        raise RuntimeError("No ArXiv entry found for Id : {}".format(arxiv_id))
+    paper = entries[0]
+    title = paper["title"]
+    authors = paper["authors"]
+    if len(authors) > 5:
+        # Keep the first five names and flag that the list was cut.
+        authors = authors[:5] + [{'name': 'and others...'}]
+    abstract = paper["summary"]
+    linktopdf = None
+    linktoabs = None
+    for link_dict in paper["links"]:
+        # A link without a type matches neither branch.
+        link_type = link_dict.get("type") or ""
+        if "html" in link_type:
+            linktoabs = link_dict["href"]
+        elif "pdf" in link_type:
+            linktopdf = link_dict["href"]
+    # comment = paper["arxiv_comment"] # Not there in all arxiv pages.
+    return ArxivPaper(title, authors, abstract, linktopdf, linktoabs, arxiv_id)

csscard.css ADDED Viewed

	@@ -0,0 +1,107 @@

+/* Stylesheet for the arXiv card markup in htmlcard.html. */
+/* Web fonts used by the title (Merriweather) and body text (Open Sans). */
+@import url("https://fonts.googleapis.com/css?family=Merriweather|Open+Sans");
+/* Outer wrapper that centers one card. */
+.container {
+  display: flex;
+  justify-content: center;
+  padding: 80px;
+}
+/* Element selector: lays list items out in a centered horizontal row. */
+ul {
+list-style-type: none;
+display: flex;
+float: none;
+justify-content: center;
+align-items: center;
+padding-left: 30px;
+padding-top: 10px;
+}
+/* Items of the link list (that list is commented out in the template). */
+#urllinks li {
+  padding: 0px 30px 5px 5px;
+}
+/* The white card body. */
+.square {
+  width: 700px;
+  background: white;
+  border-radius: 4px;
+  box-shadow: 0px 20px 50px #d9dbdf;
+}
+/* Colored header bar across the top of the card (logo left, id right). */
+.mask {
+  width: 700px;
+  height: 65px;
+  clip: rect(0px, 700px, 150px, 0px);
+  border-radius: 4px;
+  position: absolute;
+  background-color: #b31b1b;
+  display: flex;
+}
+/* Both halves of the header bar share the width equally. */
+.mask .left,
+.mask .right {
+  flex: 1;
+}
+/* Element selector: positions the logo image inside the header bar. */
+img {
+position: absolute;
+width: 60px;
+padding: 20px 0px 0px 0px;
+margin-left: 30px;
+}
+/* Paper title; the top margin clears the absolutely positioned header. */
+.h1 {
+  margin: auto;
+  text-align: left;
+  margin-top: 90px;
+  padding-left: 30px;
+  font-family: "Merriweather", serif;
+  font-size: 22px;
+}
+/* Element selector: the arXiv id shown in the header bar. */
+h2 {
+    color: white;
+    text-align: right;
+    font-size: 14px;
+    padding: 22px 0px;
+    font-family: "Open Sans", sans-serif;
+    font-weight: 400;
+    margin-right: 30px;
+}
+/* Element selector: the abstract paragraph. */
+p {
+text-align: justify;
+padding-left: 30px;
+padding-right: 30px;
+font-family: "Open Sans", sans-serif;
+font-size: 12px;
+color: #949494;
+line-height: 18px;
+padding-bottom: 30px;
+padding-top: 30px;
+}
+/* One author name inside the author list. */
+.auth {
+  text-align: justify;
+  padding-left: 0px;
+  padding-right: 20px;
+  font-family: "Open Sans", sans-serif;
+  font-size: 14px;
+  line-height: 18px;
+}
+/* Link buttons (only referenced by the commented-out link list). */
+.button {
+  background-color: #b31b1b;
+  color: white;
+  width: 150px;
+  padding: 10px 10px;
+  border-radius: 3px;
+  text-align: center;
+  text-decoration: none;
+  display: block;
+  margin-top: 20px;
+  margin-left: 20px;
+  margin-right: 20px;
+  font-size: 12px;
+  cursor: pointer;
+  font-family: "merriweather";
+}

get_paperinfo_fromurls.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from arxiv_util import arxiv_url_sanitizer
+from arxiv_util import get_paper_info
+def get_paperinfo_fromurls(original_url):
+    """
+    Returns a dictionary mapping the url entered by the user
+    to the corresponding paper info from arxiv.
+
+    The dictionary is empty when the url is blank or the paper could
+    not be retrieved, so callers can always iterate over the result.
+    """
+    url_paperinfo = {}
+    url = arxiv_url_sanitizer(original_url.strip())
+    if not url:
+        # Nothing to look up; do not query the API with an empty id.
+        return url_paperinfo
+    # print("Sanitized url = {}".format(url))
+    try:
+        paper_info = get_paper_info(url)
+    except RuntimeError as e:
+        print("[SKIP] Error processing : {}, message : {}".format(url, e))
+        # paper_info is unbound on this path, so skip the entry instead
+        # of falling through to the assignment below.
+        return url_paperinfo
+    url_paperinfo[original_url] = paper_info
+    return url_paperinfo

htmlcard.html ADDED Viewed

	@@ -0,0 +1,36 @@

+    {# Card template: expects "paper_details", a mapping of url -> paper with the
+       attributes arxiv_id, title, authors, abstract, linktopdf and linktoabs. #}
+    <head>
+        <meta name="viewport" content="width=device-width, initial-scale=1">
+        <link href="file/csscard.css" rel="stylesheet" type="text/css"/>
+    </head>
+    <body>
+    {# One card per entry; the url key itself is not displayed. #}
+    {% for url, paper in paper_details.items() %}
+        <div class="container">
+            <div class="square">
+            <div class="mask">
+                <div class="left">
+                <img src="https://static.arxiv.org/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo">
+                </div>
+                <h2 class="right">[{{ paper.arxiv_id}}]</h2>
+            </div>
+            <div class="h1">{{ paper.title }}</div>
+            <ul id="links">
+                {# Each author is a mapping with a "name" key. #}
+                {% for author in paper.authors%}
+                        <li><div class="auth">{{ author["name"] }}</div></li>
+                {% endfor %}
+            </ul>
+            <p>{{ paper.abstract }}</p>
+            {# The link list below is disabled with an HTML comment; Jinja still
+               substitutes the expressions inside it when rendering. #}
+            <!-- <ul id="urllinks">
+                <li>
+                <a href="{{ paper.linktopdf}}" target="_" class="button">Article</a>
+                </li>
+                <li>
+                <a href="{{ paper.linktoabs}}" target="_" class="button">Abstract</a>
+                </li>
+            </ul> -->
+            </div>
+        </div>
+    {% endfor %}
+    </body>

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


+feedparser
+jinja2