Spaces:
Runtime error
Runtime error
Duplicate from EuroSciPy2022/arxiv-cards
Browse filesCo-authored-by: eliolio <[email protected]>
- .gitattributes +31 -0
- README.md +13 -0
- app.py +126 -0
- arxiv_util.py +58 -0
- csscard.css +107 -0
- get_paperinfo_fromurls.py +20 -0
- htmlcard.html +36 -0
- requirements.txt +2 -0
.gitattributes
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
23 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: arXiv cards
|
3 |
+
emoji: 📄
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.1.7
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
duplicated_from: EuroSciPy2022/arxiv-cards
|
11 |
+
---
|
12 |
+
|
13 |
+
arXiv card generator for easily sharing scientific papers on websites and presentations
|
app.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
3 |
+
from get_paperinfo_fromurls import get_paperinfo_fromurls
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
class CARDS_TEMPLATE(object):
    """Load a Jinja2 template from disk and render paper cards with it.

    Attributes set by ``__init__``:
        path_to_template: directory handed to the ``FileSystemLoader``.
        template_filename: name of the template file inside that directory.
        template: the loaded Jinja2 template.
        rendered_html: result of the last ``render`` call, ``None`` before it.
    """

    def __init__(self, path_to_template, template_filename):
        self.path_to_template = path_to_template
        self.template_filename = template_filename
        self.template = self._get_template()
        self.rendered_html = None

    def _get_template(self):
        """Build the Jinja2 environment and return the configured template."""
        env = Environment(
            autoescape=select_autoescape(
                # This has to be a real tuple. The bare ``('html')`` is just
                # the string "html"; select_autoescape iterates it character
                # by character, so ".html" templates were never autoescaped.
                enabled_extensions=("html",),
                default_for_string=True,
            ),
            loader=FileSystemLoader(self.path_to_template),
        )
        return env.get_template(self.template_filename)

    def render(self, paper_details_iterator):
        """Render the template and store the result in ``rendered_html``.

        ``paper_details_iterator`` is passed to the template under the name
        ``paper_details``.
        """
        self.rendered_html = self.template.render(paper_details=paper_details_iterator)

    def save_html(self, output_dir=None, output_htmlfile=None):
        """Write ``rendered_html`` to ``output_dir/output_htmlfile``.

        Both arguments are required in practice (the ``None`` defaults make
        ``os.path.join`` fail), and ``render`` must have been called first.
        """
        # Explicit UTF-8 so non-ASCII author names and titles are written the
        # same way on every platform instead of using the locale encoding.
        with open(os.path.join(output_dir, output_htmlfile), "w", encoding="utf-8") as f:
            f.write(self.rendered_html)
|
29 |
+
|
30 |
+
# Card template, resolved relative to this module rather than the process's
# current working directory, so the app also starts when launched from
# another directory.
template_file = "htmlcard.html"
template_path = os.path.dirname(os.path.abspath(__file__))
card_template = CARDS_TEMPLATE(
    path_to_template=template_path,
    template_filename=template_file,
)

# Extra CSS handed to gr.Blocks; the ids match the elem_id values used in the
# layout below (url-textbox, ctr, htel).
CSS = """
#url-textbox {
padding: 0 !important;
font-size: 16px;
}

.gradio-container {
background-color: transparent;
}

.gradio-container .gr-button-primary {
background: #b31b1b;
border: 1px solid #b31b1b;
border-radius: 8px;
color: white;
font-weight: bold;
font-size: 16px;
}

#ctr {
text-align: center;
}

#htel {
justify-content: center;
text-align: center;
}
"""

# One row per example; each row holds the value for the single URL textbox.
examples = [
    [
        "https://arxiv.org/abs/2208.14178v1",
    ]
]
|
71 |
+
|
72 |
+
def create_html_card(arxiv_link):
    """Return the rendered HTML card for the given arXiv link.

    The paper details come from ``get_paperinfo_fromurls``; rendering goes
    through the module-level ``card_template``, whose ``rendered_html``
    attribute is updated as a side effect.
    """
    card_template.render(paper_details_iterator=get_paperinfo_fromurls(arxiv_link))
    html_card = card_template.rendered_html
    return html_card
|
76 |
+
|
77 |
+
# Gradio page: header, URL input with a generate button, rendered card,
# examples and credits.
# NOTE(review): the container nesting below is reconstructed from a source
# whose indentation was lost -- confirm it against the deployed layout.
demo = gr.Blocks(css=CSS)
with demo:
    with gr.Column():
        gr.Markdown("# arXiv Cards Generator ⚙️", elem_id="ctr")
        gr.Markdown(
            """
Need a simple and visual way to share arXiv papers on presentations, blogposts, messages?
This gradio demo allows for creating arXiv cards including arXiv identifier, title, authors, abstract

Simply paste the url link of the arXiv paper and generate!
"""
        )

    with gr.Column():
        with gr.Row():
            text = gr.Textbox(
                show_label=False,
                # Both abstract and pdf links are accepted (pdf links are
                # rewritten by the URL sanitizer), hence "abs or pdf".
                placeholder="Paste arXiv link (abs or pdf)",
                lines=1,
                max_lines=1,
                elem_id="url-textbox",
            )
            button = gr.Button("Generate", variant="primary")
        with gr.Row():
            card = gr.HTML(elem_id="htel")
        with gr.Row():
            gr.Examples(
                examples=examples,
                inputs=[text],
            )

    with gr.Column():
        gr.Markdown("### Resources and inspirations", elem_id="ctr")
        gr.Markdown(
            """
- The code for retrieving the information using arXiv API is mainly taken from [github.com/kunalghosh/Conference-Grok](https://github.com/kunalghosh/Conference-Grok).
- The [pdf2preview](https://huggingface.co/spaces/chuanenlin/pdf2preview) space is also a great way to share academic publications on slides.

**Author**: [eliolio](https://huggingface.co/eliolio)
""")
    # Clicking the button renders the card for the pasted link into the HTML
    # component.
    button.click(
        fn=create_html_card,
        inputs=[text],
        outputs=[card]
    )



if __name__ == "__main__":
    demo.launch()
|
arxiv_util.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from collections import namedtuple # later use py3.7 dataclasses
|
2 |
+
import urllib
|
3 |
+
import feedparser
|
4 |
+
import pdb
|
5 |
+
|
6 |
+
# Immutable record holding the metadata of a single arXiv paper.
ArxivPaper = namedtuple(
    "ArxivPaper",
    "title authors abstract linktopdf linktoabs arxiv_id",
)
|
7 |
+
|
8 |
+
def arxiv_url_sanitizer(url):
    """
    Turn an arXiv pdf link into the matching abstract link.

    For now this is the only normalisation: when the url contains "pdf",
    every "/pdf" becomes "/abs" and every ".pdf" is removed. Any other url
    is returned unchanged.
    """
    if "pdf" not in url:
        return url
    return url.replace("/pdf", "/abs").replace(".pdf", "")
|
18 |
+
|
19 |
+
def get_paper_info(url):
    """
    Look up one arXiv paper through the arXiv export API.

    Given an arxiv url returns
    a ArxivPaper object with fields
    title : str
    authors : list of dicts with a "name" key; at most five authors are
              kept, followed by an "and others..." placeholder entry
    abstract : str
    linktopdf : str or None when the feed has no pdf link
    linktoabs : str or None when the feed has no html link
    arxiv_id : str

    Raises RuntimeError when the API cannot be reached or returns no entry
    for the id.
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.request`` / ``urllib.error`` are loaded.
    import urllib.error
    import urllib.parse
    import urllib.request

    # Keep only the path (drops "?query" and "#fragment") and a possible
    # trailing slash. Everything after "/abs/" is the id, so old-style ids
    # such as "hep-th/9901001" keep their category; otherwise fall back to
    # the last path segment.
    path = urllib.parse.urlsplit(url.strip()).path.rstrip("/")
    _, marker, tail = path.partition("/abs/")
    arxiv_id = tail if marker else path.split("/")[-1]
    arxiv_searchurl = "http://export.arxiv.org/api/query?id_list={}".format(arxiv_id)

    try:
        atom_feed = urllib.request.urlopen(arxiv_searchurl)
    except urllib.error.URLError as e:
        # URLError is the base class of HTTPError, so network failures and
        # HTTP error statuses both reach the caller as RuntimeError.
        raise RuntimeError("Trouble fetching ArXiv Id : {}".format(arxiv_id)) from e

    # Close the HTTP response once the feed has been parsed.
    with atom_feed:
        parsed_feed = feedparser.parse(atom_feed)

    entries = parsed_feed["entries"]
    if not entries:
        raise RuntimeError("No arXiv entry found for Id : {}".format(arxiv_id))
    paper = entries[0]

    title = paper["title"]
    authors = paper["authors"]
    if len(authors) > 5:
        # The slice copies the list, so the parsed feed is left untouched.
        authors = authors[:6]
        authors[5] = {'name': 'and others...'}
    abstract = paper["summary"]
    linktopdf = None
    linktoabs = None
    for link_dict in paper["links"]:
        link_type = link_dict.get("type", "")
        if "html" in link_type:
            linktoabs = link_dict["href"]
        elif "pdf" in link_type:
            linktopdf = link_dict["href"]

    # comment = paper["arxiv_comment"] # Not there in all arxiv pages.
    return ArxivPaper(title, authors, abstract, linktopdf, linktoabs, arxiv_id)
|
csscard.css
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@import url("https://fonts.googleapis.com/css?family=Merriweather|Open+Sans");
|
2 |
+
|
3 |
+
.container {
|
4 |
+
display: flex;
|
5 |
+
justify-content: center;
|
6 |
+
padding: 80px;
|
7 |
+
}
|
8 |
+
|
9 |
+
ul {
|
10 |
+
list-style-type: none;
|
11 |
+
display: flex;
|
12 |
+
float: none;
|
13 |
+
justify-content: center;
|
14 |
+
align-items: center;
|
15 |
+
padding-left: 30px;
|
16 |
+
padding-top: 10px;
|
17 |
+
}
|
18 |
+
|
19 |
+
#urllinks li {
|
20 |
+
padding: 0px 30px 5px 5px;
|
21 |
+
}
|
22 |
+
|
23 |
+
.square {
|
24 |
+
width: 700px;
|
25 |
+
background: white;
|
26 |
+
border-radius: 4px;
|
27 |
+
box-shadow: 0px 20px 50px #d9dbdf;
|
28 |
+
}
|
29 |
+
|
30 |
+
.mask {
|
31 |
+
width: 700px;
|
32 |
+
height: 65px;
|
33 |
+
clip: rect(0px, 700px, 150px, 0px);
|
34 |
+
border-radius: 4px;
|
35 |
+
position: absolute;
|
36 |
+
background-color: #b31b1b;
|
37 |
+
display: flex;
|
38 |
+
}
|
39 |
+
|
40 |
+
.mask .left,
|
41 |
+
.mask .right {
|
42 |
+
flex: 1;
|
43 |
+
}
|
44 |
+
|
45 |
+
img {
|
46 |
+
position: absolute;
|
47 |
+
width: 60px;
|
48 |
+
padding: 20px 0px 0px 0px;
|
49 |
+
margin-left: 30px;
|
50 |
+
}
|
51 |
+
|
52 |
+
.h1 {
|
53 |
+
margin: auto;
|
54 |
+
text-align: left;
|
55 |
+
margin-top: 90px;
|
56 |
+
padding-left: 30px;
|
57 |
+
font-family: "Merriweather", serif;
|
58 |
+
font-size: 22px;
|
59 |
+
}
|
60 |
+
|
61 |
+
h2 {
|
62 |
+
color: white;
|
63 |
+
text-align: right;
|
64 |
+
font-size: 14px;
|
65 |
+
padding: 22px 0px;
|
66 |
+
font-family: "Open Sans", sans-serif;
|
67 |
+
font-weight: 400;
|
68 |
+
margin-right: 30px;
|
69 |
+
}
|
70 |
+
|
71 |
+
p {
|
72 |
+
text-align: justify;
|
73 |
+
padding-left: 30px;
|
74 |
+
padding-right: 30px;
|
75 |
+
font-family: "Open Sans", sans-serif;
|
76 |
+
font-size: 12px;
|
77 |
+
color: #949494;
|
78 |
+
line-height: 18px;
|
79 |
+
padding-bottom: 30px;
|
80 |
+
padding-top: 30px;
|
81 |
+
}
|
82 |
+
|
83 |
+
.auth {
|
84 |
+
text-align: justify;
|
85 |
+
padding-left: 0px;
|
86 |
+
padding-right: 20px;
|
87 |
+
font-family: "Open Sans", sans-serif;
|
88 |
+
font-size: 14px;
|
89 |
+
line-height: 18px;
|
90 |
+
}
|
91 |
+
|
92 |
+
.button {
|
93 |
+
background-color: #b31b1b;
|
94 |
+
color: white;
|
95 |
+
width: 150px;
|
96 |
+
padding: 10px 10px;
|
97 |
+
border-radius: 3px;
|
98 |
+
text-align: center;
|
99 |
+
text-decoration: none;
|
100 |
+
display: block;
|
101 |
+
margin-top: 20px;
|
102 |
+
margin-left: 20px;
|
103 |
+
margin-right: 20px;
|
104 |
+
font-size: 12px;
|
105 |
+
cursor: pointer;
|
106 |
+
font-family: "merriweather";
|
107 |
+
}
|
get_paperinfo_fromurls.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from arxiv_util import arxiv_url_sanitizer
|
2 |
+
from arxiv_util import get_paper_info
|
3 |
+
|
4 |
+
def get_paperinfo_fromurls(original_url):
    """
    Returns a dictionary mapping the url entered by the user
    to the corresponding paper info from arxiv.

    The url is stripped and sanitized before the lookup, but the dictionary
    key is ``original_url`` exactly as given. When the lookup fails with a
    RuntimeError the url is skipped (a message is printed) and the returned
    dictionary is empty.
    """
    url_paperinfo = {}
    url = arxiv_url_sanitizer(original_url.strip())
    # print("Sanitized url = {}".format(url))
    try:
        paper_info = get_paper_info(url)
    except RuntimeError as e:
        print("[SKIP] Error processing : {}, message : {}".format(url, e))
    else:
        # Store the entry only on success; previously a failed lookup fell
        # through to this assignment with ``paper_info`` unbound.
        url_paperinfo[original_url] = paper_info

    return url_paperinfo
|
20 |
+
|
htmlcard.html
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<head>
|
2 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
3 |
+
<link href="file/csscard.css" rel="stylesheet" type="text/css"/>
|
4 |
+
|
5 |
+
</head>
|
6 |
+
|
7 |
+
<body>
|
8 |
+
{% for url, paper in paper_details.items() %}
|
9 |
+
<div class="container">
|
10 |
+
<div class="square">
|
11 |
+
<div class="mask">
|
12 |
+
<div class="left">
|
13 |
+
<img src="https://static.arxiv.org/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo">
|
14 |
+
</div>
|
15 |
+
<h2 class="right">[{{ paper.arxiv_id}}]</h2>
|
16 |
+
</div>
|
17 |
+
<div class="h1">{{ paper.title }}</div>
|
18 |
+
<ul id="links">
|
19 |
+
{% for author in paper.authors%}
|
20 |
+
<li><div class="auth">{{ author["name"] }}</div></li>
|
21 |
+
{% endfor %}
|
22 |
+
</ul>
|
23 |
+
<p>{{ paper.abstract }}</p>
|
24 |
+
|
25 |
+
<!-- <ul id="urllinks">
|
26 |
+
<li>
|
27 |
+
<a href="{{ paper.linktopdf}}" target="_" class="button">Article</a>
|
28 |
+
</li>
|
29 |
+
<li>
|
30 |
+
<a href="{{ paper.linktoabs}}" target="_" class="button">Abstract</a>
|
31 |
+
</li>
|
32 |
+
</ul> -->
|
33 |
+
</div>
|
34 |
+
</div>
|
35 |
+
{% endfor %}
|
36 |
+
</body>
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
feedparser
|
2 |
+
jinja2
|