eliolio eliolio committed on
Commit
5ba4f18
0 Parent(s):

Duplicate from EuroSciPy2022/arxiv-cards

Browse files

Co-authored-by: eliolio <[email protected]>

Files changed (8) hide show
  1. .gitattributes +31 -0
  2. README.md +13 -0
  3. app.py +126 -0
  4. arxiv_util.py +58 -0
  5. csscard.css +107 -0
  6. get_paperinfo_fromurls.py +20 -0
  7. htmlcard.html +36 -0
  8. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: arXiv cards
3
+ emoji: 📄
4
+ colorFrom: red
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 3.1.7
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: EuroSciPy2022/arxiv-cards
11
+ ---
12
+
13
+ arXiv card generator for easily sharing scientific papers on websites and presentations
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
3
+ from get_paperinfo_fromurls import get_paperinfo_fromurls
4
+ import gradio as gr
5
+
6
class CARDS_TEMPLATE(object):
    """Jinja2 template wrapper that renders paper details into HTML cards.

    The template is loaded once at construction time. ``render`` fills it in
    and keeps the result on ``rendered_html`` so ``save_html`` can write it.
    """

    def __init__(self, path_to_template, template_filename):
        """Load ``template_filename`` from the directory ``path_to_template``.

        Args:
            path_to_template: Directory handed to the Jinja2
                ``FileSystemLoader`` as its search path.
            template_filename: Name of the template inside that directory.
        """
        self.path_to_template = path_to_template
        self.template_filename = template_filename
        self.template = self._get_template()
        # Output of the most recent ``render`` call; None until then.
        self.rendered_html = None

    def _get_template(self):
        """Build the Jinja2 environment and return the loaded template."""
        env = Environment(
            autoescape=select_autoescape(
                # Must be a real tuple. The bare string 'html' is iterated
                # character by character, producing the patterns '.h', '.t',
                # '.m' and '.l', which leaves autoescaping switched off for
                # '*.html' templates.
                enabled_extensions=("html",),
                default_for_string=True,
            ),
            loader=FileSystemLoader(self.path_to_template),
        )
        return env.get_template(self.template_filename)

    def render(self, paper_details_iterator):
        """Render the template and remember the result.

        Args:
            paper_details_iterator: Object exposed to the template under the
                name ``paper_details``.

        Returns:
            The rendered HTML string (also stored on ``self.rendered_html``).
        """
        self.rendered_html = self.template.render(
            paper_details=paper_details_iterator
        )
        return self.rendered_html

    def save_html(self, output_dir=None, output_htmlfile=None):
        """Write the most recently rendered HTML to a file.

        Args:
            output_dir: Target directory. ``None`` means the current working
                directory.
            output_htmlfile: Name of the file to write. Required.

        Raises:
            ValueError: If no file name is given or ``render`` has not been
                called yet.
        """
        if not output_htmlfile:
            raise ValueError("output_htmlfile is required")
        if self.rendered_html is None:
            raise ValueError("nothing to save: call render() first")
        # An empty first component makes os.path.join return the bare file
        # name, i.e. a path relative to the current working directory.
        target = os.path.join(output_dir or "", output_htmlfile)
        # Explicit encoding: the rendered text can contain non-ASCII
        # characters and the platform default is not always UTF-8.
        with open(target, "w", encoding="utf-8") as f:
            f.write(self.rendered_html)
29
+
30
template_file = "htmlcard.html"
# Look for the template next to this module. The previous empty search path
# was resolved against the current working directory, so the app only found
# its template when started from its own folder.
template_path = os.path.dirname(os.path.abspath(__file__))
# Module-level template instance used by the card-building callback.
card_template = CARDS_TEMPLATE(
    path_to_template=template_path,
    template_filename=template_file,
)
36
+
37
+ CSS = """
38
+ #url-textbox {
39
+ padding: 0 !important;
40
+ font-size: 16px;
41
+ }
42
+
43
+ .gradio-container {
44
+ background-color: transparent;
45
+ }
46
+
47
+ .gradio-container .gr-button-primary {
48
+ background: #b31b1b;
49
+ border: 1px solid #b31b1b;
50
+ border-radius: 8px;
51
+ color: white;
52
+ font-weight: bold;
53
+ font-size: 16px;
54
+ }
55
+
56
+ #ctr {
57
+ text-align: center;
58
+ }
59
+
60
+ #htel {
61
+ justify-content: center;
62
+ text-align: center;
63
+ }
64
+ """
65
+
66
+ examples = [
67
+ [
68
+ "https://arxiv.org/abs/2208.14178v1",
69
+ ]
70
+ ]
71
+
72
def create_html_card(arxiv_link):
    """Build the HTML card for the arXiv link typed into the UI.

    Args:
        arxiv_link: Text of the URL textbox.

    Returns:
        The rendered card HTML, or an empty string when no link was entered.
    """
    # Nothing to look up: skip the network round trip and return an empty
    # card when the box is empty or only whitespace.
    if not arxiv_link or not arxiv_link.strip():
        return ""
    paper_details = get_paperinfo_fromurls(arxiv_link)
    card_template.render(paper_details_iterator=paper_details)
    return card_template.rendered_html
76
+
77
demo = gr.Blocks(css=CSS)
# NOTE(review): the nesting below was reconstructed from a view of the file
# that had lost its indentation -- confirm the layout (in particular that the
# button shares a row with the textbox) against the running app.
with demo:
    # Header: title and a short description of the tool.
    with gr.Column():
        gr.Markdown("# arXiv Cards Generator ⚙️", elem_id="ctr")
        gr.Markdown(
            """
            Need a simple and visual way to share arXiv papers on presentations, blogposts, messages?
            This gradio demo allows for creating arXiv cards including arXiv identifier, title, authors, abstract

            Simply paste the url link of the arXiv paper and generate!
            """
        )

    # Input row, generated card and a clickable example link.
    with gr.Column():
        with gr.Row():
            text = gr.Textbox(
                show_label=False,
                # Was "abs of pdf": both the abstract and the pdf link work.
                placeholder="Paste arXiv link (abs or pdf)",
                lines=1,
                max_lines=1,
                elem_id="url-textbox",
            )
            button = gr.Button("Generate", variant="primary")
        with gr.Row():
            card = gr.HTML(elem_id="htel")
        with gr.Row():
            gr.Examples(
                examples=examples,
                inputs=[text],
            )

    # Footer: credits and related resources.
    with gr.Column():
        gr.Markdown("### Resources and inspirations", elem_id="ctr")
        gr.Markdown(
            """
            - The code for retrieving the information using arXiv API is mainly taken from [github.com/kunalghosh/Conference-Grok](https://github.com/kunalghosh/Conference-Grok).
            - The [pdf2preview](https://huggingface.co/spaces/chuanenlin/pdf2preview) space is also a great way to share academic publications on slides.

            **Author**: [eliolio](https://huggingface.co/eliolio)
            """)

    # Clicking "Generate" sends the textbox content to create_html_card and
    # shows the returned HTML in the card component.
    button.click(
        fn=create_html_card,
        inputs=[text],
        outputs=[card]
    )



if __name__ == "__main__":
    demo.launch()
arxiv_util.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import namedtuple # later use py3.7 dataclasses
2
+ import urllib
3
+ import feedparser
4
+ import pdb
5
+
6
# Immutable record describing one arXiv paper; the field order is the
# positional order used when constructing it.
ArxivPaper = namedtuple(
    "ArxivPaper",
    "title authors abstract linktopdf linktoabs arxiv_id",
)
7
+
8
def arxiv_url_sanitizer(url):
    """
    Turn an arXiv pdf link into the matching abstract link.

    A url that does not contain "pdf" is returned unchanged. Otherwise
    every "/pdf" becomes "/abs" and every ".pdf" is removed.
    """
    if "pdf" not in url:
        return url
    abs_url = url.replace("/pdf", "/abs")
    return abs_url.replace(".pdf", "")
18
+
19
def get_paper_info(url):
    """
    Given an arxiv url returns
    a ArxivPaper object with fields
    title : str
    authors : list of author entries from the feed, each with a "name";
              when there are more than five, only the first five are
              kept, followed by an {'name': 'and others...'} marker
    abstract : str
    linktopdf : str, or None when the feed lists no pdf link
    linktoabs : str, or None when the feed lists no html link
    arxiv_id : str

    Raises RuntimeError when the arXiv API cannot be reached or returns
    no entry for the id.
    """
    # Imported here because the module-level ``import urllib`` does not by
    # itself guarantee that these submodules are loaded.
    import urllib.error
    import urllib.parse
    import urllib.request

    # Take the id from the URL path only, so a query string, a fragment or
    # a trailing slash cannot end up in it.
    arxiv_id = urllib.parse.urlsplit(url).path.rstrip("/").split("/")[-1]
    arxiv_searchurl = "http://export.arxiv.org/api/query?id_list={}".format(arxiv_id)

    try:
        # Read inside a ``with`` so the HTTP response is always closed.
        with urllib.request.urlopen(arxiv_searchurl) as response:
            atom_feed = response.read()
    except urllib.error.URLError as e:
        # URLError also covers HTTPError (its subclass) plus connection
        # level failures such as DNS errors.
        raise RuntimeError("Trouble fetching ArXiv Id : {}".format(arxiv_id)) from e

    parsed_feed = feedparser.parse(atom_feed)
    entries = parsed_feed["entries"]
    if not entries:
        # Report an empty feed the same way as a failed request, not as a
        # bare IndexError.
        raise RuntimeError("No ArXiv entry found for Id : {}".format(arxiv_id))
    paper = entries[0]

    title = paper["title"]
    authors = paper["authors"]
    if len(authors) > 5:
        # Keep the card compact: five names, then a marker.
        authors = authors[:5] + [{'name': 'and others...'}]
    abstract = paper["summary"]

    linktopdf = None
    linktoabs = None
    for link_dict in paper["links"]:
        # Tolerate links that carry no "type" attribute.
        link_type = link_dict.get("type", "")
        if "html" in link_type:
            linktoabs = link_dict["href"]
        elif "pdf" in link_type:
            linktopdf = link_dict["href"]

    # comment = paper["arxiv_comment"] # Not there in all arxiv pages.
    return ArxivPaper(title, authors, abstract, linktopdf, linktoabs, arxiv_id)
csscard.css ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* Styles for the arXiv card rendered from htmlcard.html. */
@import url("https://fonts.googleapis.com/css?family=Merriweather|Open+Sans");

/*
 * NOTE(review): ul, img, h2 and p below are bare element selectors, so they
 * also match elements outside the card on any page that loads this sheet.
 * Consider scoping them under .square -- confirm nothing relies on the
 * current global effect first.
 */

/* Outer wrapper: centres the card horizontally. */
.container {
  display: flex;
  justify-content: center;
  padding: 80px;
}

/* Author list, laid out as a single centred row. */
ul {
  list-style-type: none;
  display: flex;
  float: none;
  justify-content: center;
  align-items: center;
  padding-left: 30px;
  padding-top: 10px;
}

/* Link buttons list (only used by the disabled block of the template). */
#urllinks li {
  padding: 0px 30px 5px 5px;
}

/* The card itself. */
.square {
  width: 700px;
  background: white;
  border-radius: 4px;
  box-shadow: 0px 20px 50px #d9dbdf;
}

/* Coloured header bar holding the logo and the paper identifier. */
.mask {
  width: 700px;
  height: 65px;
  clip: rect(0px, 700px, 150px, 0px);
  border-radius: 4px;
  position: absolute;
  background-color: #b31b1b;
  display: flex;
}

.mask .left,
.mask .right {
  flex: 1;
}

/* Logo inside the header bar. */
img {
  position: absolute;
  width: 60px;
  padding: 20px 0px 0px 0px;
  margin-left: 30px;
}

/* Paper title; the top margin clears the absolutely positioned header. */
.h1 {
  margin: auto;
  text-align: left;
  margin-top: 90px;
  padding-left: 30px;
  font-family: "Merriweather", serif;
  font-size: 22px;
}

/* Paper identifier shown on the right of the header bar. */
h2 {
  color: white;
  text-align: right;
  font-size: 14px;
  padding: 22px 0px;
  font-family: "Open Sans", sans-serif;
  font-weight: 400;
  margin-right: 30px;
}

/* Abstract text. */
p {
  text-align: justify;
  padding-left: 30px;
  padding-right: 30px;
  font-family: "Open Sans", sans-serif;
  font-size: 12px;
  color: #949494;
  line-height: 18px;
  padding-bottom: 30px;
  padding-top: 30px;
}

/* A single author name. */
.auth {
  text-align: justify;
  padding-left: 0px;
  padding-right: 20px;
  font-family: "Open Sans", sans-serif;
  font-size: 14px;
  line-height: 18px;
}

/* Link button (only used by the disabled block of the template). */
.button {
  background-color: #b31b1b;
  color: white;
  width: 150px;
  padding: 10px 10px;
  border-radius: 3px;
  text-align: center;
  text-decoration: none;
  display: block;
  margin-top: 20px;
  margin-left: 20px;
  margin-right: 20px;
  font-size: 12px;
  cursor: pointer;
  /* Same stack as .h1, with a generic fallback if the web font fails. */
  font-family: "Merriweather", serif;
}
get_paperinfo_fromurls.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from arxiv_util import arxiv_url_sanitizer
2
+ from arxiv_util import get_paper_info
3
+
4
def get_paperinfo_fromurls(original_url):
    """
    Returns a dictionary of url entered by user
    and corresponding paper info from arxiv.

    The key is the url exactly as entered; the value is what
    get_paper_info returns for the sanitized url. When the lookup
    raises RuntimeError the url is skipped and the dictionary is empty.
    """
    url_paperinfo = {}
    url = arxiv_url_sanitizer(original_url.strip())
    # print("Sanitized url = {}".format(url))
    try:
        paper_info = get_paper_info(url)
    except RuntimeError as e:
        print("[SKIP] Error processing : {}, message : {}".format(url, e))
    else:
        # Store only on success: after a failure ``paper_info`` was never
        # bound, so storing it unconditionally raised UnboundLocalError.
        url_paperinfo[original_url] = paper_info

    return url_paperinfo
20
+
htmlcard.html ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<head>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <link href="file/csscard.css" rel="stylesheet" type="text/css"/>

</head>

<body>
  {# One card per (url, paper) pair of the paper_details mapping. #}
  {% for url, paper in paper_details.items() %}
  <div class="container">
    <div class="square">
      {# Header bar: logo on the left, paper identifier on the right. #}
      <div class="mask">
        <div class="left">
          <img src="https://static.arxiv.org/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo">
        </div>
        <h2 class="right">[{{ paper.arxiv_id}}]</h2>
      </div>
      <div class="h1">{{ paper.title }}</div>
      <ul id="links">
        {% for author in paper.authors%}
        <li><div class="auth">{{ author["name"] }}</div></li>
        {% endfor %}
      </ul>
      <p>{{ paper.abstract }}</p>

      {# Disabled link buttons. Kept as a Jinja comment rather than an HTML
         comment: Jinja still evaluates expressions inside an HTML comment
         and sends the whole block to the browser.
      <ul id="urllinks">
        <li>
          <a href="{{ paper.linktopdf}}" target="_" class="button">Article</a>
        </li>
        <li>
          <a href="{{ paper.linktoabs}}" target="_" class="button">Abstract</a>
        </li>
      </ul>
      #}
    </div>
  </div>
  {% endfor %}
</body>
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ feedparser
2
+ jinja2