Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -96,33 +96,37 @@ def parse_to_markdown(text):
|
|
96 |
import re
|
97 |
|
98 |
def extract_urls(text):
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
126 |
|
127 |
|
128 |
|
|
|
96 |
import re
|
97 |
|
98 |
# Patterns are compiled once at import time instead of on every call.
_DATE_HEADER_RE = re.compile(r'### (\d{2} \w{3} \d{4})')
_ABS_LINK_RE = re.compile(r'\[(.*?)\]\((https://arxiv\.org/abs/\d+\.\d+)\)')
_PDF_LINK_RE = re.compile(r'\[⬇️\]\((https://arxiv\.org/pdf/\d+\.\d+)\)')
_TITLE_RE = re.compile(r'### \d{2} \w{3} \d{4} \| \[(.*?)\]')


def extract_urls(text):
    """Summarise arXiv entries found in *text* as a markdown string.

    An entry starts at a header of the form ``### DD Mon YYYY`` and runs up
    to the next such header (or the end of the text). For every entry that
    contains a bracketed title right after the date, an
    ``https://arxiv.org/abs/...`` link and a download-arrow
    ``https://arxiv.org/pdf/...`` link, a block with the date, title,
    abstract link and PDF link is emitted, followed by a ``---`` rule.

    Fields are looked up inside each entry's own section, so an entry that
    lacks a field (or body text that contains extra arXiv links) cannot
    shift the links of the following entries. Incomplete entries are
    skipped rather than discarding the whole result.

    Args:
        text: Markdown source to scan.

    Returns:
        The generated markdown, or ``''`` when no complete entry is found.
        If *text* is not a string, ``'.'`` is written through ``st.write``
        and ``''`` is returned.
    """
    try:
        headers = list(_DATE_HEADER_RE.finditer(text))
    except TypeError:
        # Non-string input (e.g. None or bytes): keep the original
        # best-effort contract of reporting through the UI and returning ''.
        st.write('.')
        return ''

    # Each section ends where the next header begins; the last one runs to
    # the end of the text.
    starts = [header.start() for header in headers]
    ends = starts[1:] + [len(text)]

    blocks = []
    for header, end in zip(headers, ends):
        section = text[header.start():end]
        # match() anchors at the section start, so the title is guaranteed
        # to belong to this very header.
        title_match = _TITLE_RE.match(section)
        abs_match = _ABS_LINK_RE.search(section)
        pdf_match = _PDF_LINK_RE.search(section)
        if not (title_match and abs_match and pdf_match):
            continue

        date = header.group(1)
        title = title_match.group(1)
        abs_link = abs_match.group(2)
        pdf_link = pdf_match.group(1)
        blocks.append(
            f"**Date:** {date}\n\n"
            f"**Title:** {title}\n\n"
            f"**Abstract Link:** [{abs_link}]({abs_link})\n\n"
            f"**PDF Link:** [{pdf_link}]({pdf_link})\n\n"
            "---\n\n"
        )

    return "".join(blocks)
|
130 |
|
131 |
|
132 |
|