ReXploreBackend / main.py
raannakasturi's picture
Refactor paper_data and summarize_paper functions for improved parameter handling and clarity
adbfd57
raw
history blame
4.35 kB
import json
import os
import dotenv
import html
from summarize_paper import summarize_paper
from fetch_data import fetch_paper_data_with_category
from post_blog import post_blog
from send_mail import send_email
# Let python-dotenv populate the process environment first, then read the
# shared secret that every entry point in this module checks callers against.
# The value is None when ACCESS_KEY is not defined.
dotenv.load_dotenv()
access_key = os.environ.get("ACCESS_KEY")
def paper_data(paper_data, wait_time=5):
    """Summarize and publish every paper in a category-grouped JSON payload.

    Each paper is summarized with ``summarize_paper`` and then published with
    ``post_blog``. A paper that cannot be processed is reported on stdout and
    skipped; it never aborts the rest of the batch.

    Args:
        paper_data: JSON text encoding an object that maps each category name
            to an object of ``paper_id -> details``. Every ``details`` object
            is read for the keys ``doi``, ``pdf_url``, ``title`` and
            ``citation``; all four must be present and non-empty.
        wait_time: Forwarded unchanged to ``post_blog``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters kept verbatim) of
        the form ``{"status": "success", "data": {category: {paper_id: {...}}}}``.
        Every category in the input appears under ``data``, even when none of
        its papers were posted.

    Raises:
        json.JSONDecodeError: If ``paper_data`` is not valid JSON.
        AttributeError: If the decoded top-level value is not a JSON object.
    """
    categories = json.loads(paper_data)
    processed = {}
    for category, papers in categories.items():
        print(f"Processing category: {category}")
        processed[category] = {}
        # A category whose value is null / a list / a scalar has no papers to
        # walk; skip it instead of crashing the whole run on ``.items()``.
        if not isinstance(papers, dict):
            print(f"Skipping category: {category} (malformed paper list)")
            continue
        for paper_id, details in papers.items():
            # Same reasoning per paper: one malformed entry must not take the
            # remaining papers down with it.
            if not isinstance(details, dict):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue
            fixed_title, summary, mindmap, fixed_citation = summarize_paper(
                title, pdf_url, paper_id, citation, access_key
            )
            if not summary or not mindmap:
                print(f"Skipping paper with ID: {paper_id} (Summary/Mindmap not found)")
                continue
            # Prefer the corrected title/citation from the summarizer and fall
            # back to the fetched values when it returned nothing for them.
            title = html.escape(str(fixed_title or title).strip())
            citation = html.escape(str(fixed_citation or citation).strip())
            try:
                status = post_blog(
                    doi, title, category, summary, mindmap, citation, access_key, wait_time
                )
            except Exception as e:
                # Deliberately broad: this is the per-paper boundary, and a
                # failed post is logged and skipped rather than propagated.
                print(f"Error posting blog '{title}': {e}")
                continue
            processed[category][paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": title,
                "category": category,
                "posted": status,
                "citation": citation,
                "summary": summary,
                "mindmap": mindmap,
            }
    return json.dumps(
        {"status": "success", "data": processed}, indent=4, ensure_ascii=False
    )
def post_blogpost(uaccess_key, wait_time=5):
    """Fetch the current paper list, publish it, and mail the resulting report.

    Args:
        uaccess_key: Key supplied by the caller; it must equal the
            module-level ``access_key``.
        wait_time: Forwarded to ``paper_data``.

    Returns:
        ``False`` when the caller is not authorised (wrong key, or no key is
        configured at all); otherwise the JSON report string produced by
        ``paper_data``. The report is returned even if mailing it failed.
    """
    # Fail closed: ``access_key`` is None when ACCESS_KEY is not set, and a
    # bare inequality test would then accept a caller that passes None.
    if not access_key or uaccess_key != access_key:
        return False
    data = fetch_paper_data_with_category(uaccess_key)
    pdata = paper_data(data, wait_time)
    try:
        send_email(pdata)
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    except Exception as e:
        # Mailing is best-effort: report the failure but still hand the
        # report back to the caller.
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
    return pdata
def test(uaccess_key):
    """Run the pipeline once on a single hard-coded sample paper.

    Args:
        uaccess_key: Key supplied by the caller; it must equal the
            module-level ``access_key``.

    Returns:
        ``False`` when the key does not match; otherwise the JSON report
        string returned by ``paper_data``, which is also printed.
    """
    if uaccess_key != access_key:
        return False
    # The title mixes LaTeX markup and a non-ASCII character on purpose, so
    # the sample exercises escaping further down the pipeline. Every LaTeX
    # backslash is doubled: "\c" is not a valid Python escape sequence.
    data = {
        "Astrophysics": {
            "2412.20276": {
                "doi": "https://doi.org/10.48550/arXiv.2412.20276",
                "title": "Demographics of black holes at $<$100 R$_{\\rm g}$ scales: accretion flows, jets, and shadows. From Painlev\xe9 equations to ${\\cal N}=2$ susy gauge theories: prolegomena TDI-$\\infty$",
                "pdf_url": "http://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/97/ea/41598_2024_Article_78595.PMC11551141.pdf",
                "citation": "Nair, D. G., Nagar, N. M., Ramakrishnan, V., Wielgus, M., Arratia, V., Krichbaum, T. P., Zhang, X. A., Ricarte, A., S., S., Hernández-Yévenes, J., Ford, N. M., Bandyopadhyay, B., Gurwell, M., Burridge, R., Pesce, D. W., Doeleman, S. S., Kim, J.-Y., Kim, D., Janssen, M., … Zensus, J. A. (2024). Demographics of black holes at $<$100 R$_{\\rm g}$ scales: accretion flows, jets, and shadows (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2412.20276"
            }
        },
    }
    status = paper_data(json.dumps(data))
    print(status)
    return status
# Script entry point: run the built-in sample through the pipeline using the
# key configured for this process.
if __name__ == "__main__":
    test(access_key)