Refactor paper_data and summarize_paper functions for improved parameter handling and clarity
adbfd57
import json | |
import os | |
import dotenv | |
import html | |
from summarize_paper import summarize_paper | |
from fetch_data import fetch_paper_data_with_category | |
from post_blog import post_blog | |
from send_mail import send_email | |
# Pull settings from a local .env file (via python-dotenv) into the process
# environment before reading them, then cache the shared secret that the
# entry points below compare caller-supplied keys against.
# ``access_key`` is None when ACCESS_KEY is not set.
dotenv.load_dotenv()
access_key = os.environ.get("ACCESS_KEY")
def paper_data(paper_data, wait_time=5):
    """Summarize and post every paper in a JSON payload; return a JSON report.

    Args:
        paper_data: JSON string mapping ``category -> {paper_id -> details}``.
            Each ``details`` mapping is read for the keys ``"doi"``,
            ``"pdf_url"``, ``"title"`` and ``"citation"``.
        wait_time: Passed through unchanged as the last argument of
            ``post_blog``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters kept verbatim)
        shaped like ``{"status": "success", "data": {category: {paper_id:
        {...}}}}``. Every category from the input appears in ``data``; a
        paper appears only if it was summarized and ``post_blog`` returned
        without raising.

    Raises:
        json.JSONDecodeError: If ``paper_data`` is not valid JSON.

    A paper is skipped (with a message on stdout) when any required detail is
    missing or empty, when summarization raises or yields no summary/mindmap,
    or when posting raises. One bad paper never aborts the rest of the batch.
    """
    report = {"status": "success", "data": {}}
    papers_by_category = json.loads(paper_data)

    for category, papers in papers_by_category.items():
        print(f"Processing category: {category}")
        category_results = {}
        report["data"][category] = category_results

        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            title = details.get("title")
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            # Guard summarization the same way posting is guarded below:
            # a failure on one paper must not discard the results already
            # collected for the others.
            try:
                fixed_title, summary, mindmap, fixed_citation = summarize_paper(
                    title, pdf_url, paper_id, citation, access_key
                )
            except Exception as e:
                print(f"Error summarizing paper with ID: {paper_id}: {e}")
                continue

            if not summary or not mindmap:
                print(f"Skipping paper with ID: {paper_id} (Summary/Mindmap not found)")
                continue

            # Prefer the corrected title/citation when the summarizer
            # supplied one; otherwise keep the original. Both are
            # HTML-escaped before being posted and reported.
            title = html.escape(str(fixed_title or title).strip())
            citation = html.escape(str(fixed_citation or citation).strip())

            try:
                status = post_blog(
                    doi, title, category, summary, mindmap, citation, access_key, wait_time
                )
            except Exception as e:
                print(f"Error posting blog '{title}': {e}")
                continue

            category_results[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": title,
                "category": category,
                "posted": status,
                "citation": citation,
                "summary": summary,
                "mindmap": mindmap,
            }

    return json.dumps(report, indent=4, ensure_ascii=False)
def post_blogpost(uaccess_key, wait_time=5):
    """Fetch papers, summarize/post them, and e-mail the resulting report.

    Args:
        uaccess_key: Key supplied by the caller; must equal the module-level
            ``access_key``. It is also forwarded to
            ``fetch_paper_data_with_category``.
        wait_time: Forwarded to ``paper_data``.

    Returns:
        ``False`` when the key does not match; otherwise the JSON report
        string produced by ``paper_data``. The report is returned even if
        sending the e-mail fails.
    """
    # Refuse to do any work for callers presenting the wrong key.
    if uaccess_key != access_key:
        return False

    report = paper_data(fetch_paper_data_with_category(uaccess_key), wait_time)

    # Mailing is best-effort: a failure is logged but does not affect the
    # returned report.
    try:
        send_email(report)
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    except Exception as exc:
        print(f"\n-------------------------------------------------------\nError sending mail: {exc}\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")

    return report
def test(uaccess_key):
    """Run ``paper_data`` on one hard-coded sample paper and print the result.

    Args:
        uaccess_key: Key supplied by the caller; must equal the module-level
            ``access_key``.

    Returns:
        ``False`` when the key does not match; otherwise the JSON report
        string returned by ``paper_data`` (called with its default
        ``wait_time``).
    """
    if uaccess_key != access_key:
        return False

    # The sample title mixes LaTeX markup, a non-ASCII letter and several
    # backslashes. Every backslash that is meant literally is doubled: the
    # previous "{\cal N}" relied on Python passing the invalid escape "\c"
    # through, which warns today and is slated to become a syntax error.
    # NOTE(review): the "â¦" in the citation looks like a mis-decoded
    # ellipsis; it is kept as found because this fixture appears to be
    # deliberately messy -- confirm before normalizing.
    data = {
        "Astrophysics": {
            "2412.20276": {
                "doi": "https://doi.org/10.48550/arXiv.2412.20276",
                "title": "Demographics of black holes at $<$100 R$_{\\rm g}$ scales: accretion flows, jets, and shadows. From Painlev\xe9 equations to ${\\cal N}=2$ susy gauge theories: prolegomena TDI-$\\infty$",
                "pdf_url": "http://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_pdf/97/ea/41598_2024_Article_78595.PMC11551141.pdf",
                "citation": "Nair, D. G., Nagar, N. M., Ramakrishnan, V., Wielgus, M., Arratia, V., Krichbaum, T. P., Zhang, X. A., Ricarte, A., S., S., Hernández-Yévenes, J., Ford, N. M., Bandyopadhyay, B., Gurwell, M., Burridge, R., Pesce, D. W., Doeleman, S. S., Kim, J.-Y., Kim, D., Janssen, M., ⦠Zensus, J. A. (2024). Demographics of black holes at $<$100 R$_{\\rm g}$ scales: accretion flows, jets, and shadows (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2412.20276",
            },
        },
    }

    status = paper_data(json.dumps(data))
    print(status)
    return status
# When executed as a script, run the built-in sample through the pipeline
# using the key configured in the environment.
if __name__ == "__main__":
    test(access_key)