Spaces:
Running
Running
raannakasturi
committed on
Commit
·
0485034
1
Parent(s):
b6b0f54
Refactor image generation and paper summarization logic for improved safety and clarity
Browse files- image.py +2 -2
- main.py +11 -10
- post_blog.py +36 -29
- summarize_paper.py +10 -2
- test.py +0 -7
image.py
CHANGED
@@ -24,9 +24,9 @@ def fix_base64_padding(data):
|
|
24 |
|
25 |
def generate_image(title, summary):
|
26 |
try:
|
27 |
-
negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features",
|
28 |
extracted_summary = extract_summary(summary)
|
29 |
-
prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR REVEALING IMAGES]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
|
30 |
client = Client(
|
31 |
image_provider=RetryProvider(
|
32 |
providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],
|
|
|
24 |
|
25 |
def generate_image(title, summary):
|
26 |
try:
|
27 |
+
negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
|
28 |
extracted_summary = extract_summary(summary)
|
29 |
+
prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR ANYTHING REVEALING IMAGES NOR SHOULD THEY BE VULGAR OR UNSCIENTIFIC]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
|
30 |
client = Client(
|
31 |
image_provider=RetryProvider(
|
32 |
providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],
|
main.py
CHANGED
@@ -25,22 +25,23 @@ def paper_data(paper_data, wait_time=5):
|
|
25 |
if not all([paper_id, doi, pdf_url, title, citation]):
|
26 |
print(f"Skipping paper with ID: {paper_id} (missing details)")
|
27 |
continue
|
28 |
-
summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
if not summary or not mindmap:
|
30 |
print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
|
31 |
continue
|
32 |
try:
|
|
|
33 |
try:
|
34 |
try:
|
35 |
-
|
36 |
-
except:
|
37 |
-
escaped_title = repr(title).strip().encode('latin1', errors='replace').decode('utf-8', errors='replace')
|
38 |
-
except:
|
39 |
-
escaped_title = repr(title).strip()
|
40 |
-
title = html.escape(str(escaped_title).strip()[1:-1])
|
41 |
-
try:
|
42 |
-
try:
|
43 |
-
encoded_bytes = citation.encode('latin1').decode('unicode-escape', errors='replace')
|
44 |
except:
|
45 |
encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
|
46 |
except:
|
|
|
25 |
if not all([paper_id, doi, pdf_url, title, citation]):
|
26 |
print(f"Skipping paper with ID: {paper_id} (missing details)")
|
27 |
continue
|
28 |
+
fixed_title, summary, mindmap, fixed_citation = summarize_paper(title, pdf_url, paper_id, access_key)
|
29 |
+
if not fixed_title:
|
30 |
+
title = title
|
31 |
+
else:
|
32 |
+
title = fixed_title
|
33 |
+
if not fixed_citation:
|
34 |
+
citation = citation
|
35 |
+
else:
|
36 |
+
citation = fixed_citation
|
37 |
if not summary or not mindmap:
|
38 |
print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
|
39 |
continue
|
40 |
try:
|
41 |
+
title = html.escape(str(title).strip())
|
42 |
try:
|
43 |
try:
|
44 |
+
encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
except:
|
46 |
encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
|
47 |
except:
|
post_blog.py
CHANGED
@@ -5,7 +5,6 @@ import time
|
|
5 |
import requests
|
6 |
import dotenv
|
7 |
import mistune
|
8 |
-
from gradio_client import Client
|
9 |
from image import fetch_image
|
10 |
|
11 |
dotenv.load_dotenv()
|
@@ -28,6 +27,41 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
|
|
28 |
html_summary = mistune.html(summary)
|
29 |
post = f"""
|
30 |
<div id="paper_post">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
<script>
|
32 |
window.markmap = {{
|
33 |
autoLoader: {{
|
@@ -60,29 +94,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
|
|
60 |
height: 80dvh;
|
61 |
}}
|
62 |
</style>
|
63 |
-
<img style="display:block; width:100%; height:100%;" id="paper_image"
|
64 |
-
src="{image.strip()}"
|
65 |
-
alt="{title.strip()}">
|
66 |
-
<br>
|
67 |
-
<b>{{getToc}} $title={{Table of Contents}}</b>
|
68 |
-
<br>
|
69 |
-
<div id="paper_summary">
|
70 |
-
{html_summary.replace("&amp;", "&").strip()}
|
71 |
-
</div>
|
72 |
-
<br>
|
73 |
-
<h2>Mindmap</h2>
|
74 |
-
<p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
|
75 |
-
href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
|
76 |
-
<div class="markmap" id="paper_mindmap">
|
77 |
-
<script type="text/template">
|
78 |
-
{mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
|
79 |
-
</script>
|
80 |
-
</div>
|
81 |
-
<br>
|
82 |
-
<h2>Citation</h2>
|
83 |
-
<div id="paper_citation">
|
84 |
-
{mistune.html(citation.replace("&amp;", "&").strip())}
|
85 |
-
</div>
|
86 |
<script>
|
87 |
window.addEventListener('load', function() {{
|
88 |
const anchor = document.querySelector('a.mm-toolbar-brand');
|
@@ -213,11 +224,7 @@ def test(uaccess_key):
|
|
213 |
mindmap = paperdata.get('mindmap', '')
|
214 |
citation = paperdata.get('citation', '')
|
215 |
uaccess_key = access_key
|
216 |
-
|
217 |
-
escaped_title = repr(title.encode('latin1').decode('unicode-escape')).strip()
|
218 |
-
except:
|
219 |
-
escaped_title = repr(title).strip()
|
220 |
-
title = html.escape(str(escaped_title).strip()[1:-1])
|
221 |
try:
|
222 |
encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
|
223 |
except:
|
|
|
5 |
import requests
|
6 |
import dotenv
|
7 |
import mistune
|
|
|
8 |
from image import fetch_image
|
9 |
|
10 |
dotenv.load_dotenv()
|
|
|
27 |
html_summary = mistune.html(summary)
|
28 |
post = f"""
|
29 |
<div id="paper_post">
|
30 |
+
<img style="display:block; width:100%; height:100%;" id="paper_image"
|
31 |
+
src="{image.strip()}"
|
32 |
+
alt="{title.strip()}">
|
33 |
+
<br>
|
34 |
+
<br>
|
35 |
+
<div id="paper_summary">
|
36 |
+
{html_summary.replace("&amp;", "&").strip()}
|
37 |
+
</div>
|
38 |
+
<br>
|
39 |
+
<h2>Mindmap</h2>
|
40 |
+
<p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
|
41 |
+
href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
|
42 |
+
<div class="markmap" id="paper_mindmap">
|
43 |
+
<script type="text/template">
|
44 |
+
{mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
|
45 |
+
</script>
|
46 |
+
</div>
|
47 |
+
<br>
|
48 |
+
<h2>Citation</h2>
|
49 |
+
<div id="paper_citation">
|
50 |
+
{mistune.html(citation.replace("&amp;", "&").strip())}
|
51 |
+
</div>
|
52 |
+
<script>
|
53 |
+
const paperImage = document.querySelector(
|
54 |
+
'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
|
55 |
+
);
|
56 |
+
if (paperImage) {{
|
57 |
+
const tablOfContents = document.createElement("div");
|
58 |
+
tablOfContents.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
|
59 |
+
const brElement = paperImage.nextElementSibling;
|
60 |
+
if (brElement && brElement.tagName === "BR") {{
|
61 |
+
brElement.insertAdjacentElement("afterend", tablOfContents);
|
62 |
+
}}
|
63 |
+
}}
|
64 |
+
</script>
|
65 |
<script>
|
66 |
window.markmap = {{
|
67 |
autoLoader: {{
|
|
|
94 |
height: 80dvh;
|
95 |
}}
|
96 |
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
<script>
|
98 |
window.addEventListener('load', function() {{
|
99 |
const anchor = document.querySelector('a.mm-toolbar-brand');
|
|
|
224 |
mindmap = paperdata.get('mindmap', '')
|
225 |
citation = paperdata.get('citation', '')
|
226 |
uaccess_key = access_key
|
227 |
+
title = html.escape(str(title).strip()[1:-1])
|
|
|
|
|
|
|
|
|
228 |
try:
|
229 |
encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
|
230 |
except:
|
summarize_paper.py
CHANGED
@@ -1,23 +1,31 @@
|
|
1 |
import json
|
2 |
from gradio_client import Client
|
3 |
|
4 |
-
def summarize_paper(pdf_url, paper_id, access_key):
|
5 |
mindmap = None
|
6 |
summary = None
|
|
|
|
|
7 |
try:
|
8 |
summarizer_client = Client("raannakasturi/ReXploreAPI")
|
9 |
result = summarizer_client.predict(
|
10 |
url=pdf_url,
|
|
|
11 |
id=paper_id,
|
|
|
12 |
access_key=access_key,
|
13 |
api_name="/rexplore_summarizer"
|
14 |
)
|
15 |
if result:
|
16 |
data = json.loads(result[0])
|
|
|
|
|
|
|
|
|
17 |
if data["mindmap_status"] == "success":
|
18 |
mindmap = data["mindmap"]
|
19 |
if data["summary_status"] == "success":
|
20 |
summary = data["summary"]
|
21 |
except Exception as e:
|
22 |
print(f"Error summarizing paper: {e}")
|
23 |
-
return summary, mindmap
|
|
|
1 |
import json
|
2 |
from gradio_client import Client
|
3 |
|
4 |
+
def summarize_paper(pdf_url, paper_title, paper_id, paper_citation, access_key):
|
5 |
mindmap = None
|
6 |
summary = None
|
7 |
+
title = None
|
8 |
+
citation = None
|
9 |
try:
|
10 |
summarizer_client = Client("raannakasturi/ReXploreAPI")
|
11 |
result = summarizer_client.predict(
|
12 |
url=pdf_url,
|
13 |
+
title=paper_title,
|
14 |
id=paper_id,
|
15 |
+
citation=paper_citation,
|
16 |
access_key=access_key,
|
17 |
api_name="/rexplore_summarizer"
|
18 |
)
|
19 |
if result:
|
20 |
data = json.loads(result[0])
|
21 |
+
if data['title']:
|
22 |
+
title = data['title']
|
23 |
+
if data['citation']:
|
24 |
+
citation = data['citation']
|
25 |
if data["mindmap_status"] == "success":
|
26 |
mindmap = data["mindmap"]
|
27 |
if data["summary_status"] == "success":
|
28 |
summary = data["summary"]
|
29 |
except Exception as e:
|
30 |
print(f"Error summarizing paper: {e}")
|
31 |
+
return title, summary, mindmap, citation
|
test.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
# Original text with incorrect encoding
|
2 |
-
text = "Itâ\x80\x99s the AIâ\x80\x99s fault, not mine: Mind perception increases blame attribution to AI"
|
3 |
-
|
4 |
-
# Decode as 'latin1' and re-encode as 'utf-8'
|
5 |
-
fixed_text = text.encode('latin1').decode('utf-8')
|
6 |
-
|
7 |
-
print(fixed_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|