raannakasturi committed on
Commit
0485034
·
1 Parent(s): b6b0f54

Refactor image generation and paper summarization logic for improved safety and clarity

Browse files
Files changed (5) hide show
  1. image.py +2 -2
  2. main.py +11 -10
  3. post_blog.py +36 -29
  4. summarize_paper.py +10 -2
  5. test.py +0 -7
image.py CHANGED
@@ -24,9 +24,9 @@ def fix_base64_padding(data):
24
 
25
  def generate_image(title, summary):
26
  try:
27
- negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features",
28
  extracted_summary = extract_summary(summary)
29
- prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR REVEALING IMAGES]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
30
  client = Client(
31
  image_provider=RetryProvider(
32
  providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],
 
24
 
25
  def generate_image(title, summary):
26
  try:
27
+ negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
28
  extracted_summary = extract_summary(summary)
29
+ prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR ANYTHING REVEALING IMAGES NOR SHOULD THEY BE VULGAR OR UNSCIENTIFIC]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
30
  client = Client(
31
  image_provider=RetryProvider(
32
  providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],
main.py CHANGED
@@ -25,22 +25,23 @@ def paper_data(paper_data, wait_time=5):
25
  if not all([paper_id, doi, pdf_url, title, citation]):
26
  print(f"Skipping paper with ID: {paper_id} (missing details)")
27
  continue
28
- summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
 
 
 
 
 
 
 
 
29
  if not summary or not mindmap:
30
  print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
31
  continue
32
  try:
 
33
  try:
34
  try:
35
- escaped_title = repr(title.encode('latin1').decode('unicode-escape', errors='replace')).strip()
36
- except:
37
- escaped_title = repr(title).strip().encode('latin1', errors='replace').decode('utf-8', errors='replace')
38
- except:
39
- escaped_title = repr(title).strip()
40
- title = html.escape(str(escaped_title).strip()[1:-1])
41
- try:
42
- try:
43
- encoded_bytes = citation.encode('latin1').decode('unicode-escape', errors='replace')
44
  except:
45
  encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
46
  except:
 
25
  if not all([paper_id, doi, pdf_url, title, citation]):
26
  print(f"Skipping paper with ID: {paper_id} (missing details)")
27
  continue
28
+ fixed_title, summary, mindmap, fixed_citation = summarize_paper(title, pdf_url, paper_id, access_key)
29
+ if not fixed_title:
30
+ title = title
31
+ else:
32
+ title = fixed_title
33
+ if not fixed_citation:
34
+ citation = citation
35
+ else:
36
+ citation = fixed_citation
37
  if not summary or not mindmap:
38
  print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
39
  continue
40
  try:
41
+ title = html.escape(str(title).strip())
42
  try:
43
  try:
44
+ encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
 
 
 
 
 
 
 
 
45
  except:
46
  encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
47
  except:
post_blog.py CHANGED
@@ -5,7 +5,6 @@ import time
5
  import requests
6
  import dotenv
7
  import mistune
8
- from gradio_client import Client
9
  from image import fetch_image
10
 
11
  dotenv.load_dotenv()
@@ -28,6 +27,41 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
28
  html_summary = mistune.html(summary)
29
  post = f"""
30
  <div id="paper_post">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  <script>
32
  window.markmap = {{
33
  autoLoader: {{
@@ -60,29 +94,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
60
  height: 80dvh;
61
  }}
62
  </style>
63
- <img style="display:block; width:100%; height:100%;" id="paper_image"
64
- src="{image.strip()}"
65
- alt="{title.strip()}">
66
- <br>
67
- <b>{{getToc}} $title={{Table of Contents}}</b>
68
- <br>
69
- <div id="paper_summary">
70
- {html_summary.replace("&amp;", "&").strip()}
71
- </div>
72
- <br>
73
- <h2>Mindmap</h2>
74
- <p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
75
- href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
76
- <div class="markmap" id="paper_mindmap">
77
- <script type="text/template">
78
- {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
79
- </script>
80
- </div>
81
- <br>
82
- <h2>Citation</h2>
83
- <div id="paper_citation">
84
- {mistune.html(citation.replace("&amp;", "&").strip())}
85
- </div>
86
  <script>
87
  window.addEventListener('load', function() {{
88
  const anchor = document.querySelector('a.mm-toolbar-brand');
@@ -213,11 +224,7 @@ def test(uaccess_key):
213
  mindmap = paperdata.get('mindmap', '')
214
  citation = paperdata.get('citation', '')
215
  uaccess_key = access_key
216
- try:
217
- escaped_title = repr(title.encode('latin1').decode('unicode-escape')).strip()
218
- except:
219
- escaped_title = repr(title).strip()
220
- title = html.escape(str(escaped_title).strip()[1:-1])
221
  try:
222
  encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
223
  except:
 
5
  import requests
6
  import dotenv
7
  import mistune
 
8
  from image import fetch_image
9
 
10
  dotenv.load_dotenv()
 
27
  html_summary = mistune.html(summary)
28
  post = f"""
29
  <div id="paper_post">
30
+ <img style="display:block; width:100%; height:100%;" id="paper_image"
31
+ src="{image.strip()}"
32
+ alt="{title.strip()}">
33
+ <br>
34
+ <br>
35
+ <div id="paper_summary">
36
+ {html_summary.replace("&amp;", "&").strip()}
37
+ </div>
38
+ <br>
39
+ <h2>Mindmap</h2>
40
+ <p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
41
+ href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
42
+ <div class="markmap" id="paper_mindmap">
43
+ <script type="text/template">
44
+ {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
45
+ </script>
46
+ </div>
47
+ <br>
48
+ <h2>Citation</h2>
49
+ <div id="paper_citation">
50
+ {mistune.html(citation.replace("&amp;", "&").strip())}
51
+ </div>
52
+ <script>
53
+ const paperImage = document.querySelector(
54
+ 'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
55
+ );
56
+ if (paperImage) {{
57
+ const tablOfContents = document.createElement("div");
58
+ tablOfContents.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
59
+ const brElement = paperImage.nextElementSibling;
60
+ if (brElement && brElement.tagName === "BR") {{
61
+ brElement.insertAdjacentElement("afterend", tablOfContents);
62
+ }}
63
+ }}
64
+ </script>
65
  <script>
66
  window.markmap = {{
67
  autoLoader: {{
 
94
  height: 80dvh;
95
  }}
96
  </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  <script>
98
  window.addEventListener('load', function() {{
99
  const anchor = document.querySelector('a.mm-toolbar-brand');
 
224
  mindmap = paperdata.get('mindmap', '')
225
  citation = paperdata.get('citation', '')
226
  uaccess_key = access_key
227
+ title = html.escape(str(title).strip()[1:-1])
 
 
 
 
228
  try:
229
  encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
230
  except:
summarize_paper.py CHANGED
@@ -1,23 +1,31 @@
1
  import json
2
  from gradio_client import Client
3
 
4
- def summarize_paper(pdf_url, paper_id, access_key):
5
  mindmap = None
6
  summary = None
 
 
7
  try:
8
  summarizer_client = Client("raannakasturi/ReXploreAPI")
9
  result = summarizer_client.predict(
10
  url=pdf_url,
 
11
  id=paper_id,
 
12
  access_key=access_key,
13
  api_name="/rexplore_summarizer"
14
  )
15
  if result:
16
  data = json.loads(result[0])
 
 
 
 
17
  if data["mindmap_status"] == "success":
18
  mindmap = data["mindmap"]
19
  if data["summary_status"] == "success":
20
  summary = data["summary"]
21
  except Exception as e:
22
  print(f"Error summarizing paper: {e}")
23
- return summary, mindmap
 
1
  import json
2
  from gradio_client import Client
3
 
4
+ def summarize_paper(pdf_url, paper_title, paper_id, paper_citation, access_key):
5
  mindmap = None
6
  summary = None
7
+ title = None
8
+ citation = None
9
  try:
10
  summarizer_client = Client("raannakasturi/ReXploreAPI")
11
  result = summarizer_client.predict(
12
  url=pdf_url,
13
+ title=paper_title,
14
  id=paper_id,
15
+ citation=paper_citation,
16
  access_key=access_key,
17
  api_name="/rexplore_summarizer"
18
  )
19
  if result:
20
  data = json.loads(result[0])
21
+ if data['title']:
22
+ title = data['title']
23
+ if data['citation']:
24
+ citation = data['citation']
25
  if data["mindmap_status"] == "success":
26
  mindmap = data["mindmap"]
27
  if data["summary_status"] == "success":
28
  summary = data["summary"]
29
  except Exception as e:
30
  print(f"Error summarizing paper: {e}")
31
+ return title, summary, mindmap, citation
test.py DELETED
@@ -1,7 +0,0 @@
1
- # Original text with incorrect encoding
2
- text = "Itâ\x80\x99s the AIâ\x80\x99s fault, not mine: Mind perception increases blame attribution to AI"
3
-
4
- # Decode as 'latin1' and re-encode as 'utf-8'
5
- fixed_text = text.encode('latin1').decode('utf-8')
6
-
7
- print(fixed_text)