Spaces:

raannakasturi
/

ReXploreBackend

Running

App Files Files Community

raannakasturi committed 21 days ago

Commit

0485034

1 Parent(s): b6b0f54

Refactor image generation and paper summarization logic for improved safety and clarity

Browse files

Files changed (5) hide show

image.py +2 -2
main.py +11 -10
post_blog.py +36 -29
summarize_paper.py +10 -2
test.py +0 -7

image.py CHANGED Viewed

@@ -24,9 +24,9 @@ def fix_base64_padding(data):
 def generate_image(title, summary):
     try:
-        negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features",
         extracted_summary = extract_summary(summary)
-        prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR REVEALING IMAGES]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
         client = Client(
             image_provider=RetryProvider(
                 providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],

 def generate_image(title, summary):
     try:
+        negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
         extracted_summary = extract_summary(summary)
+        prompt = quote(f"[[IMAGE GENERATED SHOULD BE SAFE FOR WORK (SFW). NO NUDES OR ANYTHING REVEALING IMAGES NOR SHOULD THEY BE VULGAR OR UNSCIENTIFIC]] [[(({title.strip()}))]]: {extracted_summary.strip()}")
         client = Client(
             image_provider=RetryProvider(
                 providers=[Airforce, Blackbox, BlackboxCreateAgent, PollinationsAI],

main.py CHANGED Viewed

@@ -25,22 +25,23 @@ def paper_data(paper_data, wait_time=5):
                 if not all([paper_id, doi, pdf_url, title, citation]):
                     print(f"Skipping paper with ID: {paper_id} (missing details)")
                     continue
-                summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
                 if not summary or not mindmap:
                     print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
                     continue
                 try:
                     try:
                         try:
-                            escaped_title = repr(title.encode('latin1').decode('unicode-escape', errors='replace')).strip()
-                        except:
-                            escaped_title = repr(title).strip().encode('latin1', errors='replace').decode('utf-8', errors='replace')
-                    except:
-                        escaped_title = repr(title).strip()
-                    title = html.escape(str(escaped_title).strip()[1:-1])
-                    try:
-                        try:
-                            encoded_bytes = citation.encode('latin1').decode('unicode-escape', errors='replace')
                         except:
                             encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
                     except:

                 if not all([paper_id, doi, pdf_url, title, citation]):
                     print(f"Skipping paper with ID: {paper_id} (missing details)")
                     continue
+                fixed_title, summary, mindmap, fixed_citation = summarize_paper(title, pdf_url, paper_id, access_key)
+                if not fixed_title:
+                    title = title
+                else:
+                    title = fixed_title
+                if not fixed_citation:
+                    citation = citation
+                else:
+                    citation = fixed_citation
                 if not summary or not mindmap:
                     print(f"Skipping paper with ID: {paper_id} (summary/mindmap not found)")
                     continue
                 try:
+                    title = html.escape(str(title).strip())
                     try:
                         try:
+                            encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
                         except:
                             encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
                     except:

post_blog.py CHANGED Viewed

@@ -5,7 +5,6 @@ import time
 import requests
 import dotenv
 import mistune
-from gradio_client import Client
 from image import fetch_image
 dotenv.load_dotenv()
@@ -28,6 +27,41 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
     html_summary = mistune.html(summary)
     post = f"""
     <div id="paper_post">
         <script>
             window.markmap = {{
                 autoLoader: {{
@@ -60,29 +94,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
                 height: 80dvh;
             }}
         </style>
-        <img style="display:block; width:100%; height:100%;" id="paper_image"
-            src="{image.strip()}"
-            alt="{title.strip()}">
-        <br>
-        <b>{{getToc}} $title={{Table of Contents}}</b>
-        <br>
-        <div id="paper_summary">
-            {html_summary.replace("&amp;", "&").strip()}
-        </div>
-        <br>
-        <h2>Mindmap</h2>
-        <p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
-                        href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
-        <div class="markmap" id="paper_mindmap">
-            <script type="text/template">
-                    {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
-                </script>
-        </div>
-        <br>
-        <h2>Citation</h2>
-        <div id="paper_citation">
-            {mistune.html(citation.replace("&amp;", "&").strip())}
-        </div>
         <script>
             window.addEventListener('load', function() {{
                 const anchor = document.querySelector('a.mm-toolbar-brand');
@@ -213,11 +224,7 @@ def test(uaccess_key):
                     mindmap = paperdata.get('mindmap', '')
                     citation = paperdata.get('citation', '')
                     uaccess_key = access_key
-                    try:
-                        escaped_title = repr(title.encode('latin1').decode('unicode-escape')).strip()
-                    except:
-                        escaped_title = repr(title).strip()
-                    title = html.escape(str(escaped_title).strip()[1:-1])
                     try:
                         encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
                     except:

 import requests
 import dotenv
 import mistune
 from image import fetch_image
 dotenv.load_dotenv()
     html_summary = mistune.html(summary)
     post = f"""
     <div id="paper_post">
+        <img style="display:block; width:100%; height:100%;" id="paper_image"
+            src="{image.strip()}"
+            alt="{title.strip()}">
+        <br>
+        <br>
+        <div id="paper_summary">
+            {html_summary.replace("&amp;", "&").strip()}
+        </div>
+        <br>
+        <h2>Mindmap</h2>
+        <p><small><em>If MindMap doesn't load, go to the <a href="/">Homepage</a> and visit blog again or <a
+                        href="/#">Switch to Android App (Under Development)</a>.</em></small></p>
+        <div class="markmap" id="paper_mindmap">
+            <script type="text/template">
+                    {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
+                </script>
+        </div>
+        <br>
+        <h2>Citation</h2>
+        <div id="paper_citation">
+            {mistune.html(citation.replace("&amp;", "&").strip())}
+        </div>
+        <script>
+          const paperImage = document.querySelector(
+            'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
+          );
+          if (paperImage) {{
+            const tablOfContents = document.createElement("div");
+            tablOfContents.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
+            const brElement = paperImage.nextElementSibling;
+            if (brElement && brElement.tagName === "BR") {{
+              brElement.insertAdjacentElement("afterend", tablOfContents);
+            }}
+          }}
+		</script>
         <script>
             window.markmap = {{
                 autoLoader: {{
                 height: 80dvh;
             }}
         </style>
         <script>
             window.addEventListener('load', function() {{
                 const anchor = document.querySelector('a.mm-toolbar-brand');
                     mindmap = paperdata.get('mindmap', '')
                     citation = paperdata.get('citation', '')
                     uaccess_key = access_key
+                    title = html.escape(str(title).strip()[1:-1])
                     try:
                         encoded_bytes = citation.encode('latin1').decode('utf-8', errors='replace')
                     except:

summarize_paper.py CHANGED Viewed

@@ -1,23 +1,31 @@
 import json
 from gradio_client import Client
-def summarize_paper(pdf_url, paper_id, access_key):
     mindmap = None
     summary = None
     try:
         summarizer_client = Client("raannakasturi/ReXploreAPI")
         result = summarizer_client.predict(
             url=pdf_url,
             id=paper_id,
             access_key=access_key,
             api_name="/rexplore_summarizer"
         )
         if result:
             data = json.loads(result[0])
             if data["mindmap_status"] == "success":
                 mindmap = data["mindmap"]
             if data["summary_status"] == "success":
                 summary = data["summary"]
     except Exception as e:
         print(f"Error summarizing paper: {e}")
-    return summary, mindmap

 import json
 from gradio_client import Client
def summarize_paper(pdf_url, paper_title, paper_id, paper_citation, access_key):
    """Ask the ReXploreAPI Space for a paper's summary, mindmap, title and citation.

    Calls the ``/rexplore_summarizer`` endpoint of the
    ``raannakasturi/ReXploreAPI`` Gradio Space and parses the first element
    of the result as JSON.

    Args:
        pdf_url: Forwarded to the endpoint as ``url``.
        paper_title: Forwarded to the endpoint as ``title``.
        paper_id: Forwarded to the endpoint as ``id``.
        paper_citation: Forwarded to the endpoint as ``citation``.
        access_key: Forwarded to the endpoint as ``access_key``.

    Returns:
        A ``(title, summary, mindmap, citation)`` tuple. Each element is
        ``None`` when the service did not supply it, when the matching
        ``*_status`` field is not ``"success"``, or when the request or
        parsing failed. This function never raises; failures are printed.
    """
    title = None
    summary = None
    mindmap = None
    citation = None
    try:
        summarizer_client = Client("raannakasturi/ReXploreAPI")
        result = summarizer_client.predict(
            url=pdf_url,
            title=paper_title,
            id=paper_id,
            citation=paper_citation,
            access_key=access_key,
            api_name="/rexplore_summarizer"
        )
        if result:
            # NOTE(review): assumes result[0] is a JSON object string -- confirm
            # against the Space's response schema.
            data = json.loads(result[0])
            # Use .get() so that a payload lacking one optional key does not
            # raise KeyError and throw away the fields that *are* present.
            title = data.get("title") or None
            citation = data.get("citation") or None
            if data.get("mindmap_status") == "success":
                mindmap = data.get("mindmap")
            if data.get("summary_status") == "success":
                summary = data.get("summary")
    except Exception as e:
        # Best-effort boundary: callers treat missing summary/mindmap as "skip".
        print(f"Error summarizing paper: {e}")
    return title, summary, mindmap, citation

test.py DELETED Viewed

@@ -1,7 +0,0 @@
-# Original text with incorrect encoding
-text = "Itâ\x80\x99s the AIâ\x80\x99s fault, not mine: Mind perception increases blame attribution to AI"
-# Encode as 'latin1' to recover the original bytes, then decode them as 'utf-8'
-fixed_text = text.encode('latin1').decode('utf-8')
-print(fixed_text)