santacoder-search

Runtime error

App Files Files Community

lvwerra HF staff committed on Dec 21, 2022

Commit

7f5bdb5

1 Parent(s): 25cf3d1

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -24

app.py CHANGED Viewed

@@ -12,38 +12,29 @@ import requests
 def mark_tokens_bold(string, tokens):
   for token in tokens:
     pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
-    string = re.sub(pattern, "<span style='color: red;'><b>" + token + "</b></span>", string)
   return string
 def process_results(results, highlight_terms):
     if len(results) == 0:
-        return """<br><p style='font-family: Arial; color:Silver; text-align: center;'>
-                No results retrieved.</p><br><hr>"""
     results_html = ""
     for result in results:
         text_html = result["text"]
         text_html = mark_tokens_bold(text_html, highlight_terms)
-        meta_html = (
-            """
-                <p class='underline-on-hover' style='font-size:12px; font-family: Arial; color:#585858; text-align: left;'>
-                <a href='{}' target='_blank'>{}</a></p>""".format(
-                result["meta"]["url"], result["meta"]["url"]
-            )
-            if "meta" in result and result["meta"] is not None and "url" in result["meta"]
-            else ""
-        )
         docid_html = str(result["docid"])
         licenses = " | ".join(result["repo_license"])
         repo_name = result["repo_name"]
         repo_path = result["repo_path"]
-        results_html += """{}
-            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #20233fff;'>{}</span></p>
-            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #20233fff;'>{}</span></p>
-            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #20233fff;'>{}</span></p>
             <pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
             <br>
         """.format(
@@ -74,18 +65,14 @@ def scisearch(query, language, num_results=10):
     return process_results(results, highlight_terms)
-description = """# <p style="text-align: center;"> 🌸 🔎 ROOTS search tool 🔍 🌸 </p>
-The ROOTS corpus was developed during the [BigScience workshop](https://bigscience.huggingface.co/) for the purpose
-of training the Multilingual Large Language Model [BLOOM](https://huggingface.co/bigscience/bloom). This tool allows
-you to search through the ROOTS corpus. We serve a BM25 index for each language or group of languages included in
-ROOTS. You can read more about the details of the tool design
-[here](https://huggingface.co/spaces/bigscience-data/scisearch/blob/main/roots_search_tool_specs.pdf). For more
-information and instructions on how to access the full corpus check [this form](https://forms.gle/qyYswbEL5kA23Wu99)."""
 if __name__ == "__main__":
     demo = gr.Blocks(
-        css=".underline-on-hover:hover { text-decoration: underline; } .flagging { font-size:12px; background-color:#20233fff; } .gradio-container {background-color: #20233fff}"
     )
     with demo:

 def mark_tokens_bold(string, tokens):
   """Wrap every literal occurrence of each token in highlight markup.

   Matching is case-sensitive and substring-based (no word boundaries).
   All tokens are matched in a single pass over ``string``, so a token can
   never match inside the ``<span>``/``<b>`` markup inserted for another one.

   Args:
     string: Text to highlight.
     tokens: Iterable of literal strings to highlight. Empty strings and
       duplicates are ignored.

   Returns:
     ``string`` with each match wrapped in
     ``<span style='color: #ff75b3;'><b>...</b></span>``; returned unchanged
     when there is nothing to highlight.
   """
   # Longest tokens first so that, when one token is a prefix of another,
   # the alternation prefers the longer match at the same position.
   unique_tokens = sorted({token for token in tokens if token}, key=len, reverse=True)
   if not unique_tokens:
     # An empty pattern would match at every position and inject markup
     # between all characters.
     return string
   pattern = "|".join(re.escape(token) for token in unique_tokens)
   # A callable replacement inserts the matched text verbatim; a string
   # template would interpret backslashes in the token as escapes or group
   # references and raise re.error for tokens such as "\d".
   return re.sub(
     pattern,
     lambda match: "<span style='color: #ff75b3;'><b>" + match.group(0) + "</b></span>",
     string,
   )
 def process_results(results, highlight_terms):
     if len(results) == 0:
+        return """<br><p>No results retrieved.</p><br><hr>"""
     results_html = ""
     for result in results:
         text_html = result["text"]
         text_html = mark_tokens_bold(text_html, highlight_terms)
         docid_html = str(result["docid"])
         licenses = " | ".join(result["repo_license"])
         repo_name = result["repo_name"]
         repo_path = result["repo_path"]
+        results_html += """\
+            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #ff75b3;'>{}</span></p>
+            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #ff75b3;'>{}</span></p>
+            <p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #ff75b3;'>{}</span></p>
             <pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
             <br>
         """.format(
     return process_results(results, highlight_terms)
+description = """# <p style="text-align: center;"> 🔎 IceCoder Dataset Search 🔍 </p>
+When you use [IceCoder]() to generate code it might produce exact copies of code in the pretraining dataset. In that case the code requires
+and with this search tool we aim to provide help to finding out where the code came from."""
 if __name__ == "__main__":
     demo = gr.Blocks(
+        css=".gradio-container {background-color: #20233fff; color:white}"
     )
     with demo: