Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -12,38 +12,29 @@ import requests
|
|
12 |
def mark_tokens_bold(string, tokens):
|
13 |
for token in tokens:
|
14 |
pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
|
15 |
-
string = re.sub(pattern, "<span style='color:
|
16 |
return string
|
17 |
|
18 |
|
19 |
def process_results(results, highlight_terms):
|
20 |
if len(results) == 0:
|
21 |
-
return """<br><p
|
22 |
-
No results retrieved.</p><br><hr>"""
|
23 |
|
24 |
results_html = ""
|
25 |
for result in results:
|
26 |
text_html = result["text"]
|
27 |
text_html = mark_tokens_bold(text_html, highlight_terms)
|
28 |
-
|
29 |
-
"""
|
30 |
-
<p class='underline-on-hover' style='font-size:12px; font-family: Arial; color:#585858; text-align: left;'>
|
31 |
-
<a href='{}' target='_blank'>{}</a></p>""".format(
|
32 |
-
result["meta"]["url"], result["meta"]["url"]
|
33 |
-
)
|
34 |
-
if "meta" in result and result["meta"] is not None and "url" in result["meta"]
|
35 |
-
else ""
|
36 |
-
)
|
37 |
docid_html = str(result["docid"])
|
38 |
|
39 |
licenses = " | ".join(result["repo_license"])
|
40 |
repo_name = result["repo_name"]
|
41 |
repo_path = result["repo_path"]
|
42 |
|
43 |
-
results_html += """
|
44 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #
|
45 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #
|
46 |
-
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #
|
47 |
<pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
|
48 |
<br>
|
49 |
""".format(
|
@@ -74,18 +65,14 @@ def scisearch(query, language, num_results=10):
|
|
74 |
return process_results(results, highlight_terms)
|
75 |
|
76 |
|
77 |
-
description = """# <p style="text-align: center;">
|
78 |
-
|
79 |
-
|
80 |
-
you to search through the ROOTS corpus. We serve a BM25 index for each language or group of languages included in
|
81 |
-
ROOTS. You can read more about the details of the tool design
|
82 |
-
[here](https://huggingface.co/spaces/bigscience-data/scisearch/blob/main/roots_search_tool_specs.pdf). For more
|
83 |
-
information and instructions on how to access the full corpus check [this form](https://forms.gle/qyYswbEL5kA23Wu99)."""
|
84 |
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
demo = gr.Blocks(
|
88 |
-
css=".
|
89 |
)
|
90 |
|
91 |
with demo:
|
|
|
12 |
def mark_tokens_bold(string, tokens):
|
13 |
for token in tokens:
|
14 |
pattern = re.escape(token) #r"\b" + re.escape(token) + r"\b"
|
15 |
+
string = re.sub(pattern, "<span style='color: #ff75b3;'><b>" + token + "</b></span>", string)
|
16 |
return string
|
17 |
|
18 |
|
19 |
def process_results(results, highlight_terms):
|
20 |
if len(results) == 0:
|
21 |
+
return """<br><p>No results retrieved.</p><br><hr>"""
|
|
|
22 |
|
23 |
results_html = ""
|
24 |
for result in results:
|
25 |
text_html = result["text"]
|
26 |
text_html = mark_tokens_bold(text_html, highlight_terms)
|
27 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
docid_html = str(result["docid"])
|
29 |
|
30 |
licenses = " | ".join(result["repo_license"])
|
31 |
repo_name = result["repo_name"]
|
32 |
repo_path = result["repo_path"]
|
33 |
|
34 |
+
results_html += """\
|
35 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository name: <span style='color: #ff75b3;'>{}</span></p>
|
36 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository path: <span style='color: #ff75b3;'>{}</span></p>
|
37 |
+
<p style='font-size:16px; font-family: Arial; text-align: left;'>Repository licenses: <span style='color: #ff75b3;'>{}</span></p>
|
38 |
<pre style='height: 600px; overflow: scroll;'><code>{}</code></pre>
|
39 |
<br>
|
40 |
""".format(
|
|
|
65 |
return process_results(results, highlight_terms)
|
66 |
|
67 |
|
68 |
+
description = """# <p style="text-align: center;"> π IceCoder Dataset Search π </p>
|
69 |
+
When you use [IceCoder]() to generate code it might produce exact copies of code in the pretraining dataset. In that case the code requires
|
70 |
+
and with this search tool we aim to provide help to finding out where the code came from."""
|
|
|
|
|
|
|
|
|
71 |
|
72 |
|
73 |
if __name__ == "__main__":
|
74 |
demo = gr.Blocks(
|
75 |
+
css=".gradio-container {background-color: #20233fff; color:white}"
|
76 |
)
|
77 |
|
78 |
with demo:
|