Spaces:
Runtime error
Runtime error
Upload 4 files
Browse files- app.py +7 -8
- confidence.py +1 -1
app.py
CHANGED
@@ -2,11 +2,11 @@ import gradio as gr
|
|
2 |
from confidence import run_nli
|
3 |
|
4 |
DESCRIPTION = """\
|
5 |
-
# Llama
|
6 |
-
This
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
"""
|
11 |
|
12 |
def greet(query, history):
|
@@ -18,13 +18,12 @@ def greet(query, history):
|
|
18 |
sample_list = [
|
19 |
"Tell me something about Albert Einstein, e.g., a short bio with birth date and birth place",
|
20 |
"Tell me something about Lihu Chen, e.g., a short bio with birth date and birth place",
|
|
|
21 |
]
|
22 |
|
23 |
iface = gr.ChatInterface(
|
24 |
fn=greet,
|
25 |
stop_btn=None,
|
26 |
-
# inputs="text",
|
27 |
-
# outputs="text",
|
28 |
examples=sample_list,
|
29 |
cache_examples=True
|
30 |
)
|
@@ -35,4 +34,4 @@ with gr.Blocks() as demo:
|
|
35 |
#gr.Markdown(LICENSE)
|
36 |
|
37 |
|
38 |
-
|
|
|
2 |
from confidence import run_nli
|
3 |
|
4 |
DESCRIPTION = """\
|
5 |
+
# Llama Chatbot with confidence scores 🩺
|
6 |
+
This space shows that we can teach LLMs to express how confident they are in their answers.
|
7 |
+
Since we can only access free CPUs, we use a tiny Llama ([TinyLlama-1.1B](https://huggingface.co/PY007/TinyLlama-1.1B-Chat-v0.3)) as the chatbot and an [NLI model](https://github.com/potsawee/selfcheckgpt) to get scores. <br/>
|
8 |
+
🎯 There will be a score between 0 and 1 after each sentence, and a higher value means the sentence is more factual.<br/>
|
9 |
+
⏳ It takes 150s-300s to process each query, and we limit the token numbers of answers for saving time.
|
10 |
"""
|
11 |
|
12 |
def greet(query, history):
|
|
|
18 |
sample_list = [
|
19 |
"Tell me something about Albert Einstein, e.g., a short bio with birth date and birth place",
|
20 |
"Tell me something about Lihu Chen, e.g., a short bio with birth date and birth place",
|
21 |
+
"How tall is the Eiffel Tower?"
|
22 |
]
|
23 |
|
24 |
iface = gr.ChatInterface(
|
25 |
fn=greet,
|
26 |
stop_btn=None,
|
|
|
|
|
27 |
examples=sample_list,
|
28 |
cache_examples=True
|
29 |
)
|
|
|
34 |
#gr.Markdown(LICENSE)
|
35 |
|
36 |
|
37 |
+
demo.launch()
|
confidence.py
CHANGED
@@ -70,7 +70,7 @@ def run_nli(query, sample_size=5):
|
|
70 |
final_content = ''
|
71 |
for index, sent in enumerate(sentences):
|
72 |
final_content += sent.strip() + ' ({a}) '.format(a=scores[index])
|
73 |
-
final_content += '\nThe confidence score of this answer is {a}'.format(a=sum(scores)/len(scores))
|
74 |
return final_content
|
75 |
|
76 |
|
|
|
70 |
final_content = ''
|
71 |
for index, sent in enumerate(sentences):
|
72 |
final_content += sent.strip() + ' ({a}) '.format(a=scores[index])
|
73 |
+
final_content += '\nThe confidence score of this answer is {a}'.format(a=round(sum(scores)/len(scores), 4))
|
74 |
return final_content
|
75 |
|
76 |
|