Update app.py
app.py CHANGED
@@ -1,14 +1,14 @@
 import gradio as gr
 import random
-from huggingface_hub import InferenceClient
+#from huggingface_hub import InferenceClient
 import spaces
-
+import os
 
 
-
-
+os.environ["KERAS_BACKEND"] = "tensorflow" #"jax" "torch"
+os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]="1.00"
 
-
+import keras_hub
 
 
 models = [
@@ -17,16 +17,16 @@ models = [
     "hf://tatihden/gemma_mental_health_7b_it_en"
 ]
 
-
-
-
+clients = []
+for model in models:
+    clients.append(keras_hub.models.GemmaCausalLM.from_preset(model))
 
-from huggingface_hub import InferenceClient
+#from huggingface_hub import InferenceClient
 
 
-clients = []
-for model in models:
-    clients.append(InferenceClient(model))
+#clients = []
+#for model in models:
+    #clients.append(InferenceClient(model))
 
 @spaces.GPU
 def format_prompt(message, history):
@@ -48,21 +48,20 @@ def chat_inf(system_prompt, prompt, history, client_choice, seed, temp, tokens,
     hist_len = len(history)
     print(hist_len)
 
-    generate_kwargs = dict(
-        temperature=temp,
-        max_new_tokens=tokens,
-        top_p=top_p,
-        repetition_penalty=rep_p,
-        do_sample=True,
-        seed=seed,
+    #generate_kwargs = dict(
+        #temperature=temp,
+        #max_new_tokens=tokens,
+        #top_p=top_p,
+        #repetition_penalty=rep_p,
+        #do_sample=True,
+        #seed=seed,
     )
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True,
-        return_full_text=False)
+    stream = client.generate(formatted_prompt,max_lenght=tokens)
     output = ""
 
     for response in stream:
-        output += response.token.text
+        output = response.replace(prompt, "")
         yield [(prompt, output)]
     history.append((prompt, output))
     yield history
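As committed, the new version would likely not run at all: the bare closing parenthesis left behind by the now commented-out generate_kwargs = dict( block is a syntax error, max_lenght is a typo for max_length, and GemmaCausalLM.generate() returns the finished string rather than a token stream, so the for response in stream loop would iterate over single characters. The sketch below is one way the intended KerasHub flow could look once those issues are addressed. It is an illustration under assumptions, not the app's actual code: the format_prompt() body and the client_choice handling are hypothetical stand-ins for parts of app.py that this diff does not show.

# Sketch only: a possible KerasHub-based version of the changed parts of app.py,
# assuming the Gradio UI and the rest of the file stay as they are.

import os

os.environ["KERAS_BACKEND"] = "tensorflow"  # or "jax" / "torch"; must be set before Keras is imported
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.00"

import keras_hub

models = [
    # Only the preset visible in this diff; app.py may list others.
    "hf://tatihden/gemma_mental_health_7b_it_en",
]

# hf:// presets are downloaded from the Hugging Face Hub.
clients = [keras_hub.models.GemmaCausalLM.from_preset(m) for m in models]


def format_prompt(message, history):
    # Hypothetical stand-in for the real format_prompt() in app.py (its body is not shown in the diff),
    # using the standard Gemma chat turn markers.
    turns = "".join(
        f"<start_of_turn>user\n{u}<end_of_turn>\n<start_of_turn>model\n{a}<end_of_turn>\n"
        for u, a in history
    )
    return f"{turns}<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"


def chat_inf(system_prompt, prompt, history, client_choice, seed, temp, tokens, top_p, rep_p):
    # Assumption: the real app presumably picks a model from client_choice; use the first one here.
    # temp, top_p, rep_p and seed are unused because the commit commented out generate_kwargs.
    client = clients[0]
    history = history or []

    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)

    # generate() returns the completed text (prompt included), not a token stream,
    # so strip the prompt instead of accumulating streamed chunks.
    text = client.generate(formatted_prompt, max_length=tokens)
    output = text.replace(formatted_prompt, "")

    yield [(prompt, output)]
    history.append((prompt, output))
    yield history

If the incremental-typing effect of the old InferenceClient stream matters for the UI, the finished string could be yielded in manually sized chunks inside chat_inf instead of in one step.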