damand2061 committed
Commit 927fcf3 · verified · 1 Parent(s): 804ae79

Update app.py

Files changed (1)
  1. app.py  +22 -47
app.py CHANGED
@@ -19,52 +19,27 @@ def respond(
     # Create client for the selected model
     client = get_client(model_name)
 
-    # Check if the model is one of the problematic models
-    if model_name in ["indonlp/cendol-mt5-small-inst", "indonlp/cendol-mt5-small-chat"]:
-        # For these models, we simply concatenate the conversation into a single string
-        history_str = ""
-        for user_msg, assistant_msg in history:
-            if user_msg:
-                history_str += f"{user_msg}\n"
-            if assistant_msg:
-                history_str += f"{assistant_msg}\n"
-
-        # Add the latest user message
-        history_str += f"{message}\n"
-
-        # Pass the entire conversation history as a plain text prompt
-        response = client.text_generation(
-            history_str,  # Single string as input
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p
-        )
-
-        # Since response is a string, return it directly
-        full_response = response
-    else:
-        # For other models, we use a structured format with roles
-        messages = [{"role": "system", "content": system_message}]
-        for val in history:
-            if val[0]:
-                messages.append({"role": "user", "content": val[0]})
-            if val[1]:
-                messages.append({"role": "assistant", "content": val[1]})
-
-        # Add the latest user message
-        messages.append({"role": "user", "content": message})
-
-        # Make the request
-        response = client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            stream=False
-        )
-
-        # Extract the full response for chat models
-        full_response = response.choices[0].message["content"]
+    messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    # Add the latest user message
+    messages.append({"role": "user", "content": message})
+
+    # Make the request
+    response = client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stream=False
+    )
+
+    # Extract the full response for chat models
+    full_response = response.choices[0].message["content"]
 
     return full_response
 
@@ -74,7 +49,7 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
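
For context, this commit collapses the two code paths in respond into one: the plain-text text_generation branch for the two cendol-mt5 checkpoints is deleted, every model now goes through chat_completion, and the Temperature slider is narrowed from 0.1-4.0 (default 0.7) to 0.1-2.0 (default 1.0). Below is a minimal sketch of how the resulting app.py might plausibly fit together. Everything outside the hunks above is an assumption, not the author's actual code: get_client is presumed to wrap huggingface_hub.InferenceClient, and the respond parameter order, the Model dropdown, the System message textbox, and the tail of the Top-p slider are placeholders.

import gradio as gr
from huggingface_hub import InferenceClient

# Assumed helper (not shown in the diff): one Inference API client per model.
def get_client(model_name: str) -> InferenceClient:
    return InferenceClient(model_name)

# The parameters after (message, history) must match the order of
# additional_inputs below; the real signature is outside the diff,
# so this ordering is a guess.
def respond(message, history, max_tokens, temperature, top_p,
            model_name, system_message):
    client = get_client(model_name)

    # Single code path: build role-tagged messages for every model.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,
    )
    return response.choices[0].message["content"]

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=1.0, step=0.1, label="Temperature"),
        # The diff cuts off after minimum/maximum; value, step, and label
        # here are placeholders.
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        # Assumed inputs feeding model_name and system_message; not visible
        # in the diff (the model ids are the ones named in the removed branch).
        gr.Dropdown(
            ["indonlp/cendol-mt5-small-inst", "indonlp/cendol-mt5-small-chat"],
            value="indonlp/cendol-mt5-small-chat",
            label="Model",
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
    ],
)

if __name__ == "__main__":
    demo.launch()

A brief note on the design: chat_completion applies the target model's own chat template server-side, which is presumably what made the hand-rolled prompt concatenation for the mt5 models redundant, and a default temperature of 1.0 leaves the output distribution unscaled, making 2.0 a more sensible slider ceiling than 4.0.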