ysharma HF staff committed on
Commit
a67a358
·
verified ·
1 Parent(s): 9aa8ffe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
from typing import Iterator

import google.generativeai as genai
import gradio as gr
from gradio import ChatMessage
6
+
7
# Get the Gemini API key from the environ variable (set GEMINI_API_KEY
# in the Space / shell environment; genai raises on use if it is unset).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# We will be using the Gemini 2.0 Flash model with Thinking capabilities
# (experimental build that emits a "thought" part before the answer).
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
13
+
14
+
15
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """Stream the model's experimental "thinking" trace and its final answer.

    Args:
        user_message: Raw text the user submitted.
        messages: Mutable chat history of gradio ``ChatMessage`` objects;
            assistant entries are appended/updated in place.

    Yields:
        The updated ``messages`` list after every streamed chunk, so Gradio
        re-renders the chatbot incrementally.
    """
    try:
        # Enabling logging for users to understand how thinking works along
        # with streaming.
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Initialize streamed response from Gemini.
        response = model.generate_content(user_message, stream=True)

        # Accumulators for the two phases of the reply.
        thought_buffer = ""
        response_buffer = ""
        # Flips to True once the model starts emitting the final answer.
        thinking_complete = False

        # Add initial (empty) thinking message that the stream fills in.
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Two parts in one chunk: the thought finishes in parts[0]
                # and the final answer begins in parts[1].
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                # Update thinking message with the completed thought.
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
                )
                yield messages

                # Start the visible response.
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer,
                    )
                )

                thinking_complete = True
                yield messages
                # BUGFIX: `time` was used here without being imported,
                # which raised NameError mid-stream; now imported at top.
                time.sleep(0.05)  # Small delay for visible streaming

            elif thinking_complete:
                # Stream the final answer into the last assistant message.
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer,
                )
                yield messages

            else:
                # Still in the thinking phase: stream into the thought bubble.
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
                )
                yield messages

        # Log final complete response.
        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # Surface the failure in the chat instead of crashing the UI.
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}",
            )
        )
        yield messages
110
+
111
def user_message(msg: str, history: list) -> tuple[str, list]:
    """Record the user's message in the chat history.

    Returns an empty string (which clears the input textbox) together
    with the updated history list.
    """
    entry = ChatMessage(role="user", content=msg)
    history.append(entry)
    cleared_textbox = ""
    return cleared_textbox, history
115
+
116
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None, "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu"),
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4,
        )
        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    msg_store = gr.State("")  # Store for preserving user message across the chain

    # FIX: the original listed `input_box` twice in `outputs` and returned
    # three values, writing the message into the textbox only to overwrite
    # it with "" immediately — stash the message and clear the box in one step.
    input_box.submit(
        lambda msg: (msg, ""),  # Store message and clear input
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False,
    ).then(
        user_message,  # Add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False,
    ).then(
        stream_gemini_response,  # Generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot,
    )

    clear_button.click(
        lambda: ([], "", ""),  # Reset chat, textbox, and stored message
        outputs=[chatbot, input_box, msg_store],
        queue=False,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)