ysharma HF staff committed on
Commit
a67a358
·
verified ·
1 Parent(s): 9aa8ffe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
from typing import Iterator

import google.generativeai as genai
import gradio as gr
from gradio import ChatMessage
6
+
7
# Get the Gemini API key from the environ variable (set GEMINI_API_KEY
# in the Space / shell environment; genai raises on use if it is unset).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# We will be using the Gemini 2.0 Flash model with Thinking capabilities
# (experimental build that emits a "thought" part before the answer).
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")
13
+
14
+
15
def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """Stream the model's experimental "thinking" trace and its final answer.

    Args:
        user_message: Raw text the user submitted.
        messages: Mutable chat history of gradio ``ChatMessage`` objects;
            assistant entries are appended/updated in place.

    Yields:
        The updated ``messages`` list after every streamed chunk, so Gradio
        re-renders the chatbot incrementally.
    """
    try:
        # Enabling logging for users to understand how thinking works along
        # with streaming.
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # Initialize streamed response from Gemini.
        response = model.generate_content(user_message, stream=True)

        # Accumulators for the two phases of the reply.
        thought_buffer = ""
        response_buffer = ""
        # Flips to True once the model starts emitting the final answer.
        thinking_complete = False

        # Add initial (empty) thinking message that the stream fills in.
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
            )
        )

        for chunk in response:
            parts = chunk.candidates[0].content.parts
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Two parts in one chunk: the thought finishes in parts[0]
                # and the final answer begins in parts[1].
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                # Update thinking message with the completed thought.
                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
                )
                yield messages

                # Start the visible response.
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer,
                    )
                )

                thinking_complete = True
                yield messages
                # BUGFIX: `time` was used here without being imported,
                # which raised NameError mid-stream; now imported at top.
                time.sleep(0.05)  # Small delay for visible streaming

            elif thinking_complete:
                # Stream the final answer into the last assistant message.
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer,
                )
                yield messages

            else:
                # Still in the thinking phase: stream into the thought bubble.
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": "⏳Thinking: *The thoughts produced by the model are experimental"},
                )
                yield messages

        # Log final complete response.
        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # Surface the failure in the chat instead of crashing the UI.
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}",
            )
        )
        yield messages
110
+
111
def user_message(msg: str, history: list) -> tuple[str, list]:
    """Record the user's message in the chat history.

    Returns an empty string (which clears the input textbox) together
    with the updated history list.
    """
    entry = ChatMessage(role="user", content=msg)
    history.append(entry)
    cleared_textbox = ""
    return cleared_textbox, history
115
+
116
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None, "https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu"),
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4,
        )
        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    msg_store = gr.State("")  # Store for preserving user message across the chain

    # FIX: the original listed `input_box` twice in `outputs` and returned
    # three values, writing the message into the textbox only to overwrite
    # it with "" immediately — stash the message and clear the box in one step.
    input_box.submit(
        lambda msg: (msg, ""),  # Store message and clear input
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False,
    ).then(
        user_message,  # Add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False,
    ).then(
        stream_gemini_response,  # Generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot,
    )

    clear_button.click(
        lambda: ([], "", ""),  # Reset chat, textbox, and stored message
        outputs=[chatbot, input_box, msg_store],
        queue=False,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(debug=True)