# Source: Hugging Face Space "ysharma/Gemini2-Flash-Thinking" (status: Running; file size: 6,044 bytes).
# Revisions listed in the page's blame gutter: a67a358, 4a3acd2, 772fb5d, 7f52231.

import os
import gradio as gr
from gradio import ChatMessage
from typing import Iterator
import google.generativeai as genai

# Read the Gemini API key from the GEMINI_API_KEY environment variable
# (None when the variable is unset) and pass it to the client library.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Shared model handle: Gemini 2.0 Flash with Thinking capabilities
model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")


def format_chat_history(messages: list) -> list:
    """
    Formats the chat history into a structure Gemini can understand.

    Args:
        messages: Chat messages as dicts with "role" and "content" keys and an
            optional "metadata" entry.

    Returns:
        A list of ``{"role": ..., "parts": [...]}`` dicts. The role is
        "user" for user turns and "model" for everything else. Assistant
        messages that carry a non-empty metadata title (the "thinking"
        bubbles) are left out.
    """
    formatted_history = []
    for message in messages:
        role = message.get("role")
        metadata = message.get("metadata")

        # A thinking message is identified by a non-empty metadata title.
        # Checking only for the presence of the "metadata" key is not enough:
        # the key can be present with None or an empty title on ordinary
        # answers, which would drop them from the history.
        # NOTE(review): confirm the exact metadata shape against the installed
        # Gradio version.
        if isinstance(metadata, dict):
            title = metadata.get("title")
        else:
            title = getattr(metadata, "title", None)
        if role == "assistant" and title:
            continue

        formatted_history.append({
            # The Gemini API names the two conversation roles "user" and "model".
            "role": "user" if role == "user" else "model",
            "parts": [message.get("content", "")],
        })
    return formatted_history

def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
    """
    Streams thoughts and response with conversation history support.

    Args:
        user_message: Text of the latest user turn.
        messages: Chat history shown in the chatbot. It is mutated in place:
            a "thinking" message and then the answer message are appended and
            updated as chunks arrive.

    Yields:
        The same ``messages`` list after every update, so the UI can re-render.

    Any exception raised while talking to the model is caught, printed, and
    reported to the user as an assistant message instead of propagating.
    """
    thinking_title = "⚙️ Thinking: *The thoughts produced by the model are experimental"

    # A blank prompt gives the model nothing to answer; reply locally instead
    # of calling the API.
    if not user_message or not user_message.strip():
        messages.append(
            ChatMessage(
                role="assistant",
                content="Please enter a non-empty message."
            )
        )
        yield messages
        return

    try:
        print("\n=== New Request ===")
        print(f"User message: {user_message}")

        # The UI appends the current user turn to the chat before this
        # function runs. It is sent via send_message() below, so drop it from
        # the history to avoid giving the model the same turn twice.
        prior_turns = messages
        if messages:
            last = messages[-1]
            if (
                isinstance(last, dict)
                and last.get("role") == "user"
                and last.get("content") == user_message
            ):
                prior_turns = messages[:-1]

        # Format chat history for Gemini
        chat_history = format_chat_history(prior_turns)

        # Initialize Gemini chat
        chat = model.start_chat(history=chat_history)
        response = chat.send_message(user_message, stream=True)

        # Initialize buffers and flags
        thought_buffer = ""
        response_buffer = ""
        thinking_complete = False

        # Add initial thinking message
        messages.append(
            ChatMessage(
                role="assistant",
                content="",
                metadata={"title": thinking_title}
            )
        )

        for chunk in response:
            # Skip chunks that carry no content instead of failing on
            # parts[0] and aborting the whole stream.
            if not chunk.candidates:
                continue
            parts = chunk.candidates[0].content.parts
            if not parts:
                continue
            current_chunk = parts[0].text

            if len(parts) == 2 and not thinking_complete:
                # Transition chunk: first part ends the thought, second part
                # begins the answer.
                thought_buffer += current_chunk
                print(f"\n=== Complete Thought ===\n{thought_buffer}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )
                yield messages

                # Start response
                response_buffer = parts[1].text
                print(f"\n=== Starting Response ===\n{response_buffer}")

                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=response_buffer
                    )
                )
                thinking_complete = True

            elif thinking_complete:
                # Stream response
                response_buffer += current_chunk
                print(f"\n=== Response Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=response_buffer
                )

            else:
                # Stream thinking
                thought_buffer += current_chunk
                print(f"\n=== Thinking Chunk ===\n{current_chunk}")

                messages[-1] = ChatMessage(
                    role="assistant",
                    content=thought_buffer,
                    metadata={"title": thinking_title}
                )

            yield messages

        print(f"\n=== Final Response ===\n{response_buffer}")

    except Exception as e:
        # UI boundary: surface the failure in the chat rather than crashing
        # the event handler.
        print(f"\n=== Error ===\n{str(e)}")
        messages.append(
            ChatMessage(
                role="assistant",
                content=f"I apologize, but I encountered an error: {str(e)}"
            )
        )
        yield messages

def user_message(msg: str, history: list) -> tuple[str, list]:
    """Append ``msg`` to ``history`` as a user turn.

    The history list is modified in place. Returns an empty string together
    with that same list.
    """
    new_turn = ChatMessage(role="user", content=msg)
    history.append(new_turn)
    cleared_text = ""
    return cleared_text, history
    

# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
    gr.Markdown("# Chat with Gemini 2.0 Flash and See its Thoughts 💭")

    # Conversation display using the role/content "messages" format
    chatbot = gr.Chatbot(
        type="messages",
        label="Gemini2.0 'Thinking' Chatbot",
        render_markdown=True,
        scale=1,
        avatar_images=(None,"https://lh3.googleusercontent.com/oxz0sUBF0iYoN4VvhqWTmux-cxfD1rxuYkuFEfm1SFaseXEsjjE4Je_C_V3UQPuJ87sImQK3HfQ3RXiaRnQetjaZbjJJUkiPL5jFJ1WRl5FKJZYibUA=w214-h214-n-nu")
    )

    with gr.Row(equal_height=True):
        input_box = gr.Textbox(
            lines=1,
            label="Chat Message",
            placeholder="Type your message here...",
            scale=4
        )

        clear_button = gr.Button("Clear Chat", scale=1)

    # Set up event handlers
    # msg_store keeps the submitted text available to the later steps of the
    # chain after the textbox has been cleared.
    msg_store = gr.State("")

    input_box.submit(
        # Step 1: store the message and clear the input. Each output component
        # is written exactly once.
        lambda msg: (msg, ""),
        inputs=[input_box],
        outputs=[msg_store, input_box],
        queue=False
    ).then(
        user_message,  # Step 2: add user message to chat
        inputs=[msg_store, chatbot],
        outputs=[input_box, chatbot],
        queue=False
    ).then(
        stream_gemini_response,  # Step 3: generate and stream response
        inputs=[msg_store, chatbot],
        outputs=chatbot
    )

    # Reset the conversation, the textbox and the stored message together
    clear_button.click(
        lambda: ([], "", ""),
        outputs=[chatbot, input_box, msg_store],
        queue=False
    )

# Launch the interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)