Update app.py
app.py CHANGED
@@ -12,91 +12,104 @@ genai.configure(api_key=GEMINI_API_KEY)
 model = genai.GenerativeModel("gemini-2.0-flash-thinking-exp-1219")


+def format_chat_history(messages: list) -> list:
+    """
+    Formats the chat history into a structure Gemini can understand
+    """
+    formatted_history = []
+    for message in messages:
+        # Skip thinking messages (messages with metadata)
+        if not (message.get("role") == "assistant" and "metadata" in message):
+            formatted_history.append({
+                "role": "user" if message.get("role") == "user" else "assistant",
+                "parts": [message.get("content", "")]
+            })
+    return formatted_history
+
 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    Streams
+    Streams thoughts and response with conversation history support.
     """
     try:
-        # Enabling logging for users to understand how thinking works along with streaming
         print(f"\n=== New Request ===")
         print(f"User message: {user_message}")
-
-        #
-
-
+
+        # Format chat history for Gemini
+        chat_history = format_chat_history(messages)
+
+        # Initialize Gemini chat
+        chat = model.start_chat(history=chat_history)
+        response = chat.send_message(user_message, stream=True)
+
         # Initialize buffers and flags
         thought_buffer = ""
         response_buffer = ""
-        has_response = False
         thinking_complete = False
-
+
         # Add initial thinking message
         messages.append(
             ChatMessage(
                 role="assistant",
                 content="",
-                metadata={"title": "
+                metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
             )
         )
-
+
         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text
-
+
             if len(parts) == 2 and not thinking_complete:
-                # Complete thought
+                # Complete thought and start response
                 thought_buffer += current_chunk
                 print(f"\n=== Complete Thought ===\n{thought_buffer}")
-
-                # Update thinking message
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
-                    metadata={"title": "
+                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
                 yield messages
+                time.sleep(0.05)

                 # Start response
                 response_buffer = parts[1].text
                 print(f"\n=== Starting Response ===\n{response_buffer}")
-
+
                 messages.append(
                     ChatMessage(
                         role="assistant",
                         content=response_buffer
                     )
                 )
-
                 thinking_complete = True
-                has_response = True
-                yield messages

             elif thinking_complete:
                 # Stream response
                 response_buffer += current_chunk
                 print(f"\n=== Response Chunk ===\n{current_chunk}")
-
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=response_buffer
                 )
-                yield messages

             else:
                 # Stream thinking
                 thought_buffer += current_chunk
                 print(f"\n=== Thinking Chunk ===\n{current_chunk}")
-
+
                 messages[-1] = ChatMessage(
                     role="assistant",
                     content=thought_buffer,
-                    metadata={"title": "
+                    metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
-
-
-
+
+            yield messages
+            time.sleep(0.05)
+
         print(f"\n=== Final Response ===\n{response_buffer}")
-
+
     except Exception as e:
         print(f"\n=== Error ===\n{str(e)}")
         messages.append(
@@ -111,6 +124,7 @@ def user_message(msg: str, history: list) -> tuple[str, list]:
     """Adds user message to chat history"""
     history.append(ChatMessage(role="user", content=msg))
     return "", history
+

 # Create the Gradio interface
 with gr.Blocks(theme=gr.themes.Citrus(), fill_height=True) as demo:
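For reference, a minimal sketch of what the new format_chat_history helper does with a Gradio "messages"-style history (the example conversation is made up, and the helper from the diff above is assumed to be in scope): the thinking entry is dropped because it carries metadata, and the remaining turns are rewrapped into Gemini's role/parts structure.

# Hypothetical usage of format_chat_history from app.py above.
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "(streamed thought text)",
     "metadata": {"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}},
    {"role": "assistant", "content": "Hello! How can I help?"},
]

print(format_chat_history(history))
# [{'role': 'user', 'parts': ['Hi']},
#  {'role': 'assistant', 'parts': ['Hello! How can I help?']}]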
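The len(parts) == 2 branch in the streaming loop is what detects the hand-off from thinking to answering: as the diff's logic assumes, each chunk carries a single part while the model is still thinking (and again once the answer is streaming), and the one chunk where the answer begins carries two parts, the end of the thought and the start of the response. A rough standalone sketch, reusing only calls that appear in the diff (model.start_chat, chat.send_message, chunk.candidates[0].content.parts) with a made-up prompt:

# Sketch only; assumes `model` is the GenerativeModel configured above.
chat = model.start_chat(history=[])
response = chat.send_message("Why is the sky blue?", stream=True)

for chunk in response:
    parts = chunk.candidates[0].content.parts
    if len(parts) == 2:
        # The thought ends in parts[0]; the visible answer starts in parts[1].
        print("THOUGHT:", parts[0].text)
        print("ANSWER:", parts[1].text)
    else:
        print("CHUNK:", parts[0].text)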