Tonic committed
Commit 22d5543 · verified · 1 Parent(s): 8c00703

Update app.py

Files changed (1)
  1. app.py +109 -169
app.py CHANGED
@@ -1,30 +1,19 @@
 import os
 import gradio as gr
 import torch
-from rwkv.model import RWKV
-from rwkv.utils import PIPELINE, PIPELINE_ARGS
-from copy import deepcopy
 import requests
-import os.path
 from tqdm import tqdm

-# Set environment variables
-os.environ['RWKV_JIT_ON'] = '1'
-os.environ["RWKV_CUDA_ON"] = '0'
-os.environ["RWKV_V7_ON"] = '1'
-
-# Model options
-MODELS = {
-    "0.1B (Smaller)": "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.pth",
-    "0.4B (Larger)": "RWKV-x070-World-0.4B-v2.9-20250107-ctx4096.pth"
-}
-
-# Download vocab file if not present
-VOCAB_FILE = "rwkv_vocab_v20230424.txt"
-VOCAB_URL = "https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/rwkv_vocab_v20230424.txt"

 def download_file(url, filename):
-    """Generic file downloader with progress bar"""
     if not os.path.exists(filename):
         print(f"Downloading {filename}...")
         response = requests.get(url, stream=True)
@@ -41,195 +30,146 @@ def download_file(url, filename):
                 size = file.write(data)
                 pbar.update(size)

-def download_model(model_name):
-    """Download model if not present"""
-    if not os.path.exists(model_name):
-        url = f"https://huggingface.co/BlinkDL/rwkv-7-world/resolve/main/{model_name}"
-        download_file(url, model_name)
-
-def ensure_vocab():
-    """Ensure vocab file is present"""
-    if not os.path.exists(VOCAB_FILE):
-        download_file(VOCAB_URL, VOCAB_FILE)

-class ModelManager:
-    def __init__(self):
-        self.current_model = None
-        self.current_model_name = None
-        self.pipeline = None
-        ensure_vocab()
-
-    def load_model(self, model_choice):
-        model_file = MODELS[model_choice]
-        if model_file != self.current_model_name:
-            download_model(model_file)
-            self.current_model = RWKV(
-                model=model_file,
-                strategy='cpu fp32'
-            )
-            self.pipeline = PIPELINE(self.current_model, VOCAB_FILE)
-            self.current_model_name = model_file
-        return self.pipeline

-model_manager = ModelManager()
 
-def generate_response(
-    model_choice,
-    user_prompt,
-    system_prompt,
-    temperature,
-    top_p,
-    top_k,
-    alpha_frequency,
-    alpha_presence,
-    alpha_decay,
-    max_tokens
 ):
     try:
-        # Get or load the model
-        pipeline = model_manager.load_model(model_choice)

-        # Prepare the context
-        if system_prompt.strip():
-            ctx = f"{system_prompt.strip()}\n\nUser: {user_prompt.strip()}\n\nA:"
-        else:
-            ctx = f"User: {user_prompt.strip()}\n\nA:"

-        # Prepare generation arguments
-        args = PIPELINE_ARGS(
             temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            alpha_frequency=alpha_frequency,
-            alpha_presence=alpha_presence,
-            alpha_decay=alpha_decay,
-            token_ban=[],
-            token_stop=[],
-            chunk_len=256
         )

-        # Generate response
-        response = ""
-        def callback(text):
-            nonlocal response
-            response += text
-            return response
-
-        pipeline.generate(ctx, token_count=max_tokens, args=args, callback=callback)
-        return response
     except Exception as e:
-        import traceback
-        return f"Error: {str(e)}\nStack trace: {traceback.format_exc()}"
 
-# Create the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# RWKV-7 Language Model Demo")

     with gr.Row():
         with gr.Column():
-            model_choice = gr.Radio(
-                choices=list(MODELS.keys()),
-                value=list(MODELS.keys())[0],
-                label="Model Selection"
             )
-            system_prompt = gr.Textbox(
-                label="System Prompt",
-                placeholder="Optional system prompt to set the context",
-                lines=3,
-                value="You are a helpful AI assistant. You provide detailed and accurate responses."
-            )
-            user_prompt = gr.Textbox(
-                label="User Prompt",
-                placeholder="Enter your prompt here",
-                lines=3
-            )
-            max_tokens = gr.Slider(
-                minimum=1,
-                maximum=1000,
-                value=200,
-                step=1,
-                label="Max Tokens"
             )

         with gr.Column():
-            temperature = gr.Slider(
                 minimum=0.1,
                 maximum=2.0,
                 value=1.0,
-                step=0.1,
                 label="Temperature"
             )
-            top_p = gr.Slider(
-                minimum=0.0,
                 maximum=1.0,
-                value=0.7,
-                step=0.05,
-                label="Top P"
             )
-            top_k = gr.Slider(
-                minimum=0,
-                maximum=200,
                 value=100,
-                step=1,
-                label="Top K"
-            )
-            alpha_frequency = gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                value=0.25,
-                step=0.05,
-                label="Alpha Frequency"
-            )
-            alpha_presence = gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                value=0.25,
-                step=0.05,
-                label="Alpha Presence"
-            )
-            alpha_decay = gr.Slider(
-                minimum=0.9,
-                maximum=1.0,
-                value=0.996,
-                step=0.001,
-                label="Alpha Decay"
             )
 
     generate_button = gr.Button("Generate")
-    output = gr.Textbox(label="Generated Response", lines=10)

     generate_button.click(
-        fn=generate_response,
         inputs=[
-            model_choice,
-            user_prompt,
-            system_prompt,
-            temperature,
-            top_p,
-            top_k,
-            alpha_frequency,
-            alpha_presence,
-            alpha_decay,
-            max_tokens
         ],
-        outputs=output
     )

     gr.Markdown("""
-    ## Model Information
-    - **0.1B Model**: Smaller model, faster but less capable
-    - **0.4B Model**: Larger model, slower but more capable
-
-    ## Parameter Descriptions
-    - **Temperature**: Controls randomness in the output (higher = more random)
-    - **Top P**: Nucleus sampling threshold (lower = more focused)
-    - **Top K**: Limits the number of tokens considered for each step
-    - **Alpha Frequency**: Penalizes frequent tokens
-    - **Alpha Presence**: Penalizes tokens that have appeared before
-    - **Alpha Decay**: Rate at which penalties decay
-    - **Max Tokens**: Maximum length of generated response
     """)

-# Launch the demo
 if __name__ == "__main__":
     demo.launch()
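The listing above is the previous version of `app.py`, which routed generation through the `rwkv` package's `PIPELINE` helper; the listing below is the rewritten file. For reference, a condensed sketch of the generation path this commit removes, assuming the `rwkv` pip package plus a locally downloaded checkpoint and vocab file, and using the same defaults the old UI exposed:

```python
# Condensed sketch of the removed flow; values mirror the old UI defaults.
import os
os.environ['RWKV_JIT_ON'] = '1'    # set before importing rwkv so the library picks them up
os.environ["RWKV_CUDA_ON"] = '0'
os.environ["RWKV_V7_ON"] = '1'

from rwkv.model import RWKV
from rwkv.utils import PIPELINE, PIPELINE_ARGS

model = RWKV(model="RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.pth", strategy='cpu fp32')
pipeline = PIPELINE(model, "rwkv_vocab_v20230424.txt")

args = PIPELINE_ARGS(
    temperature=1.0,
    top_p=0.7,               # nucleus sampling threshold
    top_k=100,                # number of candidate tokens per step
    alpha_frequency=0.25,     # penalty that grows with how often a token was emitted
    alpha_presence=0.25,      # flat penalty once a token has appeared at all
    alpha_decay=0.996,        # per-step decay of the accumulated penalties
    token_ban=[], token_stop=[], chunk_len=256,
)

chunks = []  # the callback receives incremental text; collect it here
pipeline.generate("User: Hello\n\nA:", token_count=200, args=args, callback=chunks.append)
print("".join(chunks))
```

The three `alpha_*` arguments correspond to the repetition penalties described in the old help text: frequency and presence penalties on previously emitted tokens that decay over time.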
 
 import os
 import gradio as gr
 import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from tokenizers import Tokenizer
+import json
+import math
 import requests
 from tqdm import tqdm

+# Download tokenizer if not present
+TOKENIZER_FILE = "20B_tokenizer.json"
+TOKENIZER_URL = "https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/20B_tokenizer.json"

 def download_file(url, filename):
     if not os.path.exists(filename):
         print(f"Downloading {filename}...")
         response = requests.get(url, stream=True)

                 size = file.write(data)
                 pbar.update(size)

+# Ensure tokenizer exists
+if not os.path.exists(TOKENIZER_FILE):
+    download_file(TOKENIZER_URL, TOKENIZER_FILE)

+tokenizer = Tokenizer.from_file(TOKENIZER_FILE)
 
+class RWKV_Model:
+    def __init__(self, model_path):
+        self.model_path = model_path
+        self.model = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    def load_model(self):
+        if not os.path.exists(self.model_path):
+            raise FileNotFoundError(f"Model file {self.model_path} not found")
+
+        self.model = torch.load(self.model_path, map_location=self.device)
+        print("Model loaded successfully")
+
+    def generate(self, prompt, max_length=100, temperature=1.0, top_p=0.9):
+        if self.model is None:
+            self.load_model()
+
+        input_ids = tokenizer.encode(prompt).ids
+        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            output_sequence = []
+
+            for _ in range(max_length):
+                outputs = self.model(input_tensor)
+                next_token_logits = outputs[0, -1, :] / temperature
+
+                # Apply top-p sampling
+                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
+                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+                sorted_indices_to_remove = cumulative_probs > top_p
+                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+                sorted_indices_to_remove[..., 0] = 0
+                indices_to_remove = sorted_indices[sorted_indices_to_remove]
+                next_token_logits[indices_to_remove] = float('-inf')
+
+                probs = F.softmax(next_token_logits, dim=-1)
+                next_token = torch.multinomial(probs, num_samples=1)
+
+                output_sequence.append(next_token.item())
+                input_tensor = torch.cat([input_tensor, next_token.unsqueeze(0)], dim=1)
+
+                if next_token.item() == tokenizer.token_to_id("</s>"):
+                    break
+
+        return tokenizer.decode(output_sequence)
 
+def generate_text(
+    prompt,
+    temperature=1.0,
+    top_p=0.9,
+    max_length=100,
+    model_size="small"
 ):
     try:
+        # Select model based on size
+        model_path = "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.pth" if model_size == "small" else "RWKV-x070-World-0.4B-v2.9-20250107-ctx4096.pth"

+        model = RWKV_Model(model_path)

+        generated_text = model.generate(
+            prompt=prompt,
+            max_length=max_length,
             temperature=temperature,
+            top_p=top_p
         )

+        return generated_text
+
     except Exception as e:
+        return f"Error: {str(e)}"
 
+# Create Gradio interface
 with gr.Blocks() as demo:
+    gr.Markdown("# RWKV-7 Text Generation Demo")

     with gr.Row():
         with gr.Column():
+            prompt_input = gr.Textbox(
+                label="Input Prompt",
+                placeholder="Enter your prompt here...",
+                lines=5
             )
+            model_size = gr.Radio(
+                choices=["small", "large"],
+                label="Model Size",
+                value="small"
             )

         with gr.Column():
+            temperature_slider = gr.Slider(
                 minimum=0.1,
                 maximum=2.0,
                 value=1.0,
                 label="Temperature"
             )
+            top_p_slider = gr.Slider(
+                minimum=0.1,
                 maximum=1.0,
+                value=0.9,
+                label="Top-p"
             )
+            max_length_slider = gr.Slider(
+                minimum=10,
+                maximum=500,
                 value=100,
+                step=10,
+                label="Maximum Length"
             )
 
     generate_button = gr.Button("Generate")
+    output_text = gr.Textbox(label="Generated Output", lines=10)

     generate_button.click(
+        fn=generate_text,
         inputs=[
+            prompt_input,
+            temperature_slider,
+            top_p_slider,
+            max_length_slider,
+            model_size
         ],
+        outputs=output_text
     )

     gr.Markdown("""
+    ## Parameters:
+    - **Temperature**: Controls randomness (higher = more random)
+    - **Top-p**: Controls diversity (higher = more diverse)
+    - **Maximum Length**: Maximum number of tokens to generate
+    - **Model Size**:
+      - Small (0.1B parameters)
+      - Large (0.4B parameters)
     """)

 if __name__ == "__main__":
     demo.launch()
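The new `RWKV_Model.generate` loop applies temperature scaling followed by top-p (nucleus) filtering, which is what the "Top-p: Controls diversity" note in the interface refers to. A self-contained sketch of just that sampling step, using made-up logits rather than the RWKV checkpoint:

```python
import torch
import torch.nn.functional as F

def sample_top_p(logits: torch.Tensor, top_p: float = 0.9, temperature: float = 1.0) -> int:
    """Sample one token id from 1-D `logits` using temperature plus nucleus (top-p) filtering."""
    logits = logits / temperature                       # works on a copy; the caller's tensor is untouched
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    # Drop tokens once the cumulative probability passes top_p,
    # shifting the mask so the single most likely token is always kept.
    mask = cumulative_probs > top_p
    mask[..., 1:] = mask[..., :-1].clone()
    mask[..., 0] = False
    logits[sorted_indices[mask]] = float('-inf')
    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1).item()

# Example with made-up logits over a five-token vocabulary.
example_logits = torch.tensor([2.0, 1.5, 0.5, -1.0, -3.0])
print(sample_top_p(example_logits, top_p=0.9))
```

With these example logits the softmax probabilities of the top three tokens are roughly 0.53, 0.32, and 0.12, so `top_p=0.9` keeps exactly those three candidates and discards the rest; lowering `top_p` shrinks the candidate set further.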