import gradio as gr
import json
from datetime import datetime
from theme import TufteInspired
import uuid
from huggingface_hub import (
    InferenceClient,
    CommitScheduler,
    hf_hub_download,
    snapshot_download,
)
from openai import OpenAI
from huggingface_hub import get_token, login
from prompts import detailed_genre_description_prompt, basic_prompt
import random
import os
from pathlib import Path

# Ensure you're logged in to Hugging Face
login(get_token())

# Define available models
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]
# Most recently selected model id; written by get_random_model() and read
# when a vote is logged.
CHOSEN_MODEL = None

# Set up dataset storage
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)


# Function to get the latest dataset file
def get_latest_dataset_file():
    """Return the newest local ``data_*.jsonl`` file, or ``None`` if none exist.

    "Newest" is decided by ``os.path.getctime`` over the files found in
    ``dataset_folder``.
    """
    files = list(dataset_folder.glob("data_*.jsonl"))
    return max(files, key=os.path.getctime) if files else None


# Check for existing dataset and create or append to it
# NOTE(review): this runs before download_existing_dataset() is called later
# in the file, so only files already on local disk are considered here --
# confirm that this ordering is intended.
latest_file = get_latest_dataset_file()
if latest_file:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")

# Set up CommitScheduler for dataset uploads
repo_id = (
    "davanstrien/summer-reading-preference"  # Replace with your desired dataset repo
)
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=5,  # Upload every 5 minutes
)


# Function to download existing dataset files
def download_existing_dataset():
    """Copy the ``.jsonl`` files stored under ``data/`` in the dataset repo
    into ``dataset_folder``.

    Files that already exist locally are left untouched. The download is
    best-effort: any failure is printed and startup continues.

    ``hf_hub_download`` only fetches a single file and has no ``recursive``
    parameter, so the previous call always raised ``TypeError`` (swallowed by
    the ``except`` below) and nothing was ever restored.
    ``snapshot_download`` with ``allow_patterns`` fetches the whole ``data/``
    folder instead.
    """
    try:
        snapshot_path = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns="data/*.jsonl",
        )
        # The scheduler uploads with path_in_repo="data", so the files live in
        # the "data" sub-directory of the snapshot.
        for file in (Path(snapshot_path) / "data").glob("*.jsonl"):
            dest_file = dataset_folder / file.name
            if not dest_file.exists():
                dest_file.write_bytes(file.read_bytes())
                print(f"Downloaded existing dataset file: {dest_file}")
    except Exception as e:
        # Deliberately broad: a failed restore must not prevent the app from
        # starting; the error is reported instead.
        print(f"Error downloading existing dataset: {e}")


# Download existing dataset files
at startup download_existing_dataset() def get_random_model(): global CHOSEN_MODEL model = random.choice(MODELS) CHOSEN_MODEL = model return model def create_client(model_id): return OpenAI( base_url=f"https://api-inference.huggingface.co/models/{model_id}/v1", api_key=get_token(), ) client = OpenAI( base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct/v1", api_key=get_token(), ) def generate_prompt(): if random.choice([True, False]): return detailed_genre_description_prompt() else: return basic_prompt() def get_and_store_prompt(): prompt = generate_prompt() print(prompt) # Keep this for debugging return prompt def generate_blurb(prompt): model_id = get_random_model() client = create_client(model_id) max_tokens = random.randint(100, 1000) chat_completion = client.chat.completions.create( model="tgi", messages=[ {"role": "user", "content": prompt}, ], stream=True, max_tokens=max_tokens, ) full_text = "" for message in chat_completion: full_text += message.choices[0].delta.content yield full_text # Function to log blurb and vote def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *args): user_id = user_info.username if user_info is not None else str(uuid.uuid4()) log_entry = { "timestamp": datetime.now().isoformat(), "prompt": prompt, "blurb": blurb, "vote": vote, "user_id": user_id, "model": CHOSEN_MODEL, } with scheduler.lock: with dataset_file.open("a") as f: f.write(json.dumps(log_entry) + "\n") gr.Info("Thank you for voting!") return f"Logged: {vote} by user {user_id}" # Create custom theme tufte_theme = TufteInspired() # Create Gradio interface with gr.Blocks(theme=tufte_theme) as demo: gr.Markdown("
Looking for your next summer read?
Would you read a book based on this LLM generated blurb?
Your vote will be added to this Hugging Face dataset