Spaces:

NCSOFT
/

VARCO_Arena

Running

App Files Files Community

VARCO_Arena / streamlit_app_local /app.py

sonsus

데모와 동일한 내용 업데이트

3313619 verified about 2 months ago

raw

history blame

12 kB

	# import shutil
	import os
	import select
	import subprocess
	import sys
	import time
	from datetime import datetime, timedelta, timezone
	from pathlib import Path
	from typing import *

	import streamlit as st

	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

	from varco_arena_core.prompts import load_prompt

	from view_utils import (
	default_page_setting,
	escape_markdown,
	set_nav_bar,
	show_linebreak_in_md,
	)

	# import sys
	# print(sys.executable)


	# Root directory that receives user submissions. It can be overridden with the
	# VARCO_ARENA_RESULT_PATH environment variable and defaults to ./user_submit.
	VA_ROOT = Path(os.getenv("VARCO_ARENA_RESULT_PATH", "./user_submit"))
	# Last path component of VA_ROOT; main() treats it as a reserved name.
	USR_SUB = VA_ROOT.parts[-1]


	def upload_files(uploaded_files) -> Optional[Path]:
	    """Store the uploaded files in a fresh timestamped directory under VA_ROOT.

	    Args:
	        uploaded_files: iterable of uploaded-file objects exposing ``.name``
	            and ``.getbuffer()`` (what ``st.file_uploader`` hands to ``main``).

	    Returns:
	        The resolved directory the files were written to, or ``None`` when
	        fewer than two files were supplied (a message is shown instead), so
	        the ``upfiles_dir is None`` guards of the caller keep working.

	    Raises:
	        ValueError: if VA_ROOT exists but is not a directory.
	    """
	    # prep directory for user submission
	    user_sub_root = VA_ROOT
	    if user_sub_root.exists():
	        if not user_sub_root.is_dir():
	            raise ValueError(
	                f"{user_sub_root} file exists and is not a directory. Consider renaming it."
	            )
	    else:
	        user_sub_root.mkdir(parents=True)

	    uploaded_files = list(uploaded_files or [])

	    # Validate before touching the disk so a rejected upload does not leave an
	    # empty timestamped directory behind.
	    if not uploaded_files:
	        st.warning("❌ No files to upload. Please drag/drop or browse files to upload.")
	        return None
	    if len(uploaded_files) < 2:
	        st.error("❌ You need at least 2 jsonlines files to properly run VA.")
	        return None

	    # Directory name is the upload time in KST (UTC+9).
	    kst = timezone(timedelta(hours=9))
	    tstr = datetime.now(kst).strftime("%m-%d_%H:%M:%S")
	    # Join with pathlib instead of prefixing "./": the string prefix silently
	    # turned an absolute VARCO_ARENA_RESULT_PATH into a relative path.
	    files_dir = user_sub_root / tstr
	    files_dir.mkdir(parents=True, exist_ok=True)

	    for file in uploaded_files:
	        # Keep only the final path component so a crafted name cannot escape
	        # files_dir.
	        safe_name = Path(str(file.name).replace("\\", "/")).name
	        if safe_name in ("", ".", ".."):
	            st.warning(f"Skipped a file with an unusable name: {file.name!r}")
	            continue

	        # Save the file to the server directory
	        with open(files_dir / safe_name, "wb") as f:
	            f.write(file.getbuffer())

	    jslfiles = list(files_dir.glob("*.jsonl"))
	    st.success(f"✅ Successfully uploaded {len(jslfiles)} jsonl files.")
	    return files_dir.resolve()


	def _arena_cli_args(
	    input_dir, output_dir, api_key, promptname, evaluation_model, price_estimation
	):
	    """Build the argv list for ``../varco_arena/main.py`` (no shell parsing involved)."""
	    args = [
	        "python",
	        "../varco_arena/main.py",
	        "-i",
	        str(input_dir),
	        "-o",
	        str(output_dir),
	        "-k",
	        str(api_key),
	        "-p",
	        str(promptname),
	        "-e",
	        str(evaluation_model),
	        "-j",
	        "64",
	    ]
	    if price_estimation:
	        args.append("-c")
	    return args


	def run_varco_arena(
	    price_estimation: bool = False,
	    # upload_dir: Union[str, Path] = None,
	    promptname: str = None,
	    exp_name: str = None,
	    api_key: Optional[str] = None,
	    evaluation_model: str = "gpt-4o-mini",
	    update_interval: float = 1.0,
	):
	    """Run the VARCO Arena CLI as a subprocess and mirror its output on the page.

	    The input directory is read from ``st.session_state.upfiles_dir``; results
	    go to that directory, or to ``<dir>/<exp_name>`` when ``exp_name`` is set.

	    Args:
	        price_estimation: append ``-c`` to the CLI call and show every output
	            line; otherwise the CLI's stdin is fed by ``yes`` and only a
	            throttled tail of the output is shown.
	        promptname: value passed to ``-p``.
	        exp_name: optional sub-directory name for the output.
	        api_key: value passed to ``-k``. It is never printed or displayed.
	        evaluation_model: value passed to ``-e``.
	        update_interval: minimum seconds between UI refreshes in a real run.

	    Returns:
	        ``(outdir, return_code)`` - the output directory and the CLI exit code.
	    """
	    import shlex  # function-scope import: only used to render the command text

	    ptn = str(st.session_state.upfiles_dir)
	    outdir = Path(ptn)
	    if exp_name:
	        outdir = outdir / exp_name

	    # The echoed command goes to the server log and to the page, so the real
	    # key must not be part of it.
	    masked_key = "***" if api_key else api_key
	    shown_command = shlex.join(
	        _arena_cli_args(
	            ptn, outdir, masked_key, promptname, evaluation_model, price_estimation
	        )
	    )
	    if not price_estimation:
	        shown_command = f"yes | {shown_command}"
	    print(shown_command)

	    # An argv list (shell=False) keeps user-controlled values such as exp_name
	    # or the key from being interpreted by a shell.
	    argv = _arena_cli_args(
	        ptn, outdir, api_key, promptname, evaluation_model, price_estimation
	    )
	    api_key = None  # clear immediately

	    feeder = None
	    process = None
	    try:
	        if price_estimation:
	            child_stdin = subprocess.PIPE
	        else:
	            # Equivalent of the shell pipeline `yes | python ...`: answers any
	            # confirmation prompt of the CLI with "y".
	            feeder = subprocess.Popen(
	                ["yes"], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
	            )
	            child_stdin = feeder.stdout

	        process = subprocess.Popen(
	            argv,
	            stdout=subprocess.PIPE,
	            stderr=subprocess.STDOUT,
	            stdin=child_stdin,
	            text=True,
	            bufsize=1,
	        )
	        del argv  # drop the only remaining copy of the key held here
	        if feeder is not None:
	            # Close our copy so `yes` gets SIGPIPE once the CLI exits.
	            feeder.stdout.close()

	        # Set stdout to non-blocking mode
	        os.set_blocking(process.stdout.fileno(), False)

	        last_update_time = time.time()
	        terminal_output = st.empty()
	        full_output = f"{shown_command}\n"
	        # Initialised up front: the final "remaining output" step appends to it
	        # even when no refresh happened before the process ended.
	        to_show = full_output
	        while True:
	            got_line = False
	            # Check if we have output to read
	            if select.select([process.stdout], [], [], 0)[0]:
	                output = process.stdout.readline()
	                if output:
	                    got_line = True
	                    full_output += output
	                    if price_estimation:
	                        to_show = full_output
	                        terminal_output.code(to_show, language="bash")
	                    else:
	                        current_time = time.time()
	                        if current_time - last_update_time > update_interval:
	                            lines = full_output.split("\n")
	                            if len(lines) < 5:
	                                to_show = full_output
	                            else:
	                                to_show = "\n".join(["...\n..\n.\n"] + lines[-5:])
	                            terminal_output.code(to_show, language="bash")
	                            last_update_time = current_time
	                    print(output)
	            if not got_line:
	                # Only idle when nothing was pending, so bursts of output are
	                # not drained at one line per 0.1 s.
	                time.sleep(0.1)
	            # Check if the process has finished
	            if process.poll() is not None:
	                # Read any remaining output
	                remaining_output = process.stdout.read()
	                if remaining_output:
	                    lines = remaining_output.split("\n")
	                    if len(lines) > 10:
	                        to_show += "\n".join(["\n...\n..\n.\n"] + lines[-10:])
	                    else:
	                        to_show += remaining_output
	                    terminal_output.code(to_show, language="bash")
	                    print(remaining_output)
	                break

	        return_code = process.poll()
	    finally:
	        # Release pipes and reap `yes` once the CLI is gone (or never started).
	        # A still-running CLI is left untouched, as before.
	        if process is None or process.poll() is not None:
	            if process is not None:
	                for stream in (process.stdin, process.stdout):
	                    if stream is not None:
	                        stream.close()
	            if feeder is not None:
	                if feeder.stdout is not None:
	                    feeder.stdout.close()
	                feeder.terminate()
	                feeder.wait()

	    return outdir, return_code


	def main():
	    """Render the VARCO Arena page: upload, cost estimation, then the arena run.

	    State kept in ``st.session_state``:
	        korean: UI language flag (initialised to False when absent).
	        upfiles_dir: directory returned by ``upload_files`` (None until upload).
	        cost_estimated: set once a cost estimation succeeded; enables the run.
	        result_file_path: output directory of a successful run; once set, the
	            run button stays disabled.

	    Raises:
	        ValueError: if the selected prompt name equals the reserved USR_SUB.
	    """
	    import re  # function-scope import: only used to sanitise the experiment name

	    def read_text(path):
	        # Path.read_text closes the file; the bare open(...).read() leaked it.
	        return Path(path).read_text(encoding="UTF8")

	    def read_options(path):
	        # Drop blank lines so a trailing newline in the list file does not show
	        # up as an empty select-box entry.
	        return [ln.strip() for ln in read_text(path).splitlines() if ln.strip()]

	    # init lang
	    st.session_state["korean"] = st.session_state.get("korean", False)

	    sidebar_placeholder = default_page_setting()
	    set_nav_bar(
	        False, sidebar_placeholder=sidebar_placeholder, toggle_hashstr="app_init"
	    )

	    st.title("⚔️ VARCO ARENA ⚔️")
	    if st.session_state.korean:
	        st.write(
	            "VARCO Arena는 각 모델의 생성된 결과를 비교 평가하여 모델의 성능 순위를 제공하는 시스템입니다. 모범답안을 필요로 하지 않으므로 커스텀 테스트셋 (50+ 행) 을 활용하는 경우 편리한 벤치마킹이 가능합니다."
	        )
	    else:
	        st.write(
	            "VARCO Arena is an LLM benchmarking system that compares model responses across customized test scenarios (recommend >50 prompts) without requiring reference answers."
	        )

	    st.divider()
	    # Set up the file uploader
	    if st.session_state.korean:
	        # Heading prefix added so it matches the other numbered step headings.
	        st.markdown("### 1. 모델 출력파일 업로드")
	    else:
	        st.markdown("### 1. Upload LLM responses")
	    uploaded_files = st.file_uploader(
	        "Drag and Drop jsonlines files (.jsonl)", accept_multiple_files=True
	    )

	    # upload state
	    if "upfiles_dir" not in st.session_state:
	        st.session_state.upfiles_dir = None
	    if st.button("Upload Files"):
	        st.session_state.upfiles_dir = upload_files(uploaded_files)

	    if st.session_state.korean:
	        with st.expander("❓❔ 무엇을 업로드 하나요❓❔"):
	            st.info(read_text("guide_mds/input_jsonls_kr.md"))
	    else:
	        with st.expander("❓❔ What should I upload ❓❔"):
	            st.info(read_text("guide_mds/input_jsonls_en.md"))

	    # Form for cost estimation
	    with st.form("cost_estimation_form"):
	        if st.session_state.korean:
	            st.write("### 2. 가격 산정")
	        else:
	            st.write("### 2. Cost Estimation")
	        eval_model = st.selectbox(
	            "Select Judge",
	            read_options("eval_models_list.txt"),
	        )
	        promptname = st.selectbox(
	            "Select Evaluation Prompt",
	            read_options("eval_prompt_list.txt"),
	        )
	        if promptname == USR_SUB:
	            raise ValueError(
	                f"{USR_SUB=} is preserved name for the system. Consider another naming for the prompt or consider changing {VA_ROOT=} (USR_SUB == VA_ROOT.parts[-1])."
	            )
	        estimate_button = st.form_submit_button("Calculate Cost!")
	        with st.expander(
	            "LLM Judge에 활용되는 프롬프트 (`Calculate Cost!` 클릭시 갱신)"
	            if st.session_state.korean
	            else "Evaluation Prompt for LLM Judge (will refresh after `Calculate Cost!` clicked)"
	        ):
	            # Fill the template with its own placeholder names so the raw
	            # prompt layout is what gets displayed.
	            prompt = load_prompt(promptname, task="-")
	            kwargs = dict(
	                inst="{inst}",
	                src="{src}",
	                out_a="{out_a}",
	                out_b="{out_b}",
	                task="-",
	            )
	            if promptname == "translation_pair":
	                kwargs["source_lang"] = "{source_lang}"
	                kwargs["target_lang"] = "{target_lang}"
	            prompt_cmpl = prompt.complete_prompt(**kwargs)

	            st.markdown(f"### Evaluation Prompt: {promptname}")
	            for msg in prompt_cmpl:
	                st.markdown(f"{msg['role']}")
	                st.info(show_linebreak_in_md(escape_markdown(msg["content"])))

	        if estimate_button:
	            if st.session_state.get("upfiles_dir") is None:
	                st.error(
	                    "❌ Requirements: You have to upload jsonlines files first to proceed"
	                )
	            else:
	                st.markdown("##### Estimated Cost")
	                # Placeholder key and experiment name for the estimation call.
	                dummy_api_key = "dummy"
	                dummy_exp_name = "dummy"
	                result_file_path, return_code = run_varco_arena(
	                    promptname=promptname,
	                    api_key=dummy_api_key,
	                    exp_name=dummy_exp_name,
	                    price_estimation=True,
	                    evaluation_model=eval_model,
	                )
	                if return_code:
	                    st.error("❌ RuntimeError: An error occurred during cost estimation")
	                else:
	                    st.success("✅ Cost estimation completed successfully")
	                    st.session_state.cost_estimated = True

	    # Form for actual run
	    with st.form("run_arena_form"):
	        if st.session_state.korean:
	            st.write("### 3. Varco Arena 구동하기")
	        else:
	            st.write("### 3. Run Varco Arena")
	        api_key = st.text_input("Enter your OpenAI API Key", type="password")
	        exp_name = st.text_input("(Optional) Enter Exp. name")
	        # exp_name becomes a directory name under the upload directory, so
	        # path tricks are neutralised here (may cause rmtree problem later).
	        exp_name = exp_name.replace("..", "_")
	        exp_name = exp_name.replace(USR_SUB, f"-{USR_SUB}-")
	        exp_name = exp_name.replace("/", "-")
	        exp_name = exp_name.replace(" ", "_")
	        exp_name = exp_name.replace("~", "_")
	        # Anything else that is not a word character, dot or dash (quotes,
	        # `$`, `;`, backslashes, ...) is replaced as well: the name also ends
	        # up on a command line. \w is Unicode-aware, so non-ASCII names survive.
	        exp_name = re.sub(r"[^\w.\-]", "_", exp_name)

	        if st.session_state.korean:
	            st.write("주의:`Ctrl+C` 버튼은 구현되지 않았습니다. 구동 전 숙고해주세요.")
	        else:
	            st.write("Caution: `Ctrl+C` button hasn't been implemented.")
	        run_button = st.form_submit_button(
	            "🔥 Run Arena!",
	            disabled=(not st.session_state.get("cost_estimated", False))
	            or "result_file_path" in st.session_state,  # run already performed once
	        )

	        if run_button:
	            set_nav_bar(
	                True,
	                sidebar_placeholder=sidebar_placeholder,
	                toggle_hashstr="app_during_run",
	            )
	            if st.session_state.get("upfiles_dir") is None:
	                st.error(
	                    "❌ Requirements: You have to upload jsonlines files first to proceed"
	                )
	            elif not api_key:
	                st.error("❌ Requirements: OpenAI key required to run VA.")
	            else:
	                result_file_path, return_code = run_varco_arena(
	                    promptname=promptname,
	                    api_key=api_key,
	                    exp_name=exp_name,
	                    price_estimation=False,
	                    evaluation_model=eval_model,
	                )
	                if return_code:
	                    st.error("❌ RuntimeError: An error occurred during Varco Arena run")
	                else:
	                    st.success("✅ Varco Arena run completed successfully")
	                    st.session_state.result_file_path = result_file_path
	            # Restore the nav bar after every run attempt, including the
	            # failed ones, so the page is never left in the "during run" state.
	            set_nav_bar(
	                False, sidebar_placeholder=sidebar_placeholder, toggle_hashstr="app_run_done"
	            )


	# Script entry point: build the page only when this file is executed directly
	# (e.g. by `streamlit run`), not when it is imported.
	if __name__ == "__main__":
	    main()