# NOTE(review): the following non-Python lines are Hugging Face file-viewer
# chrome (avatar caption, commit hash, "raw / history blame", file size) that
# leaked into the source during extraction; commented out so the module parses.
# StringChaos's picture
# question id and problem_idx
# 0df4202
# raw
# history blame
# 4.03 kB
import os
import random
import glob
import json
import numpy as np
from flask import Flask, render_template, request
# Flask app serving a browser UI for inspecting per-model evaluation results
# on a set of coding problems.
app = Flask(__name__)

# Problem metadata; each entry is a dict with at least "question_title",
# "difficulty", and "question_id" keys (see usage below).
with open("problems.json") as f:
    problems = json.load(f)
problem_choices = [q["question_title"] for q in problems]

# Display order of the problems. Shuffling is currently disabled, so this is
# the identity permutation; re-enable the two commented lines to randomize.
random_idxs = list(range(len(problems)))
# random.seed(42)
# random.shuffle(random_idxs)
problems = [problems[idx] for idx in random_idxs]

# Raw model outputs keyed by model name. Each model maps to a per-problem
# sequence whose entries carry "pass1_list", "code_list", and "metadata_list"
# (see usage below) — assumes entries are indexable by problem index;
# TODO confirm against the all_outputs.json schema.
with open("all_outputs.json") as f:
    all_outputs = json.load(f)
all_models = list(all_outputs.keys())
num_questions_filtered = len(problems)

# Mean pass@1 per problem per model, as a fraction in [0, 1].
all_correctness_by_problem = {
    idx: {model: np.mean(all_outputs[model][idx]["pass1_list"]) for model in all_models}
    for idx in random_idxs
}
def calculate_color(performance):
    """Map a pass@1 fraction in [0, 1] to an ``rgba()`` CSS color string.

    High scores render green and low scores red; in the two middle bands the
    alpha channel encodes how strong (or weak) the score is.

    Args:
        performance: mean pass@1 for one (problem, model) pair, in [0, 1].

    Returns:
        An ``rgba(r, g, b, a)`` string suitable for inline CSS.
    """
    if performance > 0.75:
        # Solid green for strong results (f-prefix removed: constant string).
        return "rgba(0, 150, 0, 0.5)"
    elif performance > 0.5:
        # Greenish; more opaque as the score rises.
        return f"rgba(50, 150, 0, {performance})"
    elif performance > 0.25:
        # Reddish; more opaque as the score falls.
        return f"rgba(150, 50, 0, {1-performance})"
    else:
        # Solid red for weak results.
        return "rgba(150, 0, 0, 0.5)"
# One row per problem, in display order:
# (display_index, {model: {"correctness", "correctness_color"}}, difficulty, question_id)
all_evaluations_by_problem_colored = []
for trueidx, idx in enumerate(random_idxs):
    per_model = {}
    for model in all_models:
        score = all_correctness_by_problem[idx][model]
        per_model[model] = {
            "correctness": f"{score*100:.1f}",
            "correctness_color": calculate_color(score),
        }
    all_evaluations_by_problem_colored.append(
        (
            trueidx,
            per_model,
            problems[idx]["difficulty"],
            problems[idx]["question_id"],
        )
    )
# For each model: a list (indexed by display position) of per-sample dicts
# pairing each generated solution with its pass@1 result and metadata.
all_data_for_view_formatted = {}
for model, resp in all_outputs.items():
    per_problem = []
    for idx in random_idxs:
        row = resp[idx]
        samples = [
            {"code": code, "pass1": passed, "metadata": meta}
            for code, passed, meta in zip(
                row["code_list"], row["pass1_list"], row["metadata_list"]
            )
        ]
        per_problem.append(samples)
    all_data_for_view_formatted[model] = per_problem
@app.route("/")
def home():
    """Render the overview grid: every problem crossed with every model."""
    print(all_models)  # debug: log the configured model names per request
    context = {
        "models": all_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index.html", **context)
@app.route("/problem/<int:problem_idx>")
def problem(problem_idx):
    """Render the detail page for one problem across all models."""
    row = all_evaluations_by_problem_colored[problem_idx]
    per_model_outputs = {
        model: all_data_for_view_formatted[model][problem_idx]
        for model in all_models
    }
    return render_template(
        "problem.html",
        problem_idx=problem_idx,
        question_id=row[3],  # original question id from problems.json
        evaluation=row[1],   # per-model correctness text + color
        models=all_models,
        question=problems[problem_idx],
        data=per_model_outputs,
    )
# Subset of model names shown on the "/mini" routes. Every entry must be a
# key of all_outputs.json (they index all_data_for_view_formatted below);
# commented entries are currently excluded from the mini view.
mini_models = [
    # "DeepSeek-V2",
    "DeepSeek-V3",
    "DeepSeek-R1-Preview",
    # "DSCoder-33b-Ins",
    # "GPT-4-Turbo-2024-04-09",
    "GPT-4O-2024-05-13",
    "Claude-3.5-Sonnet-20240620",
    "Gemini-Flash-2.0-Thinking",
    # "Gemini-Exp-1206",
    # "Claude-3-Sonnet",
    "O1-2024-12-17 (N=1) (High)",
    "QwQ-32B-Preview (N=1)",
]
@app.route("/mini")
def mini():
    """Render the overview grid restricted to the curated mini model set."""
    context = {
        "models": mini_models,
        "problems": all_evaluations_by_problem_colored,
    }
    return render_template("index_mini.html", **context)
@app.route("/problem_mini/<int:problem_idx>")
def problem_mini(problem_idx):
    """Render the single-problem detail page for the mini model set."""
    row = all_evaluations_by_problem_colored[problem_idx]
    per_model_outputs = {
        model: all_data_for_view_formatted[model][problem_idx]
        for model in mini_models
    }
    return render_template(
        "problem_mini.html",
        problem_idx=problem_idx,
        question_id=row[3],  # original question id from problems.json
        evaluation=row[1],   # per-model correctness text + color
        models=mini_models,
        question=problems[problem_idx],
        data=per_model_outputs,
    )
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the host
    # (e.g. a container); port 7860 — presumably chosen for a hosted
    # deployment default, TODO confirm.
    app.run(host="0.0.0.0", port=7860)