Datasets-Metrics-Viewer / src /view /metric_view_tab.py
hynky's picture
hynky HF staff
Refactor the code
75448af
raw
history blame
7.15 kB
from datetime import datetime
import tempfile
from typing import Callable
import gradio as gr
from functools import partial
import re
import json
from src.logic.data_fetching import fetch_datasets, fetch_graph_data, fetch_groups, fetch_metrics, update_datasets_with_regex
from src.logic.data_processing import export_data
from src.logic.graph_settings import update_graph_options
from src.logic.plotting import plot_data
def create_metric_view_tab(
    METRICS_LOCATION_DEFAULT: str,
    available_datasets: gr.State,
    selected_datasets: gr.State,
) -> gr.Textbox:
    """Create the widgets of the metric view tab and wire up their events.

    Presumably meant to be called inside an active ``gr.Blocks`` context,
    since components and listeners are created without opening one here.

    Args:
        METRICS_LOCATION_DEFAULT: Initial value of the "Metrics Location"
            textbox.
        available_datasets: State written by ``fetch_datasets`` and used as
            the choices of the "Datasets" dropdown.
        selected_datasets: State kept in sync (both directions) with the
            "Datasets" dropdown; also written by the regex search.

    Returns:
        The "Metrics Location" textbox component.
    """
    # Data fetched for the current metric; any change to it triggers a re-plot.
    metric_data = gr.State([])

    # ------------------------------------------------------------------ layout
    # NOTE(review): the nesting of the containers below follows the apparent
    # layout of the tab; confirm against the rendered UI.
    with gr.Row():
        with gr.Column(scale=2):
            with gr.Row():
                with gr.Column(scale=1):
                    base_folder = gr.Textbox(label="Metrics Location", value=METRICS_LOCATION_DEFAULT)
                    datasets_fetch = gr.Button("Fetch Datasets")
                with gr.Column(scale=1):
                    regex_select = gr.Text(label="Regex filter", value=".*")
                    regex_button = gr.Button("Search")
            with gr.Row():
                selected_datasets_dropdown = gr.Dropdown(
                    choices=[], label="Datasets", multiselect=True, interactive=True
                )
        with gr.Column(scale=1):
            grouping_dropdown = gr.Dropdown(choices=[], label="Grouping", multiselect=False)
            metric_name_dropdown = gr.Dropdown(choices=[], label="Metric name", multiselect=False)

    render_button = gr.Button("Render Metric", variant="primary")

    with gr.Tabs():
        with gr.TabItem("Graph Settings"):
            log_scale_x_checkbox = gr.Checkbox(label="Log scale x", value=False)
            log_scale_y_checkbox = gr.Checkbox(label="Log scale y", value=False)
            rounding = gr.Number(label="Rounding", value=2)
        with gr.TabItem("Grouping Settings") as group_settings:
            with gr.Row() as group_choices:
                with gr.Column(scale=2):
                    group_regex = gr.Text(label="Group Regex", value=None)
                    with gr.Row():
                        top_select = gr.Number(label="N Groups", value=100, interactive=True)
                        direction_checkbox = gr.Radio(
                            label="Partition",
                            choices=["Top", "Bottom", "Most frequent (n_docs)"],
                            value="Most frequent (n_docs)",
                        )
        with gr.TabItem("Histogram Settings") as histogram_settings:
            # Hidden from the user, but its value (True) is still passed to plot_data.
            normalization_checkbox = gr.Checkbox(label="Normalize", value=True, visible=False)
            cdf_checkbox = gr.Checkbox(label="CDF", value=False)
            perc_checkbox = gr.Checkbox(label="%", value=False)
        with gr.TabItem("Summary Settings") as summary_settings:
            show_stds_checkbox = gr.Checkbox(label="Show standard deviations", value=False)

    with gr.Row():
        graph_output = gr.Plot(label="Graph")

    # Starts hidden; it is one of plot_data's outputs.
    with gr.Row(visible=False) as min_max_hist:
        with gr.Column(scale=3):
            min_max_hist_data = gr.Markdown()
        with gr.Column(scale=1):
            export_data_button = gr.Button("Export Data")
            export_data_json = gr.File(visible=False)

    # --------------------------------------------------------------- callbacks
    def update_selected_datasets_dropdown(available_datasets, selected_datasets):
        """Rebuild the dropdown from both states, showing the selection sorted."""
        ordered_selection = sorted(selected_datasets)
        return gr.Dropdown(choices=available_datasets, value=ordered_selection)

    def update_selected_datasets(selected_datasets_dropdown):
        """Pass the dropdown value through so it can be stored in the state."""
        return selected_datasets_dropdown

    # ------------------------------------------------------------ event wiring
    # Fetching only fills the state; the dropdown follows via the listener below.
    datasets_fetch.click(fn=fetch_datasets, inputs=[base_folder], outputs=[available_datasets])
    available_datasets.change(
        fn=update_selected_datasets_dropdown,
        inputs=[available_datasets, selected_datasets],
        outputs=selected_datasets_dropdown,
    )

    regex_button.click(
        fn=update_datasets_with_regex,
        inputs=[regex_select, selected_datasets, available_datasets],
        outputs=selected_datasets,
    )

    # Dropdown -> state, and state -> dropdown, so both stay consistent.
    selected_datasets_dropdown.change(
        fn=update_selected_datasets,
        inputs=[selected_datasets_dropdown],
        outputs=selected_datasets,
    )
    selected_datasets.change(
        fn=update_selected_datasets_dropdown,
        inputs=[available_datasets, selected_datasets],
        outputs=selected_datasets_dropdown,
    )

    # Selection change refreshes the groupings; grouping change refreshes the metrics.
    selected_datasets.change(
        fn=fetch_groups,
        inputs=[base_folder, selected_datasets, grouping_dropdown],
        outputs=grouping_dropdown,
    )
    grouping_dropdown.change(
        fn=fetch_metrics,
        inputs=[base_folder, selected_datasets, grouping_dropdown, metric_name_dropdown],
        outputs=metric_name_dropdown,
    )

    render_button.click(
        fn=fetch_graph_data,
        inputs=[base_folder, selected_datasets, metric_name_dropdown, grouping_dropdown],
        # graph_output is an output too (set to None) so that progress is shown on it.
        outputs=[metric_data, graph_output],
    )

    # The three settings tabs are handed to update_graph_options whenever the
    # grouping changes.
    grouping_dropdown.change(
        fn=update_graph_options,
        inputs=[grouping_dropdown],
        outputs=[group_settings, histogram_settings, summary_settings],
    )

    # Re-plot on any user edit of a plot setting, or when new metric data arrives.
    replot_triggers = [
        normalization_checkbox.input,
        rounding.input,
        group_regex.input,
        direction_checkbox.input,
        top_select.input,
        log_scale_x_checkbox.input,
        log_scale_y_checkbox.input,
        cdf_checkbox.input,
        perc_checkbox.input,
        show_stds_checkbox.input,
        metric_data.change,
    ]
    # Order matters: these are passed positionally to plot_data.
    plot_inputs = [
        metric_data,
        metric_name_dropdown,
        normalization_checkbox,
        rounding,
        grouping_dropdown,
        top_select,
        direction_checkbox,
        group_regex,
        log_scale_x_checkbox,
        log_scale_y_checkbox,
        cdf_checkbox,
        perc_checkbox,
        show_stds_checkbox,
    ]
    gr.on(
        triggers=replot_triggers,
        fn=plot_data,
        inputs=plot_inputs,
        outputs=[graph_output, min_max_hist, min_max_hist_data],
    )

    export_data_button.click(
        fn=export_data,
        inputs=[metric_data, metric_name_dropdown, grouping_dropdown],
        outputs=[export_data_json],
    )

    return base_folder