Spaces:

Rajarshi-Roy-research
/

Plot_Viz

Sleeping

App Files Files Community

Rajarshi Roy committed on Sep 16, 2024

Commit

08949f8

verified ·

1 Parent(s): b944a57

Create app.py

Browse files

Files changed (1) hide show

app.py +381 -0

app.py ADDED Viewed

	@@ -0,0 +1,381 @@

+def convert_google_sheet_url(url):
+    # Regular expression to match and capture the necessary part of the URL
+    pattern = r'https://docs\.google\.com/spreadsheets/d/([a-zA-Z0-9-_]+)(/edit#gid=(\d+)|/edit.*)?'
+    # Replace function to construct the new URL for CSV export
+    # If gid is present in the URL, it includes it in the export URL, otherwise, it's omitted
+    replacement = lambda m: f'https://docs.google.com/spreadsheets/d/{m.group(1)}/export?' + (f'gid={m.group(3)}&' if m.group(3) else '') + 'format=csv'
+    # Replace using regex
+    new_url = re.sub(pattern, replacement, url)
+    return new_url
+# Replace with your modified URL
+# url = "https://docs.google.com/spreadsheets/d/1dlTjKJrGVwRDU8m-hT53IdSluRAsWXftnx5uRqnq4yE/edit?gid=0#gid=0"
+url = "https://docs.google.com/spreadsheets/d/1MY0-DOitMZGnib73BAaSKg0TI7i5V1CXP8dF6jAgKWc/edit?gid=293606167#gid=293606167"
+new_url = convert_google_sheet_url(url)
+df = pd.read_csv(new_url)
+# Set 'Categories' column as index
+df1 = df.copy()
+df1.set_index('Categories', inplace=True)
+transposed_df = df.transpose()
+transposed_df.columns = transposed_df.iloc[0]
+df = transposed_df.drop(["Categories"])
+df = df.fillna("[]")
+df1 = df1.fillna("[]")
+# Convert the string representation of lists into actual lists for all relevant columns
+for col in df.columns:  # Skip the first column which is 'Categories'
+    df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
+# Convert the string representation of lists into actual lists for all relevant columns
+for col in df1.columns:  # Skip the first column which is 'Categories'
+    df1[col] = df1[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
+cols = df.columns
+# Get the specific column while filtering out empty cells
+column_data = df[cols[0]]
+# Filter out the empty lists ([])
+filtered_column_data = column_data[column_data.apply(lambda x: x != [])]
+def get_score(avg_kl_div,kl_div,missing,extra,common):
+    Wc=1
+    Wm=1.5
+    We=1.5
+    WeE=(We*extra)**2
+    WeM=(Wm*missing)**2
+    WeC=(We*common)**2
+    if kl_div==-1:
+        kl_div=avg_kl_div
+    kl_div_factor=kl_div/avg_kl_div
+    ans=kl_div_factor*(((WeE+WeM)/WeC)-2)#  (e**2 -c**2)/c**2 +(m**2-c**2)/c**2 => (0-1)*[((e**2+m**2)/c**2 -2)] => ((rank*y/a)m(m+1)/2))
+    return ans
+def get_individual_score(avg_kl_div,kl_div,e_or_m,common):
+    if kl_div==-1:
+        kl_div=avg_kl_div
+    kl_div_factor=kl_div/avg_kl_div
+    weight=1.5
+    ans=avg_kl_div + ((1+(e_or_m/common))*(((e_or_m)*(e_or_m+1)))/2)**0.5 # X +- [(1+b/a)*n**2*y]
+    # ans = kl_div_factor*((((weight*e_or_m)**2)/(common**2))-1)
+    return ans
+def get_entity_scores(ans4):
+    # Calculate average KL divergence
+    tt = 0
+    avg_kl_div = 0
+    for t in ans4:
+        if t[0] != -1:
+            avg_kl_div += t[0]
+            tt += 1
+    # Avoid division by zero
+    if tt > 0:
+        avg_kl_div /= tt
+    else:
+        avg_kl_div = 0
+    extra_entity_score = []
+    missing_entity_score = []
+    for t in ans4:
+        extra_entity_score.append(get_individual_score(avg_kl_div, t[0], t[2], t[3]))
+        missing_entity_score.append(get_individual_score(avg_kl_div, t[0], t[1], t[3]))
+    extra_entity_score.sort()
+    missing_entity_score.sort()
+    return (
+        missing_entity_score[:int(0.950 * len(missing_entity_score))],
+        extra_entity_score[:int(0.95 * len(extra_entity_score))]
+    )
+compare = df.columns[0]
+column_data = df[compare]
+# Filter out the empty lists ([])
+filtered_column_data = column_data[column_data.apply(lambda x: x != [])]
+# Display the filtered column data
+variables = filtered_column_data.to_list()
+models = filtered_column_data.index.to_list()
+color_schemes = [
+    '#d60000',  # Red
+    '#2f5282',  # Navy Blue
+    '#f15cd8',  # Pink
+    '#66abb7',  # Light Teal
+    '#ce7391',  # Rose
+    '#6bdb7a',  # Light Green
+    '#ea8569',  # Coral
+    '#b36cc9',  # Lavender
+    '#ffd700',  # Gold
+    '#ff7f0e',  # Orange
+    '#1f77b4',  # Blue
+    '#2ca02c',  # Green
+]
+colors = color_schemes[:len(models)]
+values_dict = {model: var for var, model in zip(variables, models)}
+color_dict = {model: color for model, color in zip(models, colors)}
+# plot_grouped_3d_kde(values_dict, models, color_dict, compare)
+import numpy as np
+import plotly.graph_objects as go
+from scipy.stats import gaussian_kde
+import plotly.express as px
+def adjust_kde_range(data, increment=25, threshold=0.00005):
+    kde = gaussian_kde(data)
+    min_x, max_x = min(data) - increment, max(data) + increment
+    # Keep expanding the range until both tails get close to zero
+    while True:
+        x_values = np.linspace(min_x, max_x, 1000)
+        y_values = kde(x_values)
+        # # Check the values at the tails
+        # print(y_values[0], y_values[-1])
+        # print(x_values[0], x_values[-1], "\n")
+        if y_values[0] < threshold and y_values[-1] < threshold:
+            break  # Stop if both tails are below the threshold
+        # Extend the range
+        min_x -= increment
+        max_x += increment
+    return x_values, y_values
+def compute_kde_ranges(missing_scores, extra_scores):
+    data1 = np.array(missing_scores)
+    data2 = -np.array(extra_scores)  # Negate extra scores for alignment
+    # Compute KDE for missing scores with extended range
+    x_missing, y_missing = adjust_kde_range(data1)
+    # Compute KDE for extra scores with extended range
+    x_extra, y_extra = adjust_kde_range(data2)
+    # Calculate axis limits
+    Val_x_extra = [max(x_extra)]
+    Val_x_miss = [x_missing[np.argmax(y_missing)]]
+    peak_extra = max(y_extra)
+    peak_miss = max(y_missing)
+    # Calculate the x and y axis ranges
+    min_x = min(min(x_missing), min(x_extra))
+    max_x = max(max(x_missing), max(x_extra))
+    x_range = [min_x, max_x]
+    y_range = [-peak_extra, peak_miss * 1.25]
+    return x_missing, y_missing, x_extra, y_extra, x_range, y_range
+def calculate_ticks(x_min, x_max, num_ticks=20):
+    # Calculate the total range
+    total_range = x_max - x_min
+    # Determine the interval between ticks
+    interval = total_range / (num_ticks - 1)  # We need num_ticks - 1 intervals
+    # Generate tick values
+    ticks = np.arange(x_min, x_max + interval, interval)
+    return ticks
+def plot_filled_surface(x, z, y_level, color):
+    """
+    Create a 3D mesh to fill the surface between the KDE curve and the 0-axis.
+    """
+    x_full = np.concatenate([x, x[::-1]])  # X-axis values, with reverse for baseline
+    z_full = np.concatenate([z, np.zeros_like(z)])  # Z-axis (KDE and baseline at 0)
+    y_full = np.full_like(x_full, y_level)  # Flat Y plane (constant for each model)
+    num_pts = len(x)
+    i = np.arange(num_pts - 1)
+    j = i + 1
+    k = i + num_pts
+    i = np.concatenate([i, i + num_pts])
+    j = np.concatenate([j, j + num_pts])
+    k = np.concatenate([k, i[:len(i)//2]])
+    return go.Mesh3d(
+        x=x_full, y=y_full, z=z_full,
+        i=i, j=j, k=k,
+        opacity=0.5,
+        color=color,
+        showscale=False,
+        legendgroup='filling'
+    )
+def plot_kde_3d(values_dict, models, color_dict, compare):
+    # values_dict, models, color_dict, compare = (values_dict, models, color_dict, 'Comparison Title')
+    fig = go.Figure()
+    model_y_positions = {model: i for i, model in enumerate(models)}
+    x_ranges = []
+    y_ranges = []
+    for model in models:
+        missing_scores, extra_scores = get_entity_scores(values_dict[model])
+        # Compute KDE and ranges for missing and extra scores
+        x_m, y_m, x_e, y_e, x_range, y_range = compute_kde_ranges(missing_scores, extra_scores)
+        # Append ranges for global limits
+        x_ranges.append(x_range)
+        y_ranges.append(y_range)
+        # Get color for this model
+        color = color_dict.get(model, 'rgba(0, 0, 0, 0.5)')  # Default color if not found
+        # Create filled surfaces between KDE curves and zero line
+        fig.add_trace(plot_filled_surface(x_m, y_m, model_y_positions[model], color))
+        fig.add_trace(plot_filled_surface(x_e, -y_e, model_y_positions[model], color))
+        # Plot the KDE lines (for visualization of the curves)
+        fig.add_trace(go.Scatter3d(
+            x=x_m,
+            y=[model_y_positions[model]] * len(x_m),
+            z=y_m,
+            mode='lines',
+            line=dict(color='blue'),
+            showlegend=False
+        ))
+        fig.add_trace(go.Scatter3d(
+            x=x_e,
+            y=[model_y_positions[model]] * len(x_e),
+            z=-y_e,
+            mode='lines',
+            line=dict(color='red'),
+            showlegend=False  # Hide legend for extra scores to combine with missing scores
+        ))
+    # Compute global x and y limits
+    x_min = min(r[0] for r in x_ranges)
+    x_max = max(r[1] for r in x_ranges)
+    y_min = min(r[0] for r in y_ranges)
+    y_max = max(r[1] for r in y_ranges)
+    # Define x, y, z axis tick intervals
+    x_ticks = calculate_ticks(np.floor(x_min), np.ceil(x_max))
+    y_ticks = list(model_y_positions.values())
+    z_ticks = calculate_ticks(y_min, y_max)
+    # Add a line through the 0-axis of density for each model
+    for model in models:
+        color = color_dict.get(model, 'rgba(0, 0, 0, 0.5)')
+        fig.add_trace(go.Scatter3d(
+            x=[x_min, x_max],
+            y=[model_y_positions[model], model_y_positions[model]],
+            z=[0, 0],
+            mode='lines',
+            # line=dict(color=color, width=2, dash='dash'),
+            line=dict(color=color),
+            name=model,
+            # showlegend=False
+        ))
+    # Update layout for 3D plot
+    fig.update_layout(
+        title=f'3D KDE Plots for {compare}',
+        scene=dict(
+            xaxis_title='Score',
+            yaxis_title='Model',
+            zaxis_title='Density',
+            xaxis=dict(
+                range=[x_min, x_max],
+                tickvals=x_ticks,
+                ticktext=[f'{tick:.2f}' for tick in x_ticks]
+            ),
+            yaxis=dict(
+                tickvals=y_ticks,
+                ticktext=[list(model_y_positions.keys())[list(model_y_positions.values()).index(tick)] for tick in y_ticks]
+            ),
+            zaxis=dict(
+                range=[y_min, y_max],
+                tickvals=z_ticks,
+                ticktext=[f'{tick:.4f}' for tick in z_ticks]
+            ),
+            camera=dict(
+                eye=dict(x=1.25, y=1.25, z=1.25)
+            )
+        ),
+        autosize=True,
+        width=1200*.75,
+        height=800*.75
+    )
+    # Save the plot as an HTML file
+    # plot = px.scatter(x=range(10), y=range(10))
+    filename = f"{compare}.html"
+    fig.write_html(filename)
+    # fig.show()
+    return fig
+# Path to your saved HTML file
+html_file_path = '3d_plot.html'
+title = 'My 3D Plot'
+def display_plot():
+    fig = plot_kde_3d(values_dict, models, color_dict, compare)
+    return fig
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=display_plot,
+    inputs=[],
+    outputs=gr.Plot(),
+    title='Plotly 3D Plot in Gradio',
+    description='This app displays a 3D Plotly plot directly in the Gradio interface.',
+    live=False
+)
+# Launch the Gradio app
+if __name__ == "__main__":
+    interface.launch()