# -*- coding: utf-8 -*-
"""ranking_simulation_1_asymmetric_judges.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1d7GP8R96AgQSvJq4YL3nnzEmtBomevu0
"""

# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install gradio
# !pip install opinionated
#

import numpy as np
import pandas as pd
import opinionated
import matplotlib.pyplot as plt
from scipy.stats import skewnorm


plt.style.use("opinionated_rc")

from opinionated.core import download_googlefont
download_googlefont('Quicksand', add_to_cache=True)

plt.rc('font', family='Quicksand')


import colormaps as cmaps

def sample_skewed_normal(alpha, loc, scale, n, range_min=None, range_max=None):
    samples = []
    while len(samples) < n:
        sample = skewnorm.rvs(alpha, loc, scale)
        if (range_min is None or sample >= range_min) and (range_max is None or sample <= range_max):
            samples.append(sample)
    return np.array(samples)


# # Example usage
# alpha = 5  # Skewness
# loc = 0    # Location (mean)
# scale = 1  # Scale (standard deviation)
# n_samples = 10000  # Number of samples to generate
# range_min, range_max = -2, 2  # Range for the samples

# samples = sample_skewed_normal(alpha, loc, scale, n_samples, range_min, range_max)
# # print(samples)

# import seaborn as sns
# sns.histplot(samples)

from scipy.stats import norm



def simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5, alpha=0, loc=50, scale=15,
                               judge_error=1, judges_attitude=0.3, judgment_coarse_graining=False):
    """
    Simulates the process of judging applicants by a set of judges, incorporating randomness in evaluations, judge bias, and error.

    Parameters:
    - num_applicants (int): Number of applicants to be judged.
    - num_judges (int): Number of judges involved in the evaluation process.
    - ratings_per_applicant (int): Number of ratings each applicant receives from different judges.
    - alpha (float): Alpha parameter for the skewed normal distribution to simulate applicant qualities.
    - loc (float): The mean ('location') for the skewed normal distribution of applicant qualities.
    - scale (float): Standard deviation for the skewed normal distribution of applicant qualities.
    - judge_error (float): Width of the 95% interval of the random error in judges' evaluations.
    - judges_attitude (float): Standard deviation of the zero-centered normal distribution from which each judge's skew (harshness/generosity) is drawn.
    - judgment_coarse_graining (int/bool): Enables coarse graining of evaluations into the specified number of buckets. Set to False to disable.

    The function simulates the quality of each applicant using a skewed normal distribution and then assigns each applicant a set of evaluations by different judges, considering the judges' workload distribution, biases, and random error margin. Coarse graining of evaluations is applied if enabled.

    Returns:
    - pandas.DataFrame: A DataFrame with columns for each applicant's identifier, inherent quality, average evaluation score, individual scores from each judge, original ranking based on scores, and final rankings after applying random tie-breaking for identical scores.
    """
    # Generate the quality of applicants from a skewed normal distribution, truncated to 0-100
    applicant_qualities = sample_skewed_normal(alpha, loc, scale, num_applicants, 0, 100)

    # Function to apply coarse graining
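    # (snaps each evaluation to the nearest of `grain_size` evenly spaced
    # levels on the 0-100 scale; e.g. grain_size=7 maps a raw 63 to ~57.1)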
    def coarse_grain_evaluation(evaluation, grain_size):
        return round(evaluation / (100 / grain_size)) * (100 / grain_size)

    # Initialize evaluations dictionary
    evaluations = {f"Applicant_{i+1}": [] for i in range(num_applicants)}
    judge_workload = np.zeros(num_judges)
    judges_attitudes = np.random.normal(0, judges_attitude, num_judges)

    # Randomly assign judges to applicants
    for _ in range(ratings_per_applicant):
        for applicant in range(num_applicants):
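            # Prefer judges with lighter workloads: each judge's selection
            # weight is (max_workload - own_workload + 1), normalized into
            # a probability vector over judges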
            probabilities = (max(judge_workload) - judge_workload + 1) / sum(max(judge_workload) - judge_workload + 1)
            judge = np.random.choice(num_judges, p=probabilities)
            judge_workload[judge] += 1
            # judge_error is the full width of the 95% interval, so the
            # skew-normal scale is half that width divided by the 97.5% normal quantile
            evaluation = sample_skewed_normal(alpha=judges_attitudes[judge], loc=applicant_qualities[applicant], scale=judge_error / 2 / norm.ppf(0.975), n=1)[0]

            # Apply coarse graining if enabled
            if judgment_coarse_graining:
                evaluation = coarse_grain_evaluation(evaluation, judgment_coarse_graining)

            evaluations[f"Applicant_{applicant+1}"].append(evaluation)

    # Prepare data for DataFrame
    data = []
    for applicant, (quality, scores) in enumerate(zip(applicant_qualities, evaluations.values()), 1):
        average_evaluation = np.mean(scores)
        original_ranks = np.argsort(np.argsort(-np.array(scores))) + 1
        data.append([f"Applicant_{applicant}", quality, average_evaluation, scores, list(original_ranks)])

    # Create DataFrame
    df = pd.DataFrame(data, columns=["Applicant", "Applicant Quality", "Average Evaluation", "Original Scores", "Rank of Original Scores"])

    # Random tie-breaking function
    def random_tie_breaking(df, column):
        # Shuffle the rows first, then rank with method='first', so that tied
        # values receive their distinct ranks in random order; sort_index()
        # restores the original row alignment
        shuffled = df[column].sample(frac=1)
        ranks = shuffled.rank(method='first', ascending=False)
        return ranks.sort_index()

    # Apply random tie-breaking to rankings
    df['Rank of Evaluation'] = random_tie_breaking(df, 'Average Evaluation').astype(int)
    df['Rank of Applicant Quality'] = random_tie_breaking(df, 'Applicant Quality').astype(int)

    return df

# df_results = simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5,alpha=0, loc=50, scale=15, judge_error=1, judges_attitude=0.3,
#                                         judgment_coarse_graining=10)
# df_results.head(30)



# df_results.sort_values(by='Rank of Evaluation').head(30)



import pandas as pd

def summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale,
                               judge_error, judges_attitude, judgment_coarse_graining):
    """
      Runs multiple simulations of applicant judging and summarizes the frequency of each candidate's placement in the top n positions based on their quality ranking.

      Parameters:
      - num_runs (int): Number of simulation iterations to run.
      - num_applicants (int): Number of applicants in each simulation.
      - num_judges (int): Number of judges evaluating the applicants in each simulation.
      - ratings_per_applicant (int): Number of evaluations each applicant receives.
      - top_n (int): The number of top positions to analyze in the ranking.
      - alpha (float): Alpha parameter for the skewed normal distribution to simulate applicant qualities.
      - loc (float): The mean ('location') for the skewed normal distribution of applicant qualities.
      - scale (float): Standard deviation for the skewed normal distribution of applicant qualities.
      - judge_error (float): Width of the 95% interval of the random error in judges' evaluations.
      - judges_attitude (float): Standard deviation of the zero-centered normal distribution from which each judge's skew (harshness/generosity) is drawn.
      - judgment_coarse_graining (int/bool): Enables coarse graining of evaluations into the specified number of buckets. Set to False to disable.

      The function performs multiple runs of the applicant judging simulation. It aggregates and summarizes how often each applicant, based on their quality rank, appears in the top n positions of the evaluation rankings.

      Returns:
      - pandas.DataFrame: A DataFrame where each row corresponds to an applicant (ranked by quality) and each column represents how often that applicant was in a specific top n position across all simulation runs.
      """
    # Initialize counts for each quality-ranked candidate in top n positions
    top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])

    for _ in range(num_runs):
        df_results = simulate_applicant_judging(num_applicants=num_applicants, num_judges=num_judges, ratings_per_applicant=ratings_per_applicant,
                                                alpha=alpha, loc=loc, scale=scale,
                                                judge_error=judge_error, judges_attitude=judges_attitude,
                                                judgment_coarse_graining=judgment_coarse_graining)
        # Sort applicants by true quality (best first)
        sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
        # Sort applicants by evaluation rank (best first)
        sorted_by_evaluation = df_results.sort_values(by='Rank of Evaluation').reset_index()

        for i in range(top_n):
            # Find which quality-ranked candidate occupies this top evaluation
            # position: the row position in the quality-sorted frame (+1) is
            # that candidate's quality rank
            applicant = sorted_by_evaluation.loc[i, 'Applicant']
            quality_rank = sorted_by_quality.index[sorted_by_quality['Applicant'] == applicant][0] + 1
            top_n_counts.loc[quality_rank, f'Top {i+1}'] += 1

    return top_n_counts

# Example usage
# num_runs = 100  # Number of simulation runs
# top_n_results = summarize_simulation_runs(num_runs=num_runs, num_applicants=100, num_judges=5, ratings_per_applicant=3,
#                                           top_n=5, alpha=0, loc=50, scale=15, judge_error=4, judges_attitude=0.3, judgment_coarse_graining=False)
# top_n_results



import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LinearSegmentedColormap


def plot_top_n_results(top_n_results, num_runs):
    """
    Plots a stacked bar chart of the top-n results, filling what is missing up to num_runs in grey.

    :param top_n_results: DataFrame containing the counts of each quality-ranked candidate in top n positions.
    :param num_runs: Total number of simulation runs.
    """

    base_cmap = cmaps.green_green1_r.cut(0.10, 'right').discrete(top_n_results.shape[1])
    newcolors = base_cmap(np.linspace(0, 1, top_n_results.shape[1]))

    # Append grey as the color for the 'Missing' (not chosen) category
    grey = np.array([178/256, 171/256, 165/256, 1])
    newcolors = np.vstack([newcolors, grey])
    # Create a new ListedColormap
    newcmp = ListedColormap(newcolors)

    # Calculate the missing counts to fill up to num_runs
    missing_counts = num_runs - top_n_results.sum(axis=1)

    # Prepare data for stacked bar chart
    data_to_plot = top_n_results.copy()
    data_to_plot['Missing'] = missing_counts

    # Create a figure and axis for plotting
    fig, ax = plt.subplots(figsize=(12, 8))
    # Plot stacked bar chart
    data_to_plot.head(top_n_results.shape[1]).plot(kind='bar', stacked=True, colormap=newcmp, alpha=.9, ax=ax)
    # Plot settings
    ax.set_title('How often did the actually best get chosen?', loc='right')  # Right-align title
    ax.set_xlabel('Real Applicant Rank')
    ax.set_ylabel('Selected in this many simulation runs')

    # Conditionally add legend
    if top_n_results.shape[1] <= 5:
        labels = [label.replace("Top", "Rank") for label in top_n_results.columns] + ['Not chosen']

        ax.legend(labels=labels, title='Rank in Evaluation', loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=top_n_results.shape[1]+1)  # Legend below the chart
    else:
        ax.legend().set_visible(False)
    plt.tight_layout()
    plt.close()

    return fig

    # plt.show()
# plot = plot_top_n_results(top_n_results, num_runs)

# num_applicants=100
# num_judges=10
# ratings_per_applicant=5
# top_n=5
# applicant_std_dev=20
# judge_error_std_dev=1
# judgment_coarse_graining=6

# top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])

# df_results = simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant,
#                                         applicant_std_dev, judge_error_std_dev, judgment_coarse_graining)
# display(df_results.sort_values(by='Rank of Evaluation').head(10))
# # Sort by Rank of Applicant Quality
# sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
# # Sort by Rank of Evaluation
# sorted_by_evaluation = df_results.sort_values(by='Rank of Evaluation').reset_index()

# for i in range(top_n):
#     # Find which quality-ranked candidate is in this top evaluation position
#     quality_rank = sorted_by_quality[sorted_by_evaluation.loc[i, 'Applicant'] == sorted_by_quality['Applicant']].index[0] + 1
#     top_n_counts.loc[quality_rank, f'Top {i+1}'] += 1

# display(top_n_counts.head(15))

# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# from scipy.stats import gaussian_kde

# def visualize_applicant_and_judge_distributions(alpha, beta, judge_error=1):
#     """
#     Visualizes the distribution of applicants' qualities and an example of a judge's evaluations for a random applicant
#     using density estimates with normalized heights.

#     :param applicant_std_dev: Standard deviation for the quality of applicants.
#     :param judge_error_std_dev: Standard deviation for the judge's margin of error.
#     """
#     # Generate applicant qualities
#     applicant_qualities = np.random.beta(alpha, beta, 5000) * 100

#     # Choose a random applicant for judge's evaluation
#     random_applicant_quality = np.random.choice(applicant_qualities)
#     judge_evaluations = np.random.normal(random_applicant_quality, judge_error, 5000)

#     # Calculate KDEs and find peak values
#     kde_applicant = gaussian_kde(applicant_qualities)
#     kde_judge = gaussian_kde(judge_evaluations)
#     x = np.linspace(0, 100, 1000)
#     kde_applicant_vals = kde_applicant(x)
#     kde_judge_vals = kde_judge(x)
#     peak_applicant = np.max(kde_applicant_vals)
#     peak_judge = np.max(kde_judge_vals)
#     scale_factor = peak_applicant / peak_judge

#     # Plotting
#     plt.figure(figsize=(12, 6))

#     # Plot for distribution of all applicants
#     sns.lineplot(x=x, y=kde_applicant_vals, color="blue", label='Applicant Qualities')
#     plt.fill_between(x, kde_applicant_vals, color="blue", alpha=0.3)
#     plt.title('Distribution of Applicant Qualities')
#     plt.xlabel('Quality')
#     plt.ylabel('Normalized Density')
#     plt.legend()
#     plt.xlim(0, 100)

#     # # Plot for distribution of a single applicant's evaluations
#     # sns.lineplot(x=x, y=kde_judge_vals * scale_factor, color='orange', label='Judge Evaluations')
#     # plt.fill_between(x, kde_judge_vals * scale_factor, color="orange", alpha=0.3)
#     # plt.title('Distribution of a Judge\'s Evaluations for a Chosen Applicant')
#     # plt.xlabel('Evaluation Score')
#     # plt.ylabel('Normalized Density')
#     # plt.legend()
#     # plt.xlim(0, 100)

#     plt.tight_layout()
#     plt.show()

# # Example usage
# visualize_applicant_and_judge_distributions(alpha=2, beta=1, judge_error=5)



import gradio as gr
import matplotlib.pyplot as plt
from io import BytesIO


def plot_skewed_normal_distribution(alpha, loc, scale, judge_error, judgement_variability):
    x = np.linspace(0, 100, 1000)
    y_pop_dist = skewnorm.pdf(x, alpha, loc, scale)

    fig, ax = plt.subplots(figsize=(7, 3))  # Figure size
    plt.fill_between(np.linspace(0, 100, 1000), y_pop_dist, color="#ee4d5a", alpha=0.8)
    plt.xlabel('True Applicant Quality Distribution')
    plt.xlim(0, 100)
    ax.set_yticklabels([])  # Remove y-axis labels

    # Judgement Variability Line
    line_length = judge_error
    line_x = [50 - line_length/2, 50 + line_length/2]
    plt.plot(line_x, [0, 0], color='black', linewidth=1.4, linestyle='dotted')


    # Small normal distribution: unbiased judge
    std_dev = judge_error / 2 / norm.ppf(0.975)
    small_dist_x = np.linspace(50 - 3*std_dev, 50 + 3*std_dev, 100)  # 3 standard deviations on each side
    small_dist_y = skewnorm.pdf(small_dist_x, 0, loc=50, scale=std_dev)
    small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12  # Scale down for representation
    plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
    plt.text(np.mean(line_x), np.max(y_pop_dist)*.08 + np.max(small_dist_y_scaled), 'Judgement Variability', ha='center', va='bottom', color='black', weight='bold')
    plt.plot([50, 50], [0, np.max(small_dist_y_scaled)], color='black', linewidth=1.4, linestyle='dotted')



    # Small normal distribution: harshest judge (negative skew)
    small_dist_x = np.linspace(25 - 3*std_dev, 25 + 3*std_dev, 100)  # 3 standard deviations on each side
    small_dist_y = skewnorm.pdf(small_dist_x, -judgement_variability, loc=25, scale=std_dev)
    small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12  # Scale down for representation
    plt.text(25, np.max(y_pop_dist)*.05 + np.max(small_dist_y_scaled), 'Most Harsh', ha='center', va='bottom', color='black')
    plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
    plt.plot([25, 25], [0, np.max(small_dist_y_scaled)], color='black', linewidth=1.4, linestyle='dotted')

    # Small normal distribution: most generous judge (positive skew)
    small_dist_x = np.linspace(75 - 3*std_dev, 75 + 3*std_dev, 100)  # 3 standard deviations on each side
    small_dist_y = skewnorm.pdf(small_dist_x, judgement_variability, loc=75, scale=std_dev)
    small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12  # Scale down for representation
    plt.text(75, np.max(y_pop_dist)*.05 + np.max(small_dist_y_scaled), 'Most Generous', ha='center', va='bottom', color='black')
    plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
    plt.plot([75, 75], [0, np.max(small_dist_y_scaled)], color='black', linewidth=1.4, linestyle='dotted')

    plt.close()

    return fig
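
# Example (sketch): render the population / judge-error figure with the app's
# default slider values from the interface below.
# fig = plot_skewed_normal_distribution(alpha=0, loc=70, scale=50, judge_error=7, judgement_variability=1.7)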

# Run one batch of simulations and plot the summary (wired to the Gradio button below)
def run_simulation_and_plot(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale, judge_error, judges_attitude, judgment_coarse_graining, judgment_coarse_graining_true_false):
    if not judgment_coarse_graining_true_false:
        judgment_coarse_graining = False
    top_n_results = summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale,
                                              judge_error, judges_attitude, judgment_coarse_graining)
    return plot_top_n_results(top_n_results, num_runs)
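
# Example (sketch): a headless run mirroring the Gradio slider defaults defined
# below; this reproduces the app's initial results plot without the interface.
# fig = run_simulation_and_plot(num_runs=100, num_applicants=100, num_judges=7,
#                               ratings_per_applicant=3, top_n=5, alpha=0,
#                               loc=70, scale=50, judge_error=7, judges_attitude=1.7,
#                               judgment_coarse_graining=7,
#                               judgment_coarse_graining_true_false=True)
# fig.savefig('top_n_defaults.png')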



intro_md = """
   # On Rankings

_by [Max Noichl](https://homepage.univie.ac.at/maximilian.noichl/)_

One of the central experiences of being an academic is the experience of being ranked. We are ranked when we submit abstracts for conferences, when we publish papers in ranked journals, when we apply for graduate school or perhaps already for a master's degree, and when we apply for faculty positions at departments which are, of course, ranked as well. But although rankings are catnip to academics (and presumably to everybody else as well), most people probably share the intuition that there is something weird or iffy about rankings, and may suspect that they are often not as informative as their prevalence suggests.

The simulation, which you can run yourself below, should give some further insight into that perceived weirdness. It involves setting up a population of candidates (like conference abstracts or job applicants) according to a distribution that you specify. Candidates with objective qualities are then sampled from this distribution and evaluated by judges, who have a certain error margin in their assessments, as well as variability in their harshness or generosity, reflecting real-world inaccuracies. The simulation averages the ratings each candidate receives. The results show that in many plausible scenarios the best candidates often don't make it to the top of the rankings, highlighting rankings' limited value for accurately identifying the most qualified individuals. Below, I give some more detailed explanation – but first, the simulation."""


explanation_md = """
The simulation works like this: we set up a population of candidates. These could be abstracts for a conference, applicants for a PhD position, departments getting ranked for 'eliteness', etc. We can determine how this distribution looks. For instance, you might expect many strong candidates and few inappropriate ones, a normal distribution around a midpoint, or predominantly poor applicants with rare exceptional cases. You can set the parameters of this distribution, which we model as a skewed normal (**Mean**, **Standard Deviation**, and **Skewness**), yourself, depending on your prior expectations. Your settings are reflected in the red distribution graphic on the right. The simulation then generates a **Number of Applicants** with objectively determined qualities for our ranking.

Applicant contributions are evaluated by a **Number of Judges**, who evenly distribute the task of rating until all applicants have received their ratings. Importantly, as in the real world, judges are not infallible; they make errors in assessing applicants. This is adjustable via the **Judge Error** setting, which represents the width of the 95% confidence interval of a distribution centered around the applicant's objective quality. For example, if an applicant has an objective quality score of 70 and the **Judge Error** is set to 4, then in 95% of cases judges are expected to score the applicant between 68 and 72. In most cases of academic rating, it seems implausible that people can reliably distinguish closely scored candidates (e.g., 75 vs. 77). Depending on your application, you can therefore experiment with different plausible error ranges for your specific scenario.

Judges' rating styles naturally vary. Some may be more critical, rating applicants harshly, while others may be more lenient. This variability is adjustable in the simulation through the **Judges' Attitude Range**. Each judge is assigned a random attitude value, drawn from a normal distribution centered at zero whose spread is set by this number; that value is then used as the skew of the skewed normal distribution (centered around the true quality) from which their assessments are drawn. The impact of these settings is visually represented by the black distributions within the population-distribution graphic on the right.

Once all evaluations are complete, each applicant's final score is determined by averaging all of their received ratings. Additionally, the simulation allows for coarse-grained judgments, where judges have to select the most appropriate score on a predefined scale (e.g., 0 to 7, determined by the **Coarse Graining Factor**). In that case (a very common practice), we lose quite a bit of information about the quality of the candidates.

To complete the setup, we specify the number of simulation runs before starting them with the **Run Simulations** button. The outcomes are displayed in the green graphic under the 'Simulation Results' tab on the right. These results, indicated by green bars, show how frequently the objectively best candidates make it into the top spots after undergoing our ranking. In many realistic scenarios, the simulation reveals a significant element of randomness, particularly in the lower top spots. **Often, the most qualified candidates don't consistently reach the top ranks**, and even the highest-ranked candidates are commonly misplaced.

I don't think this is a particularly new or surprising result. But I think it is very important to keep in mind, and to have an intuitive feel for, when interacting with rankings. I hope the simulation can help you with that. We should also note that the setup of this simulation in many ways presents an unrealistic best case for rankings. While our judges do make errors, they do not make errors systematically. They are not biased against specific candidates because of gender, ethnic, or class background, or the intellectual traditions they come from. But even in our very idealized ranking situation, we can see that the information the final ranking contains about the reality of things is often severely limited. Which is just to say that, in most cases, we probably shouldn't make that much of a fuss about rankings.
"""

comment_distribution_image = """<p>This is the distribution from which our applicants will be sampled:</p>"""

# Building the interface
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
  with gr.Column():
    gr.Markdown(intro_md)
    with gr.Row():
      with gr.Column():
        run_button = gr.Button("Run Simulations!")

        # control applicant distribution
        # with gr.Group():
        loc_slider = gr.Slider(0,100, step=1, value=70, label="Mean (Population)", info='Where the center of the distribution is.')
        scale_slider = gr.Slider(0, 80, step=1, value=50, label="Standard Deviation (Population)", info='How wide the distribution is.')
        alpha_slider = gr.Slider(-10, 10, step=1, value=0, label="Skewness (Population)", info='How asymmetric the distribution is.')

        # simulation settings:
        with gr.Group():
          num_applicants = gr.Slider(10, 300, step=10, value=100, label="Number of Applicants", info='How many applications were submitted.')
          num_judges = gr.Slider(1, 100, step=1, value=7, label="Number of Judges", info='How many Judges are involved.')
          ratings_per_applicant = gr.Slider(1, 5, step=1, value=3, label="Ratings per Applicant", info='How many different ratings each application gets.')
          top_n = gr.Slider(1, 40, step=1, value=5, label="Top N", info='How many candidates can be selected.')

        judge_error = gr.Slider(0, 20, step=1, value=7, label="Judge Error", info='How much error judges can plausibly commit in their ratings (the width of the 95% interval).')
        judges_attitude = gr.Slider(0, 10, step=.1, value=1.7, label="Judges' attitude range", info='How harsh or generous individual judges tend to be. (Skewness of their error distributions)')

        judgment_coarse_graining_true_false = gr.Checkbox(value= True, label="Coarse grain judgements.", info='Whether judgements are made on a coarser scale.')
        judgment_coarse_graining = gr.Slider(0, 30, step=1, value=7, label="Coarse Graining Factor", info='Number of levels on the judgement scale.')
        num_runs = gr.Slider(10, 1000, step=10,value=100, label="Number of Runs")


      with gr.Column():
        # with gr.Group():
        population_plot = gr.Plot(label="Population",render=True)
        gr.Markdown("""**Applicants quality distribution & judgement errors** – Above you can see in red the distribution from which we draw the real qualities of our applicants.
                        You can alter its **Mean, Scale and Skewness** on the left side. You can also see how large the errors are,
                        which our judges can potentially commit, and how harshly the most harsh (left) and how nice the most generous judges (right) can skew the assessments.
                        The (example) candidates true scores are shown by a vertical dotted line.
                        You can alter these values by playing with the **Judge Error** and the **Judge's attitude range** on the left.""")
        # with gr.Group():
          # Your existing plot output
        plot_output = gr.Plot(label="Results",show_label=True)
        gr.Markdown("""**Simulation Results** – Above are the results of our simulation. The green bars represent the frequency of the first, second,
                      and subsequent applicants being selected for the **Top N** spots. The shade of green shows the rank each
                      applicant achieved in the number of simulations. The grey area indicates the frequency with which applicants,
                      who were objectively top candidates, failed to be selected into the Top N at all.""")

    # Function call on button click
    run_button.click(
        run_simulation_and_plot,
        inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, loc_slider, scale_slider,  judge_error, judges_attitude, judgment_coarse_graining,judgment_coarse_graining_true_false],
        outputs=[plot_output], scroll_to_output = True
    )

    alpha_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])
    loc_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])
    scale_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])
    judge_error.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])
    judges_attitude.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])

    demo.load(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider,scale_slider,judge_error,judges_attitude], outputs=[population_plot])
    demo.load(run_simulation_and_plot,inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, loc_slider, scale_slider,  judge_error, judges_attitude, judgment_coarse_graining,judgment_coarse_graining_true_false],
              outputs=[plot_output])
    gr.Markdown(explanation_md)
if __name__ == "__main__":
    demo.launch(debug=True)

"""Next steps:
* Add inter-rater-agreement for the simulation.
* ~make both population and judges Potentially skewed normals.~
* Add parameter description and fazit. - This is about what we can expect from rankings.
* add an explanation of graphics.
* ~immediately run simulation~
* ~get rid of active legend for large plots~
"""

# import gradio as gr
# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.stats import skewnorm

# # Function to plot skewed normal distribution
# def plot_skewed_normal(alpha, loc, scale):
#     x = np.linspace(-10, 10, 1000)
#     y = skewnorm.pdf(x, alpha, loc, scale)

#     fig, ax = plt.subplots(figsize=(7, 3))
#     plt.fill_between(x, y, color="#6a4c93", alpha=0.8)
#     plt.xlabel('Value')
#     plt.ylabel('Probability Density')
#     ax.set_yticks([])  # Optional: Remove y-axis labels for cleaner look

#     return fig

# # Building the Gradio interface
# with gr.Blocks() as demo:
#     with gr.Column():
#         gr.Markdown("# Explore the Skewed Normal Distribution")

#         with gr.Row():
#             with gr.Column():
#                 # Control parameters of the skewed normal distribution
#                 alpha_slider = gr.Slider(-10, 10, step=0.1, value=0, label="Alpha (Skewness)")
#                 loc_slider = gr.Slider(-5, 5, step=0.1, value=0, label="Location (Mean)")
#                 scale_slider = gr.Slider(0.1, 10, step=0.1, value=1, label="Scale (Standard Deviation)")

#             with gr.Column():
#                 # Plot output
#                 plot_output = gr.Plot(label="Skewed Normal Distribution")

#         # Update the plot based on slider changes
#         alpha_slider.change(plot_skewed_normal, inputs=[alpha_slider, loc_slider, scale_slider], outputs=[plot_output])
#         loc_slider.change(plot_skewed_normal, inputs=[alpha_slider, loc_slider, scale_slider], outputs=[plot_output])
#         scale_slider.change(plot_skewed_normal, inputs=[alpha_slider, loc_slider, scale_slider], outputs=[plot_output])

#         # Load initial plot
#         demo.load(plot_skewed_normal, inputs=[alpha_slider, loc_slider, scale_slider], outputs=[plot_output])

# if __name__ == "__main__":
#     demo.launch()

# import numpy as np
# import matplotlib.pyplot as plt
# from scipy.stats import skewnorm, norm

# def plot_skewed_normal_distribution(alpha, loc, scale, judgement_variability, judge_error):
#     x = np.linspace(0, 100, 1000)
#     y_pop_dist = skewnorm.pdf(x, alpha, loc, scale)

#     fig, ax = plt.subplots(figsize=(7, 3))  # Figure size
#     plt.fill_between(np.linspace(0, 100, 1000), y_pop_dist, color="#ee4d5a", alpha=0.8)
#     plt.xlabel('True Applicants Quality-Distribution')
#     plt.xlim(0, 100)
#     ax.set_yticklabels([])  # Remove y-axis labels

#     # Judgement Variability Line
#     line_length = judge_error
#     line_x = [50 - line_length/2, 50 + line_length/2]
#     plt.plot(line_x, [0, 0], color='black', linewidth=2)


#     # Small Normal Distribution
#     std_dev = judge_error / 2 / norm.ppf(0.975)
#     small_dist_x = np.linspace(50 - 3*std_dev, 50 + 3*std_dev, 100)  # 3 standard deviations on each side
#     small_dist_y = skewnorm.pdf(small_dist_x,0, loc=50, scale=std_dev)  #
#     small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.02  # Scale down for representation
#     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
#     plt.text(np.mean(line_x), np.max(y_pop_dist)*.05 + np.max(small_dist_y_scaled) , 'Judgement Variability', ha='center', va='bottom', color='black')



#     plt.text(25, 0.008, 'Most Cranky', ha='center', va='bottom', color='black')
#     # Small Normal Distribution Cranky judge
#     std_dev = judge_error / 2 / norm.ppf(0.975)
#     small_dist_x = np.linspace(25 - 3*std_dev, 25 + 3*std_dev, 100)  # 3 standard deviations on each side
#     small_dist_y = skewnorm.pdf(small_dist_x,-judgement_variability, loc=25, scale=std_dev)  #
#     small_dist_y_scaled = small_dist_y / max(small_dist_y) * 0.005  # Scale down for representation

#     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)

#     plt.text(75, 0.008, 'Most Generous', ha='center', va='bottom', color='black')
#         # Small Normal Distribution generous judge
#     std_dev = judge_error / 2 / norm.ppf(0.975)
#     small_dist_x = np.linspace(75 - 3*std_dev, 75 + 3*std_dev, 100)  # 3 standard deviations on each side
#     small_dist_y = skewnorm.pdf(small_dist_x,judgement_variability, loc=75, scale=std_dev)  #
#     small_dist_y_scaled = small_dist_y / max(small_dist_y) * 0.005  # Scale down for representation

#     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
#     plt.show()

#     return fig


# # Example parameters
# alpha = 5
# loc = 50
# scale = 10
# judgement_variability = 2
# judge_error = 8

# # Call the function with the example parameters
# plot = plot_skewed_normal_distribution(alpha, loc, scale, judgement_variability, judge_error)
# plt.show()