m7n committed
Commit 9afeb25 · 1 Parent(s): 7326ce2

updated to use different distributions, added more explanation.

Files changed (1): app.py (+320 −79)
app.py CHANGED
--- a/app.py
@@ -1,10 +1,10 @@
  # -*- coding: utf-8 -*-
- """ranking_simulation_0.ipynb

  Automatically generated by Colaboratory.

  Original file is located at
- https://colab.research.google.com/drive/1GzjewA8ePLJ1fdk76Qax-aRRsSsNp417
  """

  # Commented out IPython magic to ensure Python compatibility.
@@ -17,6 +17,9 @@ import numpy as np
  import pandas as pd
  import opinionated
  import matplotlib.pyplot as plt
  plt.style.use("opinionated_rc")

  from opinionated.core import download_googlefont
@@ -26,22 +29,55 @@ plt.rc('font', family='Quicksand')

  import colormaps as cmaps

- def simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant, alpha, beta,
-                                judge_error=1, judgment_coarse_graining=False):
      """
-     Simulates judging of applicants by a set of judges with coarse graining functionality and random tie-breaking in ranking.
-
-     :param num_applicants: Number of applicants to be judged.
-     :param num_judges: Number of judges available for judging.
-     :param ratings_per_applicant: Number of ratings each applicant should receive.
-     :param alpha: Alpha parameter for the Beta distribution.
-     :param beta: Beta parameter for the Beta distribution.
-     :param judge_error_std_dev: Standard deviation for the judge's margin of error.
-     :param judgment_coarse_graining: Number of buckets for coarse graining (2 to 100) or False to disable.
-     :return: A Pandas DataFrame with detailed results for each applicant.
      """
      # Generate the quality of applicants from a Beta distribution normalized to 0-100
-     applicant_qualities = np.random.beta(alpha, beta, num_applicants) * 100

      # Function to apply coarse graining
      def coarse_grain_evaluation(evaluation, grain_size):
@@ -50,6 +86,7 @@ def simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant
      # Initialize evaluations dictionary
      evaluations = {f"Applicant_{i+1}": [] for i in range(num_applicants)}
      judge_workload = np.zeros(num_judges)

      # Randomly assign judges to applicants
      for _ in range(ratings_per_applicant):
@@ -57,7 +94,9 @@ def simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant
          probabilities = (max(judge_workload) - judge_workload + 1) / sum(max(judge_workload) - judge_workload + 1)
          judge = np.random.choice(num_judges, p=probabilities)
          judge_workload[judge] += 1
-         evaluation = np.random.uniform(applicant_qualities[applicant]-judge_error, applicant_qualities[applicant]+judge_error)

          # Apply coarse graining if enabled
          if judgment_coarse_graining:
@@ -90,38 +129,51 @@ def simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant

      return df

- # # Example usage with specified alpha and beta values
- # df_results = simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5, alpha=4, beta=4, judgment_coarse_graining=10)
- # df_results.head(30)  # Displaying the top 30 rows for brevity

- # df_results.sort_values(by='Rank of Evaluation').head(30)

  import pandas as pd

- def summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta,
-                               judge_error=1, judgment_coarse_graining=False):
-     """
-     Runs the applicant judging simulation multiple times and summarizes how often each candidate by quality was in the top n.
-
-     :param num_runs: Number of times to run the simulation.
-     :param num_applicants: Number of applicants to be judged.
-     :param num_judges: Number of judges available for judging.
-     :param ratings_per_applicant: Number of ratings each applicant should receive.
-     :param top_n: Number of top positions to consider in the summary.
-     :param applicant_std_dev: Standard deviation for the quality of applicants.
-     :param judge_error_std_dev: Standard deviation for the judge's margin of error.
-     :param judgment_coarse_graining: Number of buckets for coarse graining or False to disable.
-     :return: A Pandas DataFrame summarizing the results.
      """
      # Initialize counts for each quality-ranked candidate in top n positions
      top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])

      for _ in range(num_runs):
-         df_results = simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant,
-                                                 alpha=alpha, beta=beta, judge_error=judge_error, judgment_coarse_graining=judgment_coarse_graining)
          # Sort by Rank of Applicant Quality
          sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
          # Sort by Rank of Evaluation
@@ -135,10 +187,10 @@ def summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_
      return top_n_counts

  # Example usage
- # num_runs = 1000  # Number of simulation runs
- # top_n_results = summarize_simulation_runs(num_runs=num_runs, num_applicants=100, num_judges=5, ratings_per_applicant=3,
- #                                           top_n=5, alpha=2, beta=1, judge_error=4, judgment_coarse_graining=False)
- # top_n_results
@@ -184,6 +236,8 @@ def plot_top_n_results(top_n_results, num_runs):
          labels = [label.replace("Top", "Rank") for label in top_n_results.columns] + ['Not chosen']

          ax.legend(labels=labels, title='Rank in Evaluation', loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=top_n_results.shape[1]+1)  # Legend below the chart
      plt.tight_layout()
      return fig
@@ -272,69 +326,128 @@ def plot_top_n_results(top_n_results, num_runs):
  # # Example usage
  # visualize_applicant_and_judge_distributions(alpha=2, beta=1, judge_error=5)

  import gradio as gr
  import matplotlib.pyplot as plt
  from io import BytesIO

- from scipy.stats import beta

- # Function to plot beta distribution
- def plot_beta_distribution(a, b, judgement_variability):
-     x = np.linspace(0, 1, 1000)
-     y = beta.pdf(x, a, b)

      fig, ax = plt.subplots(figsize=(7, 3))  # Figure size
-     plt.fill_between(np.linspace(0, 100, 1000), y, color="#ee4d5a", alpha=0.8)
-     # plt.title('Distribution of Applicant Qualities')
      plt.xlabel('True Applicants Quality-Distribution')
      plt.xlim(0, 100)
      ax.set_yticklabels([])  # Remove y-axis labels

-     # Drawing the line
-     line_length = 2 * judgement_variability
      line_x = [50 - line_length/2, 50 + line_length/2]
-     plt.plot(line_x, [0, 0], color='black', linewidth=2)

-     # Labeling the line
-     plt.text(np.mean(line_x), 0.02, 'Judgement Variability', ha='center', va='bottom', color='black')

      return fig

  # Your existing function for running simulation and plotting
- def run_simulation_and_plot(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta, judge_error, judgment_coarse_graining, judgment_coarse_graining_true_false):
      if judgment_coarse_graining_true_false == False:
          judgment_coarse_graining = False
-     top_n_results = summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, beta, judge_error, judgment_coarse_graining)
      return plot_top_n_results(top_n_results, num_runs)

- intro_html = """
- <h1>On Rankings</h1>
- <p>One of the central experiences of being an academic is the experience of being ranked. We are ranked when we apply for graduate school, or maybe already for a master's degree. We are ranked when we're up for faculty positions. We are ranked when we submit abstracts for conferences. And when we publish papers, we do so, of course, in journals that are ranked. The places where we work, the departments, are ranked, of course, as well. But although rankings apparently are catnip to academics, and probably everybody else as well, we do have some agreement. Most people probably share the intuition that there's something weird or iffy about rankings, and the suspicion that maybe often they are not as informative as there are some beings absolutely everywhere who suggest.</p>
  """
  comment_distribution_image = """<p>This is the distribution from which our applicants will be sampled:</p>"""

  # Building the interface
- with gr.Blocks(theme=gr.themes.Monochrome(primary_hue="red", secondary_hue="pink", spacing_size="sm", text_size="lg", radius_size="none")) as demo:
      with gr.Column():
-         gr.HTML(intro_html)
          with gr.Row():
              with gr.Column():
                  run_button = gr.Button("Run Simulations!")

                  # control applicant distribution
                  # with gr.Group():
-                 alpha_slider = gr.Slider(0.1, 5, step=0.1, value=1.4, label="Alpha (β Distribution)")
-                 beta_slider = gr.Slider(0.1, 5, step=0.1, value=2.7, label="Beta (β Distribution)")
                  # simumlation-settings:
                  with gr.Group():
-                     num_applicants = gr.Slider(10, 300, step=10, value=100, label="Number of Applicants")
-                     num_judges = gr.Slider(1, 100, step=1, value=7, label="Number of Judges")
-                     ratings_per_applicant = gr.Slider(1, 5, step=1, value=3, label="Ratings per Applicant", info='how many different ratings each application gets.')
-                     top_n = gr.Slider(1, 40, step=1, value=5, label="Top N")

-                     judge_error = gr.Slider(0, 10, step=1, value=2, label="Judge Error")
                      judgment_coarse_graining_true_false = gr.Checkbox(value=True, label="Coarse grain judgements.")
                      judgment_coarse_graining = gr.Slider(0, 30, step=1, value=7, label="Coarse Graining Factor")
                      num_runs = gr.Slider(10, 1000, step=10, value=100, label="Number of Runs")
@@ -344,25 +457,153 @@ with gr.Blocks(theme=gr.themes.Monochrome(primary_hue="red", secondary_hue="pink

          with gr.Column():
              with gr.Group():
-                 # gr.HTML(comment_distribution_image)
-                 beta_plot = gr.Plot(label="Applicants quality distribution")
-
-                 # Your existing plot output
-                 plot_output = gr.Plot(label="Simulation Results", show_label=True)
-                 # https://discuss.huggingface.co/t/gradio-changing-background-colour-in-all-devices/42519

      # Function call on button click
      run_button.click(
          run_simulation_and_plot,
-         inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, beta_slider, judge_error, judgment_coarse_graining, judgment_coarse_graining_true_false],
          outputs=[plot_output], scroll_to_output=True
      )

-     alpha_slider.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider, judge_error], outputs=[beta_plot])
-     beta_slider.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider, judge_error], outputs=[beta_plot])
-     judge_error.change(plot_beta_distribution, inputs=[alpha_slider, beta_slider, judge_error], outputs=[beta_plot])
-
-     demo.load(plot_beta_distribution, inputs=[alpha_slider, beta_slider, judge_error], outputs=[beta_plot])

  if __name__ == "__main__":
-     demo.launch(debug=True)
+++ b/app.py
  # -*- coding: utf-8 -*-
+ """ranking_simulation_1_asymmetric_judges.ipynb

  Automatically generated by Colaboratory.

  Original file is located at
+ https://colab.research.google.com/drive/1d7GP8R96AgQSvJq4YL3nnzEmtBomevu0
  """

  # Commented out IPython magic to ensure Python compatibility.

  import pandas as pd
  import opinionated
  import matplotlib.pyplot as plt
+ from scipy.stats import skewnorm
+
  plt.style.use("opinionated_rc")

  from opinionated.core import download_googlefont

  import colormaps as cmaps

+ def sample_skewed_normal(alpha, loc, scale, n, range_min=None, range_max=None):
+     samples = []
+     while len(samples) < n:
+         sample = skewnorm.rvs(alpha, loc, scale)
+         if (range_min is None or sample >= range_min) and (range_max is None or sample <= range_max):
+             samples.append(sample)
+     return np.array(samples)
+
+ # # Example usage
+ # alpha = 5          # Skewness
+ # loc = 0            # Location (mean)
+ # scale = 1          # Scale (standard deviation)
+ # n_samples = 10000  # Number of samples to generate
+ # range_min, range_max = -2, 2  # Range for the samples
+
+ # samples = sample_skewed_normal(alpha, loc, scale, n_samples, range_min, range_max)
+ # # print(samples)
+
+ # import seaborn as sns
+ # sns.histplot(samples)
+
+ from scipy.stats import norm
+
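`sample_skewed_normal` is a rejection sampler: it redraws until `n` samples land inside `[range_min, range_max]`, so the result follows a truncated skew-normal. This stays cheap as long as most of the probability mass lies inside the range. A quick illustrative check with the population defaults used further down (alpha=0, loc=50, scale=15 on the 0–100 range; the check itself is not part of the commit):

```python
import numpy as np
from scipy.stats import skewnorm

# Share of the untruncated distribution that already falls inside [0, 100]:
mass_inside = skewnorm.cdf(100, 0, loc=50, scale=15) - skewnorm.cdf(0, 0, loc=50, scale=15)
print(round(mass_inside, 4))  # ~0.9991, so almost no draws are rejected

qualities = sample_skewed_normal(alpha=0, loc=50, scale=15, n=1000, range_min=0, range_max=100)
print(qualities.min() >= 0 and qualities.max() <= 100)  # True
```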
+ def simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5, alpha=0, loc=50, scale=15, judge_error=1, judges_attitude=0.3,
+                                judgment_coarse_graining=False):
      """
+     Simulates the process of judging applicants by a set of judges, incorporating randomness in evaluations, judge bias, and error.
+
+     Parameters:
+     - num_applicants (int): Number of applicants to be judged.
+     - num_judges (int): Number of judges involved in the evaluation process.
+     - ratings_per_applicant (int): Number of ratings each applicant receives from different judges.
+     - alpha (float): Skewness parameter of the skewed normal distribution used to simulate applicant qualities.
+     - loc (float): Location (mean) of the skewed normal distribution of applicant qualities.
+     - scale (float): Scale (standard deviation) of the skewed normal distribution of applicant qualities.
+     - judge_error (float): Width of the 95% interval of the random error in judges' evaluations.
+     - judges_attitude (float): Standard deviation of the normal distribution from which each judge's bias (the skew of their error distribution) is drawn.
+     - judgment_coarse_graining (int/bool): Number of buckets into which evaluations are coarse-grained; set to False to disable.
+
+     The function simulates the quality of each applicant using a skewed normal distribution and then assigns each applicant a set of evaluations by different judges, taking into account the judges' workload distribution, biases, and random error margins. Coarse graining of evaluations is applied if enabled.
+
+     Returns:
+     - pandas.DataFrame: A DataFrame with columns for each applicant's identifier, inherent quality, average evaluation score, individual scores from each judge, original ranking based on scores, and final ranking after random tie-breaking for identical scores.
      """
      # Generate the quality of applicants from a skewed normal distribution truncated to 0-100
+     applicant_qualities = sample_skewed_normal(alpha, loc, scale, num_applicants, 0, 100)

      # Function to apply coarse graining
      def coarse_grain_evaluation(evaluation, grain_size):

      # Initialize evaluations dictionary
      evaluations = {f"Applicant_{i+1}": [] for i in range(num_applicants)}
      judge_workload = np.zeros(num_judges)
+     judges_attitudes = np.random.normal(0, judges_attitude, num_judges)

      # Randomly assign judges to applicants
      for _ in range(ratings_per_applicant):
          probabilities = (max(judge_workload) - judge_workload + 1) / sum(max(judge_workload) - judge_workload + 1)
          judge = np.random.choice(num_judges, p=probabilities)
          judge_workload[judge] += 1
+         # Skew-normal draw around the true quality: the judge's attitude sets the skew;
+         # judge_error is divided by 2 so that it spans the full 95% interval, matching
+         # the app text and the distribution plot below.
+         evaluation = sample_skewed_normal(alpha=judges_attitudes[judge], loc=applicant_qualities[applicant], scale=judge_error/2/norm.ppf(0.975), n=1)[0]

          # Apply coarse graining if enabled
          if judgment_coarse_graining:
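The assignment rule in this hunk weights judges inversely to their current workload, so underworked judges are proportionally more likely to be drawn and loads stay close to even without a fixed rotation. A small illustrative check with a made-up workload vector:

```python
import numpy as np

judge_workload = np.array([3.0, 1.0, 0.0])           # hypothetical current workloads
weights = max(judge_workload) - judge_workload + 1   # lighter load -> larger weight
probabilities = weights / weights.sum()
print(probabilities)  # [0.125 0.375 0.5  ] -- the idle judge is picked most often
```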
 
      return df

+ df_results = simulate_applicant_judging(num_applicants=100, num_judges=10, ratings_per_applicant=5, alpha=0, loc=50, scale=15, judge_error=1, judges_attitude=0.3,
+                                         judgment_coarse_graining=10)
+ df_results.head(30)

+ df_results.sort_values(by='Rank of Evaluation').head(30)

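As a consistency check on the error scaling used in the judge's draw above (assuming, as the app text below states, that `judge_error` is the full width of the 95% interval): with `judge_error=4`, an unbiased judge's score for an applicant of true quality 70 should land between 68 and 72 about 95% of the time.

```python
from scipy.stats import norm

judge_error = 4                             # full width of the 95% interval
scale = judge_error / 2 / norm.ppf(0.975)   # half-width of 2 mapped to a standard deviation (~1.02)
low, high = norm.interval(0.95, loc=70, scale=scale)
print(round(low, 1), round(high, 1))        # -> 68.0 72.0
```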
  import pandas as pd

+ def summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale,
+                               judge_error, judges_attitude, judgment_coarse_graining):
      """
+     Runs multiple simulations of applicant judging and summarizes how often each candidate, ranked by quality, lands in the top n positions.
+
+     Parameters:
+     - num_runs (int): Number of simulation iterations to run.
+     - num_applicants (int): Number of applicants in each simulation.
+     - num_judges (int): Number of judges evaluating the applicants in each simulation.
+     - ratings_per_applicant (int): Number of evaluations each applicant receives.
+     - top_n (int): Number of top positions to analyze in the ranking.
+     - alpha (float): Skewness parameter of the skewed normal distribution used to simulate applicant qualities.
+     - loc (float): Location (mean) of the skewed normal distribution of applicant qualities.
+     - scale (float): Scale (standard deviation) of the skewed normal distribution of applicant qualities.
+     - judge_error (float): Width of the 95% interval of the random error in judges' evaluations.
+     - judges_attitude (float): Standard deviation of the normal distribution from which each judge's bias is drawn.
+     - judgment_coarse_graining (int/bool): Number of buckets into which evaluations are coarse-grained; set to False to disable.
+
+     The function performs multiple runs of the applicant judging simulation and aggregates how often each applicant, based on their quality rank, appears in the top n positions of the evaluation rankings.
+
+     Returns:
+     - pandas.DataFrame: A DataFrame where each row corresponds to an applicant (ranked by quality) and each column counts how often that applicant occupied a specific top-n position across all simulation runs.
+     """
167
  # Initialize counts for each quality-ranked candidate in top n positions
168
  top_n_counts = pd.DataFrame(0, index=range(1, num_applicants + 1), columns=[f'Top {i}' for i in range(1, top_n + 1)])
169
 
170
  for _ in range(num_runs):
171
+ # df_results = simulate_applicant_judging(num_applicants, num_judges, ratings_per_applicant,
172
+ # alpha=alpha, beta=beta, judge_error=judge_error, judgment_coarse_graining=judgment_coarse_graining)
173
+ df_results = simulate_applicant_judging(num_applicants=num_applicants, num_judges=num_judges, ratings_per_applicant=ratings_per_applicant,
174
+ alpha=alpha, loc=loc, scale=scale,
175
+ judge_error=judge_error, judges_attitude=judges_attitude,
176
+ judgment_coarse_graining=judgment_coarse_graining)
177
  # Sort by Rank of Applicant Quality
178
  sorted_by_quality = df_results.sort_values(by='Applicant Quality', ascending=False).reset_index()
179
  # Sort by Rank of Evaluation
 
187
  return top_n_counts
188
 
189
  # Example usage
190
+ num_runs = 100 # Number of simulation runs
191
+ top_n_results = summarize_simulation_runs(num_runs=num_runs, num_applicants=100, num_judges=5, ratings_per_applicant=3,
192
+ top_n=5, alpha=0, loc=50, scale=15, judge_error=4, judges_attitude=0.3, judgment_coarse_graining=False)
193
+ top_n_results
194
 
195
 
196
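`top_n_results` has one row per quality rank (1 = objectively best applicant) and one column per top position, holding counts across runs. Two illustrative summaries one might read off it (not part of the commit):

```python
# Fraction of runs in which the objectively best applicant (quality rank 1)
# landed anywhere in the top 5:
print(top_n_results.loc[1].sum() / num_runs)

# Fraction of runs in which the 'Top 1' spot went to the objectively best applicant:
print(top_n_results.loc[1, 'Top 1'] / num_runs)
```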
 
 
          labels = [label.replace("Top", "Rank") for label in top_n_results.columns] + ['Not chosen']

          ax.legend(labels=labels, title='Rank in Evaluation', loc='lower center', bbox_to_anchor=(0.5, -0.2), ncol=top_n_results.shape[1]+1)  # Legend below the chart
+     else:
+         ax.legend().set_visible(False)
      plt.tight_layout()
      return fig

  # # Example usage
  # visualize_applicant_and_judge_distributions(alpha=2, beta=1, judge_error=5)

  import gradio as gr
  import matplotlib.pyplot as plt
  from io import BytesIO

+ def plot_skewed_normal_distribution(alpha, loc, scale, judge_error, judgement_variability):
+     x = np.linspace(0, 100, 1000)
+     y_pop_dist = skewnorm.pdf(x, alpha, loc, scale)

      fig, ax = plt.subplots(figsize=(7, 3))  # Figure size
+     plt.fill_between(x, y_pop_dist, color="#ee4d5a", alpha=0.8)
      plt.xlabel('True Applicants Quality-Distribution')
      plt.xlim(0, 100)
      ax.set_yticklabels([])  # Remove y-axis labels

+     # Judgement-variability line: its length is judge_error, the width of the 95% interval
+     line_length = judge_error
      line_x = [50 - line_length/2, 50 + line_length/2]
+     plt.plot(line_x, [0, 0], color='black', linewidth=1.4, linestyle='dotted')
+
+     # Small normal distribution: an unbiased judge's error around a quality of 50
+     std_dev = judge_error / 2 / norm.ppf(0.975)
+     small_dist_x = np.linspace(50 - 3*std_dev, 50 + 3*std_dev, 100)  # 3 standard deviations on each side
+     small_dist_y = skewnorm.pdf(small_dist_x, 0, loc=50, scale=std_dev)
+     small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12  # Scale down for display
+     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
+     plt.text(np.mean(line_x), np.max(y_pop_dist)*.08 + np.max(small_dist_y_scaled), 'Judgement Variability', ha='center', va='bottom', color='black', weight='bold')
+
+     # Small skewed distribution: the harshest judge
+     small_dist_x = np.linspace(25 - 3*std_dev, 25 + 3*std_dev, 100)
+     small_dist_y = skewnorm.pdf(small_dist_x, -judgement_variability, loc=25, scale=std_dev)
+     small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12
+     plt.text(25, np.max(y_pop_dist)*.05 + np.max(small_dist_y_scaled), 'Most Harsh', ha='center', va='bottom', color='black')
+     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)
+
+     # Small skewed distribution: the most generous judge
+     small_dist_x = np.linspace(75 - 3*std_dev, 75 + 3*std_dev, 100)
+     small_dist_y = skewnorm.pdf(small_dist_x, judgement_variability, loc=75, scale=std_dev)
+     small_dist_y_scaled = small_dist_y / max(small_dist_y) * np.max(y_pop_dist)*.12
+     plt.text(75, np.max(y_pop_dist)*.05 + np.max(small_dist_y_scaled), 'Most Generous', ha='center', va='bottom', color='black')
+     plt.plot(small_dist_x, small_dist_y_scaled, color='black', linewidth=2)

      return fig
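For reference, the interface below wires this plot to the sliders; called directly with the slider defaults (alpha=0, loc=70, scale=50, judge_error=7, attitude range 1.7) it would be:

```python
fig = plot_skewed_normal_distribution(alpha=0, loc=70, scale=50, judge_error=7, judgement_variability=1.7)
fig.show()  # or return it from a Gradio callback, as below
```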
 
  # Function for running the simulation and plotting the results
+ def run_simulation_and_plot(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale, judge_error, judges_attitude, judgment_coarse_graining, judgment_coarse_graining_true_false):
      if not judgment_coarse_graining_true_false:
          judgment_coarse_graining = False
+     top_n_results = summarize_simulation_runs(num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha, loc, scale,
+                                               judge_error, judges_attitude, judgment_coarse_graining)
      return plot_top_n_results(top_n_results, num_runs)

+ intro_md = """
+ # On Rankings
+
+ _by [Max Noichl](https://homepage.univie.ac.at/maximilian.noichl/)_
+
+ One of the central experiences of being an academic is the experience of being ranked. We are ranked when we submit abstracts for conferences, when we publish papers in ranked journals, when we apply for graduate school or maybe already for a master's degree, and when we apply for faculty positions at departments which are, of course, ranked as well. But although rankings are catnip to academics (and presumably everybody else as well), most people probably share the intuition that there is something weird or iffy about rankings, and may suspect that they are often not as informative as their prevalence suggests.
+
+ The simulation below, which you can run yourself, should give some further insight into that perceived weirdness. It involves setting up a population of candidates (like conference abstracts or job applicants) according to a distribution which you specify. Candidates with objective qualities are sampled from this distribution and evaluated by judges, who have a certain error margin in their assessments, reflecting real-world inaccuracies. The simulation then averages the ratings each candidate receives. The results show that in many plausible scenarios the best candidates often don't make it into the top of the rankings, highlighting rankings' limited value for accurately identifying the most qualified individuals. Below I give a more detailed explanation – but first, the simulation.
  """
+
+ explanation_md = """
+ The simulation works like this: We set up a population of candidates. These could be abstracts for a conference, applicants to a PhD position, departments getting ranked for 'eliteness', etc. We can determine how this distribution looks. For instance, you might expect many strong candidates and few inappropriate ones, a normal distribution around a midpoint, or predominantly poor applicants with rare exceptional cases. You can set the parameters of this distribution (which we model as a skewed normal) yourself: **Mean**, **Standard Deviation**, and **Skewness**, depending on your prior expectations. Your settings are reflected in the red distribution graphic on the right. The simulation then generates a **Number of Applicants** with objectively determined qualities for our ranking.
+
+ Applicants are evaluated by a **Number of Judges**, who evenly distribute the task of rating until every applicant has received its ratings. Importantly, as in the real world, judges are not infallible; they make errors in assessing applicants. This is adjustable via the **Judge Error** property, which represents the width of the 95% confidence interval of a distribution centered around the applicant's objective quality. For example, if an applicant has an objective quality score of 70 and the **Judge Error** is set to 4, then in 95% of cases judges are expected to score the applicant between 68 and 72. In most cases of academic rating, it seems implausible that people can reliably distinguish closely scored candidates (e.g., 75 vs. 77). Depending on your application, you can therefore experiment with different plausible error ranges for your specific scenario.
+
+ Judges' rating styles naturally vary. Some may be more critical, rating applicants harshly, while others may be more lenient. This variability is adjustable through the **Judges' Attitude Range**, which defines the maximum levels of strictness and leniency. The impact of these settings is visually represented by the black distributions within the population-distribution graphic on the right.
+
+ Once all evaluations are complete, each applicant's final score is determined by averaging all their received ratings. Additionally, the simulation allows for coarse-grained judgments, where judges have to select the most appropriate score on a predefined scale (e.g., 0 to 7, determined by the **Coarse Graining Factor**). In that case (a very common practice), we lose quite a bit of information about the quality of the candidates.
+
+ To complete the setup, we specify the number of simulation runs before starting them with the **Run Simulations!** button. The outcomes are displayed in the green graphic under 'Simulation Results' on the right. These results, indicated by green bars, show how often the objectively best candidates make it into the top spots after undergoing our ranking. In many realistic scenarios, the simulation reveals a significant element of randomness, particularly in the lower top spots. **Often, the most qualified candidates don't consistently reach the top ranks**, and even the highest-ranked candidates are commonly misplaced.
+
+ I don't think that this is a particularly new or surprising result. But I think it is very important to keep in mind, and to have an intuitive feel for, when interacting with rankings. I hope the simulation can help you with that. We should also note that the setup of this simulation in many ways presents an unrealistically favorable case for rankings. While our judges do make errors, they do not make errors systematically. They are not biased against specific candidates for reasons of gender, ethnicity, class background, or the intellectual traditions they come from. But even in our very idealized ranking situation, we can see that the information the final ranking contains about the reality of things is often severely limited. Which is just to say that, in most cases, we probably shouldn't make that much of a fuss about rankings.
+ """
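The body of `coarse_grain_evaluation` lies outside the hunks shown in this diff, so the exact bucketing isn't visible here. A minimal sketch of the kind of snapping-to-a-fixed-scale the text above describes (the nearest-level rounding rule is an assumption):

```python
def coarse_grain_evaluation(evaluation, grain_size):
    # Snap a 0-100 score to the nearest of grain_size + 1 evenly spaced levels
    # (grain_size=7 gives the scale 0, 100/7, ..., 100). Assumed, not from the diff.
    step = 100 / grain_size
    return round(evaluation / step) * step

print(coarse_grain_evaluation(68.3, 7))  # -> ~71.43: information between levels is lost
```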
  comment_distribution_image = """<p>This is the distribution from which our applicants will be sampled:</p>"""

  # Building the interface
+ with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
      with gr.Column():
+         gr.Markdown(intro_md)
          with gr.Row():
              with gr.Column():
                  run_button = gr.Button("Run Simulations!")

                  # control applicant distribution
+                 loc_slider = gr.Slider(0, 100, step=1, value=70, label="Mean (Population)")
+                 scale_slider = gr.Slider(0, 80, step=1, value=50, label="Standard Deviation (Population)")
+                 alpha_slider = gr.Slider(-10, 10, step=1, value=0, label="Skewness (Population)")
+
                  # simulation settings:
                  with gr.Group():
+                     num_applicants = gr.Slider(10, 300, step=10, value=100, label="Number of Applicants", info='How many applications were submitted.')
+                     num_judges = gr.Slider(1, 100, step=1, value=7, label="Number of Judges", info='How many judges are involved.')
+                     ratings_per_applicant = gr.Slider(1, 5, step=1, value=3, label="Ratings per Applicant", info='How many different ratings each application gets.')
+                     top_n = gr.Slider(1, 40, step=1, value=5, label="Top N", info='How many candidates can be selected.')
+
+                     judge_error = gr.Slider(0, 20, step=1, value=7, label="Judge Error", info='How much error judges can plausibly commit in their ratings.')
+                     judges_attitude = gr.Slider(0, 10, step=.1, value=1.7, label="Judges' attitude range")
                      judgment_coarse_graining_true_false = gr.Checkbox(value=True, label="Coarse-grain judgements.")
                      judgment_coarse_graining = gr.Slider(0, 30, step=1, value=7, label="Coarse Graining Factor")
                      num_runs = gr.Slider(10, 1000, step=10, value=100, label="Number of Runs")

              with gr.Column():
                  with gr.Group():
+                     population_plot = gr.Plot(label="Applicants quality distribution & judgement errors")
+                     gr.Markdown("""Above, in red, you can see the distribution from which we draw the real qualities of our applicants.
+                         You can alter its **Mean, Standard Deviation and Skewness** on the left. You can also see how large the errors are
+                         that our judges commit, and how harshly the harshest and most generous judges judge.
+                         You can alter these values with **Judge Error** and **Judges' attitude range** on the left.""")
+                 with gr.Group():
+                     # Simulation results plot
+                     plot_output = gr.Plot(label="Simulation Results", show_label=True)
+                     gr.Markdown("""Above are the results of our simulation. The green bars represent how often the first, second,
+                         and subsequent applicants were selected for the **Top N** spots. The shade of green shows the rank each
+                         applicant achieved across the simulations. The grey area indicates how often applicants
+                         who were objectively top candidates failed to be selected into the Top N at all.""")

      # Function call on button click
      run_button.click(
          run_simulation_and_plot,
+         inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude, judgment_coarse_graining, judgment_coarse_graining_true_false],
          outputs=[plot_output], scroll_to_output=True
      )

+     # Redraw the population plot whenever one of its parameters changes
+     alpha_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])
+     loc_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])
+     scale_slider.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])
+     judge_error.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])
+     judges_attitude.change(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])

+     # Draw both plots once on page load
+     demo.load(plot_skewed_normal_distribution, inputs=[alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude], outputs=[population_plot])
+     demo.load(run_simulation_and_plot, inputs=[num_runs, num_applicants, num_judges, ratings_per_applicant, top_n, alpha_slider, loc_slider, scale_slider, judge_error, judges_attitude, judgment_coarse_graining, judgment_coarse_graining_true_false],
+               outputs=[plot_output])
+     gr.Markdown(explanation_md)
  if __name__ == "__main__":
+     demo.launch(debug=True)
+
+ """Next steps:
+ * Add inter-rater agreement to the simulation.
+ * ~Make both population and judges potentially skewed normals.~
+ * Add parameter descriptions and a conclusion - this is about what we can expect from rankings.
+ * Add an explanation of the graphics.
+ * ~Immediately run the simulation~
+ * ~Get rid of the active legend for large plots~
+ """