Avijit Ghosh commited on
Commit
981ea1d
·
1 Parent(s): 3d19330

removed notebook

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. temp.ipynb +0 -568
.gitignore CHANGED
@@ -1,2 +1,4 @@
1
  temp/
2
  .env
 
 
 
1
  temp/
2
  .env
3
+ *.ipynb
4
+ *.ipynb_checkpoints/
temp.ipynb DELETED
@@ -1,568 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 20,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import pandas as pd"
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "execution_count": 21,
15
- "metadata": {},
16
- "outputs": [],
17
- "source": [
18
- "df = pd.read_csv('DemoData.csv')"
19
- ]
20
- },
21
- {
22
- "cell_type": "code",
23
- "execution_count": 22,
24
- "metadata": {},
25
- "outputs": [],
26
- "source": [
27
- "import pandas as pd\n",
28
- "import yaml\n",
29
- "import os\n",
30
- "import ast\n",
31
- "\n",
32
- "# Create a folder to store YAML files if it doesn't exist\n",
33
- "if not os.path.exists('configs'):\n",
34
- " os.makedirs('configs')\n",
35
- "\n",
36
- "# Iterate over each row in the DataFrame\n",
37
- "for index, row in df.iterrows():\n",
38
- " # Extract Metaname and use it as the filename for YAML\n",
39
- " filename = str(row['Metaname']) + '.yaml'\n",
40
- " # Convert 'Screenshots' column to a Python list\n",
41
- " screenshots_list = None\n",
42
- " try:\n",
43
- "\n",
44
- " screenshots_list = ast.literal_eval(row['Screenshots'])\n",
45
- " except:\n",
46
- " screenshots_list = []\n",
47
- " # Remove the 'Metaname' and 'Screenshots' columns from the data to be converted to YAML\n",
48
- " row_data = row.drop(['Metaname', 'Screenshots'])\n",
49
- " # Convert the remaining data to a dictionary\n",
50
- " data_dict = row_data.to_dict()\n",
51
- " # Add the 'Screenshots' list to the dictionary\n",
52
- " data_dict['Screenshots'] = screenshots_list\n",
53
- " # Write the data as YAML to a new file\n",
54
- " with open(os.path.join('configs', filename), 'w') as yamlfile:\n",
55
- " yaml.dump(data_dict, yamlfile, default_flow_style=False)"
56
- ]
57
- },
58
- {
59
- "cell_type": "code",
60
- "execution_count": 5,
61
- "metadata": {},
62
- "outputs": [
63
- {
64
- "data": {
65
- "text/html": [
66
- "<div>\n",
67
- "<style scoped>\n",
68
- " .dataframe tbody tr th:only-of-type {\n",
69
- " vertical-align: middle;\n",
70
- " }\n",
71
- "\n",
72
- " .dataframe tbody tr th {\n",
73
- " vertical-align: top;\n",
74
- " }\n",
75
- "\n",
76
- " .dataframe thead th {\n",
77
- " text-align: right;\n",
78
- " }\n",
79
- "</style>\n",
80
- "<table border=\"1\" class=\"dataframe\">\n",
81
- " <thead>\n",
82
- " <tr style=\"text-align: right;\">\n",
83
- " <th></th>\n",
84
- " <th>Group</th>\n",
85
- " <th>Modality</th>\n",
86
- " <th>Level</th>\n",
87
- " <th>Metaname</th>\n",
88
- " <th>Suggested Evaluation</th>\n",
89
- " <th>What it is evaluating</th>\n",
90
- " <th>Considerations</th>\n",
91
- " <th>Link</th>\n",
92
- " <th>URL</th>\n",
93
- " <th>Screenshots</th>\n",
94
- " <th>Applicable Models</th>\n",
95
- " <th>Datasets</th>\n",
96
- " <th>Hashtags</th>\n",
97
- " </tr>\n",
98
- " </thead>\n",
99
- " <tbody>\n",
100
- " <tr>\n",
101
- " <th>0</th>\n",
102
- " <td>BiasEvals</td>\n",
103
- " <td>Text</td>\n",
104
- " <td>Model</td>\n",
105
- " <td>weat</td>\n",
106
- " <td>Word Embedding Association Test (WEAT)</td>\n",
107
- " <td>Associations and word embeddings based on Impl...</td>\n",
108
- " <td>Although based in human associations, general ...</td>\n",
109
- " <td>Semantics derived automatically from language ...</td>\n",
110
- " <td>https://researchportal.bath.ac.uk/en/publicati...</td>\n",
111
- " <td>['Images/WEAT1.png', 'Images/WEAT2.png']</td>\n",
112
- " <td>NaN</td>\n",
113
- " <td>NaN</td>\n",
114
- " <td>NaN</td>\n",
115
- " </tr>\n",
116
- " <tr>\n",
117
- " <th>1</th>\n",
118
- " <td>BiasEvals</td>\n",
119
- " <td>Text</td>\n",
120
- " <td>Model</td>\n",
121
- " <td>wefat</td>\n",
122
- " <td>Word Embedding Factual As\\nsociation Test (WEFAT)</td>\n",
123
- " <td>Associations and word embeddings based on Impl...</td>\n",
124
- " <td>Although based in human associations, general ...</td>\n",
125
- " <td>Semantics derived automatically from language ...</td>\n",
126
- " <td>https://researchportal.bath.ac.uk/en/publicati...</td>\n",
127
- " <td>NaN</td>\n",
128
- " <td>NaN</td>\n",
129
- " <td>NaN</td>\n",
130
- " <td>NaN</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>2</th>\n",
134
- " <td>BiasEvals</td>\n",
135
- " <td>Text</td>\n",
136
- " <td>Dataset</td>\n",
137
- " <td>stereoset</td>\n",
138
- " <td>StereoSet</td>\n",
139
- " <td>Protected class stereotypes</td>\n",
140
- " <td>Automating stereotype detection makes distingu...</td>\n",
141
- " <td>StereoSet: Measuring stereotypical bias in pre...</td>\n",
142
- " <td>https://arxiv.org/abs/2004.09456</td>\n",
143
- " <td>NaN</td>\n",
144
- " <td>NaN</td>\n",
145
- " <td>NaN</td>\n",
146
- " <td>NaN</td>\n",
147
- " </tr>\n",
148
- " <tr>\n",
149
- " <th>3</th>\n",
150
- " <td>BiasEvals</td>\n",
151
- " <td>Text</td>\n",
152
- " <td>Dataset</td>\n",
153
- " <td>crwospairs</td>\n",
154
- " <td>Crow-S Pairs</td>\n",
155
- " <td>Protected class stereotypes</td>\n",
156
- " <td>Automating stereotype detection makes distingu...</td>\n",
157
- " <td>CrowS-Pairs: A Challenge Dataset for Measuring...</td>\n",
158
- " <td>https://arxiv.org/abs/2010.00133</td>\n",
159
- " <td>NaN</td>\n",
160
- " <td>NaN</td>\n",
161
- " <td>NaN</td>\n",
162
- " <td>NaN</td>\n",
163
- " </tr>\n",
164
- " <tr>\n",
165
- " <th>4</th>\n",
166
- " <td>BiasEvals</td>\n",
167
- " <td>Text</td>\n",
168
- " <td>Output</td>\n",
169
- " <td>honest</td>\n",
170
- " <td>HONEST: Measuring Hurtful Sentence Completion ...</td>\n",
171
- " <td>Protected class stereotypes and hurtful language</td>\n",
172
- " <td>Automating stereotype detection makes distingu...</td>\n",
173
- " <td>HONEST: Measuring Hurtful Sentence Completion ...</td>\n",
174
- " <td>https://aclanthology.org/2021.naacl-main.191.pdf</td>\n",
175
- " <td>NaN</td>\n",
176
- " <td>NaN</td>\n",
177
- " <td>NaN</td>\n",
178
- " <td>NaN</td>\n",
179
- " </tr>\n",
180
- " <tr>\n",
181
- " <th>5</th>\n",
182
- " <td>BiasEvals</td>\n",
183
- " <td>Image</td>\n",
184
- " <td>Model</td>\n",
185
- " <td>ieat</td>\n",
186
- " <td>Image Embedding Association Test (iEAT)</td>\n",
187
- " <td>Embedding associations</td>\n",
188
- " <td>Although based in human associations, general ...</td>\n",
189
- " <td>Image Representations Learned With Unsupervise...</td>\n",
190
- " <td>https://dl.acm.org/doi/abs/10.1145/3442188.344...</td>\n",
191
- " <td>NaN</td>\n",
192
- " <td>NaN</td>\n",
193
- " <td>NaN</td>\n",
194
- " <td>NaN</td>\n",
195
- " </tr>\n",
196
- " <tr>\n",
197
- " <th>6</th>\n",
198
- " <td>BiasEvals</td>\n",
199
- " <td>Image</td>\n",
200
- " <td>Dataset</td>\n",
201
- " <td>imagedataleak</td>\n",
202
- " <td>Dataset leakage and model leakage</td>\n",
203
- " <td>Gender and label bias</td>\n",
204
- " <td>NaN</td>\n",
205
- " <td>Balanced Datasets Are Not Enough: Estimating a...</td>\n",
206
- " <td>https://arxiv.org/abs/1811.08489</td>\n",
207
- " <td>NaN</td>\n",
208
- " <td>NaN</td>\n",
209
- " <td>NaN</td>\n",
210
- " <td>NaN</td>\n",
211
- " </tr>\n",
212
- " <tr>\n",
213
- " <th>7</th>\n",
214
- " <td>BiasEvals</td>\n",
215
- " <td>Image</td>\n",
216
- " <td>Output</td>\n",
217
- " <td>stablebias</td>\n",
218
- " <td>Characterizing the variation in generated images</td>\n",
219
- " <td>NaN</td>\n",
220
- " <td>NaN</td>\n",
221
- " <td>Stable bias: Analyzing societal representation...</td>\n",
222
- " <td>https://arxiv.org/abs/2303.11408</td>\n",
223
- " <td>NaN</td>\n",
224
- " <td>NaN</td>\n",
225
- " <td>NaN</td>\n",
226
- " <td>NaN</td>\n",
227
- " </tr>\n",
228
- " <tr>\n",
229
- " <th>8</th>\n",
230
- " <td>BiasEvals</td>\n",
231
- " <td>Image</td>\n",
232
- " <td>Output</td>\n",
233
- " <td>homoglyphbias</td>\n",
234
- " <td>Effect of different scripts on text-to-image g...</td>\n",
235
- " <td>It evaluates generated images for cultural ste...</td>\n",
236
- " <td>NaN</td>\n",
237
- " <td>Exploiting Cultural Biases via Homoglyphs in T...</td>\n",
238
- " <td>https://arxiv.org/pdf/2209.08891.pdf</td>\n",
239
- " <td>NaN</td>\n",
240
- " <td>NaN</td>\n",
241
- " <td>NaN</td>\n",
242
- " <td>NaN</td>\n",
243
- " </tr>\n",
244
- " <tr>\n",
245
- " <th>9</th>\n",
246
- " <td>BiasEvals</td>\n",
247
- " <td>Audio</td>\n",
248
- " <td>Taxonomy (?)</td>\n",
249
- " <td>notmyvoice</td>\n",
250
- " <td>Not My Voice! A Taxonomy of Ethical and Safety...</td>\n",
251
- " <td>Lists harms of audio/speech generators</td>\n",
252
- " <td>Not necessarily evaluation but a good source o...</td>\n",
253
- " <td>Not My Voice! A Taxonomy of Ethical and Safety...</td>\n",
254
- " <td>https://arxiv.org/pdf/2402.01708.pdf</td>\n",
255
- " <td>NaN</td>\n",
256
- " <td>NaN</td>\n",
257
- " <td>NaN</td>\n",
258
- " <td>NaN</td>\n",
259
- " </tr>\n",
260
- " <tr>\n",
261
- " <th>10</th>\n",
262
- " <td>BiasEvals</td>\n",
263
- " <td>Video</td>\n",
264
- " <td>Output</td>\n",
265
- " <td>videodiversemisinfo</td>\n",
266
- " <td>Diverse Misinformation: Impacts of Human Biase...</td>\n",
267
- " <td>Human led evaluations of deepfakes to understa...</td>\n",
268
- " <td>Repr. harm, incite violence</td>\n",
269
- " <td>Diverse Misinformation: Impacts of Human Biase...</td>\n",
270
- " <td>https://arxiv.org/abs/2210.10026</td>\n",
271
- " <td>NaN</td>\n",
272
- " <td>NaN</td>\n",
273
- " <td>NaN</td>\n",
274
- " <td>NaN</td>\n",
275
- " </tr>\n",
276
- " <tr>\n",
277
- " <th>11</th>\n",
278
- " <td>Privacy</td>\n",
279
- " <td>NaN</td>\n",
280
- " <td>NaN</td>\n",
281
- " <td>NaN</td>\n",
282
- " <td>NaN</td>\n",
283
- " <td>NaN</td>\n",
284
- " <td>NaN</td>\n",
285
- " <td>NaN</td>\n",
286
- " <td>NaN</td>\n",
287
- " <td>NaN</td>\n",
288
- " <td>NaN</td>\n",
289
- " <td>NaN</td>\n",
290
- " <td>NaN</td>\n",
291
- " </tr>\n",
292
- " </tbody>\n",
293
- "</table>\n",
294
- "</div>"
295
- ],
296
- "text/plain": [
297
- " Group Modality Level Metaname \\\n",
298
- "0 BiasEvals Text Model weat \n",
299
- "1 BiasEvals Text Model wefat \n",
300
- "2 BiasEvals Text Dataset stereoset \n",
301
- "3 BiasEvals Text Dataset crwospairs \n",
302
- "4 BiasEvals Text Output honest \n",
303
- "5 BiasEvals Image Model ieat \n",
304
- "6 BiasEvals Image Dataset imagedataleak \n",
305
- "7 BiasEvals Image Output stablebias \n",
306
- "8 BiasEvals Image Output homoglyphbias \n",
307
- "9 BiasEvals Audio Taxonomy (?) notmyvoice \n",
308
- "10 BiasEvals Video Output videodiversemisinfo \n",
309
- "11 Privacy NaN NaN NaN \n",
310
- "\n",
311
- " Suggested Evaluation \\\n",
312
- "0 Word Embedding Association Test (WEAT) \n",
313
- "1 Word Embedding Factual As\\nsociation Test (WEFAT) \n",
314
- "2 StereoSet \n",
315
- "3 Crow-S Pairs \n",
316
- "4 HONEST: Measuring Hurtful Sentence Completion ... \n",
317
- "5 Image Embedding Association Test (iEAT) \n",
318
- "6 Dataset leakage and model leakage \n",
319
- "7 Characterizing the variation in generated images \n",
320
- "8 Effect of different scripts on text-to-image g... \n",
321
- "9 Not My Voice! A Taxonomy of Ethical and Safety... \n",
322
- "10 Diverse Misinformation: Impacts of Human Biase... \n",
323
- "11 NaN \n",
324
- "\n",
325
- " What it is evaluating \\\n",
326
- "0 Associations and word embeddings based on Impl... \n",
327
- "1 Associations and word embeddings based on Impl... \n",
328
- "2 Protected class stereotypes \n",
329
- "3 Protected class stereotypes \n",
330
- "4 Protected class stereotypes and hurtful language \n",
331
- "5 Embedding associations \n",
332
- "6 Gender and label bias \n",
333
- "7 NaN \n",
334
- "8 It evaluates generated images for cultural ste... \n",
335
- "9 Lists harms of audio/speech generators \n",
336
- "10 Human led evaluations of deepfakes to understa... \n",
337
- "11 NaN \n",
338
- "\n",
339
- " Considerations \\\n",
340
- "0 Although based in human associations, general ... \n",
341
- "1 Although based in human associations, general ... \n",
342
- "2 Automating stereotype detection makes distingu... \n",
343
- "3 Automating stereotype detection makes distingu... \n",
344
- "4 Automating stereotype detection makes distingu... \n",
345
- "5 Although based in human associations, general ... \n",
346
- "6 NaN \n",
347
- "7 NaN \n",
348
- "8 NaN \n",
349
- "9 Not necessarily evaluation but a good source o... \n",
350
- "10 Repr. harm, incite violence \n",
351
- "11 NaN \n",
352
- "\n",
353
- " Link \\\n",
354
- "0 Semantics derived automatically from language ... \n",
355
- "1 Semantics derived automatically from language ... \n",
356
- "2 StereoSet: Measuring stereotypical bias in pre... \n",
357
- "3 CrowS-Pairs: A Challenge Dataset for Measuring... \n",
358
- "4 HONEST: Measuring Hurtful Sentence Completion ... \n",
359
- "5 Image Representations Learned With Unsupervise... \n",
360
- "6 Balanced Datasets Are Not Enough: Estimating a... \n",
361
- "7 Stable bias: Analyzing societal representation... \n",
362
- "8 Exploiting Cultural Biases via Homoglyphs in T... \n",
363
- "9 Not My Voice! A Taxonomy of Ethical and Safety... \n",
364
- "10 Diverse Misinformation: Impacts of Human Biase... \n",
365
- "11 NaN \n",
366
- "\n",
367
- " URL \\\n",
368
- "0 https://researchportal.bath.ac.uk/en/publicati... \n",
369
- "1 https://researchportal.bath.ac.uk/en/publicati... \n",
370
- "2 https://arxiv.org/abs/2004.09456 \n",
371
- "3 https://arxiv.org/abs/2010.00133 \n",
372
- "4 https://aclanthology.org/2021.naacl-main.191.pdf \n",
373
- "5 https://dl.acm.org/doi/abs/10.1145/3442188.344... \n",
374
- "6 https://arxiv.org/abs/1811.08489 \n",
375
- "7 https://arxiv.org/abs/2303.11408 \n",
376
- "8 https://arxiv.org/pdf/2209.08891.pdf \n",
377
- "9 https://arxiv.org/pdf/2402.01708.pdf \n",
378
- "10 https://arxiv.org/abs/2210.10026 \n",
379
- "11 NaN \n",
380
- "\n",
381
- " Screenshots Applicable Models Datasets \\\n",
382
- "0 ['Images/WEAT1.png', 'Images/WEAT2.png'] NaN NaN \n",
383
- "1 NaN NaN NaN \n",
384
- "2 NaN NaN NaN \n",
385
- "3 NaN NaN NaN \n",
386
- "4 NaN NaN NaN \n",
387
- "5 NaN NaN NaN \n",
388
- "6 NaN NaN NaN \n",
389
- "7 NaN NaN NaN \n",
390
- "8 NaN NaN NaN \n",
391
- "9 NaN NaN NaN \n",
392
- "10 NaN NaN NaN \n",
393
- "11 NaN NaN NaN \n",
394
- "\n",
395
- " Hashtags \n",
396
- "0 NaN \n",
397
- "1 NaN \n",
398
- "2 NaN \n",
399
- "3 NaN \n",
400
- "4 NaN \n",
401
- "5 NaN \n",
402
- "6 NaN \n",
403
- "7 NaN \n",
404
- "8 NaN \n",
405
- "9 NaN \n",
406
- "10 NaN \n",
407
- "11 NaN "
408
- ]
409
- },
410
- "execution_count": 5,
411
- "metadata": {},
412
- "output_type": "execute_result"
413
- }
414
- ],
415
- "source": [
416
- "df"
417
- ]
418
- },
419
- {
420
- "cell_type": "code",
421
- "execution_count": 9,
422
- "metadata": {},
423
- "outputs": [],
424
- "source": [
425
- "import urllib.request\n",
426
- "from bs4 import BeautifulSoup\n",
427
- "\n",
428
- "from pypdf import PdfReader \n",
429
- "from urllib.request import urlretrieve\n",
430
- "\n",
431
- "import pdfplumber\n",
432
- "\n"
433
- ]
434
- },
435
- {
436
- "cell_type": "code",
437
- "execution_count": 12,
438
- "metadata": {},
439
- "outputs": [
440
- {
441
- "name": "stdout",
442
- "output_type": "stream",
443
- "text": [
444
- "https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily\n",
445
- "\n",
446
- " Semantics derived automatically from language corpora contain human-like biases\n",
447
- " — the University of Bath's research portal\n",
448
- "https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily\n",
449
- "\n",
450
- " Semantics derived automatically from language corpora contain human-like biases\n",
451
- " — the University of Bath's research portal\n",
452
- "https://arxiv.org/abs/1903.10561\n",
453
- "[1903.10561] On Measuring Social Biases in Sentence Encoders\n",
454
- "https://dl.acm.org/doi/abs/10.5555/3454287.3455472\n",
455
- "Error\n",
456
- "https://arxiv.org/abs/2004.09456\n",
457
- "[2004.09456] StereoSet: Measuring stereotypical bias in pretrained language models\n",
458
- "https://arxiv.org/abs/2010.00133\n",
459
- "[2010.00133] CrowS-Pairs: A Challenge Dataset for Measuring Social Biases in Masked Language Models\n",
460
- "https://aclanthology.org/2021.naacl-main.191.pdf\n"
461
- ]
462
- },
463
- {
464
- "name": "stderr",
465
- "output_type": "stream",
466
- "text": [
467
- "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n"
468
- ]
469
- },
470
- {
471
- "name": "stdout",
472
- "output_type": "stream",
473
- "text": [
474
- "HONEST: Measuring Hurtful Sentence Completion in Language Models\n",
475
- "nan\n",
476
- "Error\n",
477
- "https://aclanthology.org/2022.findings-acl.165.pdf\n"
478
- ]
479
- },
480
- {
481
- "name": "stderr",
482
- "output_type": "stream",
483
- "text": [
484
- "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n"
485
- ]
486
- },
487
- {
488
- "name": "stdout",
489
- "output_type": "stream",
490
- "text": [
491
- "BBQ: A Hand-Built Bias Benchmark for Question Answering \n",
492
- "https://aclanthology.org/2022.findings-naacl.42.pdf\n"
493
- ]
494
- },
495
- {
496
- "name": "stderr",
497
- "output_type": "stream",
498
- "text": [
499
- "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n"
500
- ]
501
- },
502
- {
503
- "name": "stdout",
504
- "output_type": "stream",
505
- "text": [
506
- "On Measuring Social Biases in Prompt-Based Multi-Task Learning\n"
507
- ]
508
- }
509
- ],
510
- "source": [
511
- "def get_page_title(url):\n",
512
- " soup = BeautifulSoup(urllib.request.urlopen(url))\n",
513
- " return soup.title.string\n",
514
- "\n",
515
- "\n",
516
- "def extract_pdf_title(url):\n",
517
- " urlretrieve(url, 'temp.pdf')\n",
518
- " with pdfplumber.open('temp.pdf') as pdf:\n",
519
- " for page in pdf.pages:\n",
520
- " for line in page.extract_text().split('\\n'):\n",
521
- " return line\n",
522
- " return \"\"\n",
523
- "\n",
524
- " \n",
525
- " \n",
526
- "for url in df['URL'][:10]:\n",
527
- " try:\n",
528
- " print(url)\n",
529
- " title = get_page_title(url)\n",
530
- " print(title)\n",
531
- " except:\n",
532
- " try:\n",
533
- " title = extract_pdf_title(url)\n",
534
- " print(title)\n",
535
- " except:\n",
536
- " print(\"Error\")"
537
- ]
538
- },
539
- {
540
- "cell_type": "code",
541
- "execution_count": null,
542
- "metadata": {},
543
- "outputs": [],
544
- "source": []
545
- }
546
- ],
547
- "metadata": {
548
- "kernelspec": {
549
- "display_name": "gradio",
550
- "language": "python",
551
- "name": "python3"
552
- },
553
- "language_info": {
554
- "codemirror_mode": {
555
- "name": "ipython",
556
- "version": 3
557
- },
558
- "file_extension": ".py",
559
- "mimetype": "text/x-python",
560
- "name": "python",
561
- "nbconvert_exporter": "python",
562
- "pygments_lexer": "ipython3",
563
- "version": "3.12.2"
564
- }
565
- },
566
- "nbformat": 4,
567
- "nbformat_minor": 2
568
- }