Zekun Wu commited on
Commit
44466c7
·
1 Parent(s): 1da3bb7
pages/1_Demo_1.py CHANGED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+ from random import sample
5
+ from utils.metric import Regard
6
+ from utils.model import gpt2
7
+ import os
8
+
9
+ # Set up the Streamlit interface
10
+ st.title('Gender Bias Analysis in Text Generation')
11
+
12
+
13
+ def check_password():
14
+ def password_entered():
15
+ if password_input == os.getenv('PASSWORD'):
16
+ st.session_state['password_correct'] = True
17
+ else:
18
+ st.error("Incorrect Password, please try again.")
19
+
20
+ password_input = st.text_input("Enter Password:", type="password")
21
+ submit_button = st.button("Submit", on_click=password_entered)
22
+
23
+ if st.session_state.get('password_correct', False):
24
+ load_and_process_data()
25
+ else:
26
+ st.error("Please enter a valid password to access the demo.")
27
+
28
+
29
+ def load_and_process_data():
30
+ st.subheader('Loading and Processing Data')
31
+ st.write('Loading the BOLD dataset...')
32
+ bold = load_dataset("AlexaAI/bold", split="train")
33
+
34
+ st.write('Sampling 10 female and male American actors...')
35
+ female_bold = sample([p for p in bold if p['category'] == 'American_actresses'], 10)
36
+ male_bold = sample([p for p in bold if p['category'] == 'American_actors'], 10)
37
+
38
+ male_prompts = [p['prompts'][0] for p in male_bold]
39
+ female_prompts = [p['prompts'][0] for p in female_bold]
40
+
41
+ GPT2 = gpt2()
42
+
43
+ st.write('Generating text for male prompts...')
44
+ male_generation = GPT2.generate_text(male_prompts, pad_token_id=50256, max_length=50, do_sample=False)
45
+ male_continuations = [gen.replace(prompt, '') for gen, prompt in zip(male_generation, male_prompts)]
46
+
47
+ st.write('Generating text for female prompts...')
48
+ female_generation = GPT2.generate_text(female_prompts, pad_token_id=50256, max_length=50, do_sample=False)
49
+ female_continuations = [gen.replace(prompt, '') for gen, prompt in zip(female_generation, female_prompts)]
50
+
51
+ st.write('Generated {} male continuations'.format(len(male_continuations)))
52
+ st.write('Generated {} female continuations'.format(len(female_continuations)))
53
+
54
+ st.subheader('Sample Generated Texts')
55
+ st.write('**Male Prompt:**', male_prompts[0])
56
+ st.write('**Male Continuation:**', male_continuations[0])
57
+ st.write('**Female Prompt:**', female_prompts[0])
58
+ st.write('**Female Continuation:**', female_continuations[0])
59
+
60
+ regard = Regard("compare")
61
+ st.write('Computing regard results to compare male and female continuations...')
62
+ regard_results = regard.compute(data=male_continuations, references=female_continuations)
63
+ st.write(regard
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- openai
 
 
 
1
+ openai
2
+ transformers
3
+ torch==2.0.1
utils/__init__.py ADDED
File without changes
utils/dataset.py ADDED
File without changes
utils/metric.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import defaultdict
2
+ from statistics import mean
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
4
+
5
+
6
+
7
+ class Regard:
8
+
9
+ def __init__(self, config_name):
10
+ self.config_name = config_name
11
+ regard_tokenizer = AutoTokenizer.from_pretrained("sasha/regardv3")
12
+ regard_model = AutoModelForSequenceClassification.from_pretrained("sasha/regardv3")
13
+ self.regard_classifier = pipeline(
14
+ "text-classification", model=regard_model, top_k=4, tokenizer=regard_tokenizer, truncation=True)
15
+
16
+ def regard(self,group):
17
+ group_scores = defaultdict(list)
18
+ group_regard = self.regard_classifier(group)
19
+ for pred in group_regard:
20
+ for pred_score in pred:
21
+ group_scores[pred_score["label"]].append(pred_score["score"])
22
+ return group_regard, dict(group_scores)
23
+
24
+ def compute(
25
+ self,
26
+ data,
27
+ references=None,
28
+ aggregation=None,
29
+ ):
30
+ if self.config_name == "compare":
31
+ pred_scores, pred_regard = self.regard(data)
32
+ ref_scores, ref_regard = self.regard(references)
33
+ pred_mean = {k: mean(v) for k, v in pred_regard.items()}
34
+ pred_max = {k: max(v) for k, v in pred_regard.items()}
35
+ ref_mean = {k: mean(v) for k, v in ref_regard.items()}
36
+ ref_max = {k: max(v) for k, v in ref_regard.items()}
37
+ if aggregation == "maximum":
38
+ return {
39
+ "max_data_regard": pred_max,
40
+ "max_references_regard": ref_max,
41
+ }
42
+ elif aggregation == "average":
43
+ return {"average_data_regard": pred_mean, "average_references_regard": ref_mean}
44
+ else:
45
+ return {"regard_difference": {key: pred_mean[key] - ref_mean.get(key, 0) for key in pred_mean}}
46
+ else:
47
+ pred_scores, pred_regard = self.regard(data)
48
+ pred_mean = {k: mean(v) for k, v in pred_regard.items()}
49
+ pred_max = {k: max(v) for k, v in pred_regard.items()}
50
+ if aggregation == "maximum":
51
+ return {"max_regard": pred_max}
52
+ elif aggregation == "average":
53
+ return {"average_regard": pred_mean}
54
+ else:
55
+ return {"regard": pred_scores}
utils/model.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline, AutoTokenizer
2
+
3
+
4
+ class gpt2:
5
+ def __init__(self,device="cpu"):
6
+ self.text_generation = pipeline("text-generation", model="gpt2",device=device)
7
+ self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
8
+
9
+ def generate_text(self,**kwargs):
10
+ results = self.text_generation(**kwargs)
11
+
12
+ return [item['generated_text'] for item in results[0]]
13
+
14
+ def get_tokenizer(self):
15
+ return self.tokenizer
16
+
17
+ if __name__ == '__main__':
18
+ gpt2 = gpt2()
19
+ print(gpt2.generate_text(["Hello, how are you?","I am fine, thank you."]))