luulinh90s
committed on
Commit
·
4faaa69
1
Parent(s):
abf035a
update
Browse files
- app.py +38 -44
- templates/completed.html +6 -54
- templates/experiment.html +4 -76
app.py
CHANGED
@@ -101,21 +101,24 @@ def select_balanced_samples(samples):
|
|
101 |
def generate_random_string(length=8):
|
102 |
return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
|
103 |
|
|
|
104 |
@app.route('/', methods=['GET', 'POST'])
|
105 |
def index():
|
106 |
logger.info("Rendering index page.")
|
107 |
if request.method == 'POST':
|
108 |
username = request.form.get('username')
|
109 |
seed = request.form.get('seed')
|
110 |
-
method = request.form.get('method')
|
111 |
|
112 |
-
if not username or not seed
|
113 |
-
logger.error("Missing username
|
114 |
-
return "Missing username
|
115 |
|
116 |
try:
|
117 |
seed = int(seed)
|
118 |
random.seed(seed)
|
|
|
|
|
|
|
119 |
all_samples = load_samples(method)
|
120 |
selected_samples = select_balanced_samples(all_samples)
|
121 |
logger.info(f"Number of selected samples: {len(selected_samples)}")
|
@@ -144,7 +147,6 @@ def index():
|
|
144 |
logger.exception(f"Error in index route: {e}")
|
145 |
return "An error occurred", 500
|
146 |
return render_template('index.html')
|
147 |
-
|
148 |
@app.route('/experiment/<username>', methods=['GET', 'POST'])
|
149 |
def experiment(username):
|
150 |
try:
|
@@ -162,13 +164,10 @@ def experiment(username):
|
|
162 |
|
163 |
visualization_file = selected_samples[current_index]
|
164 |
|
165 |
-
if method == "Chain-of-Table"
|
166 |
-
vis_dir = 'htmls_COT'
|
167 |
-
else:
|
168 |
-
vis_dir = 'htmls_POS'
|
169 |
|
170 |
# Determine the correct visualization directory based on the category
|
171 |
-
for category, dir_path in
|
172 |
if visualization_file in os.listdir(dir_path):
|
173 |
visualization_path = os.path.join(vis_dir, category, visualization_file)
|
174 |
break
|
@@ -178,11 +177,9 @@ def experiment(username):
|
|
178 |
|
179 |
logger.info(f"Rendering experiment page with visualization: {visualization_path}")
|
180 |
|
181 |
-
# statement = "Please make a decision to Accept/Reject the AI prediction based on the explanation."
|
182 |
statement = """
|
183 |
-
|
184 |
-
|
185 |
-
Please carefully analyze the explanation and determine whether you should Accept or Reject the AI prediction.
|
186 |
"""
|
187 |
|
188 |
return render_template('experiment.html',
|
@@ -198,17 +195,17 @@ Please carefully analyze the explanation and determine whether you should Accept
|
|
198 |
def feedback():
|
199 |
try:
|
200 |
username = request.form['username']
|
201 |
-
|
202 |
|
203 |
session_data = load_session_data(username)
|
204 |
if not session_data:
|
205 |
logger.error(f"No session data found for user: {username}")
|
206 |
return redirect(url_for('index'))
|
207 |
|
208 |
-
# Store the
|
209 |
session_data['responses'].append({
|
210 |
'sample_id': session_data['current_index'],
|
211 |
-
'
|
212 |
})
|
213 |
|
214 |
# Move to the next sample
|
@@ -216,7 +213,7 @@ def feedback():
|
|
216 |
|
217 |
# Save updated session data
|
218 |
save_session_data(username, session_data)
|
219 |
-
logger.info(f"
|
220 |
|
221 |
if session_data['current_index'] >= len(session_data['selected_samples']):
|
222 |
return redirect(url_for('completed', username=username))
|
@@ -226,6 +223,7 @@ def feedback():
|
|
226 |
logger.exception(f"Error in feedback route: {e}")
|
227 |
return "An error occurred", 500
|
228 |
|
|
|
229 |
@app.route('/completed/<username>')
|
230 |
def completed(username):
|
231 |
try:
|
@@ -237,52 +235,48 @@ def completed(username):
|
|
237 |
responses = session_data['responses']
|
238 |
method = session_data['method']
|
239 |
|
240 |
-
if method == "Chain-of-Table"
|
241 |
-
json_file = 'Tabular_LLMs_human_study_vis_6_COT.json'
|
242 |
-
else: # Default to Plan-of-SQLs
|
243 |
-
json_file = 'Tabular_LLMs_human_study_vis_6_POS.json'
|
244 |
|
245 |
with open(json_file, 'r') as f:
|
246 |
ground_truth = json.load(f)
|
247 |
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
|
252 |
for response in responses:
|
253 |
sample_id = response['sample_id']
|
254 |
-
|
255 |
visualization_file = session_data['selected_samples'][sample_id]
|
256 |
index = visualization_file.split('-')[1].split('.')[0] # Extract index from filename
|
257 |
|
258 |
-
|
259 |
-
accept_count += 1
|
260 |
-
elif feedback.upper() == "FALSE":
|
261 |
-
reject_count += 1
|
262 |
|
263 |
-
if
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
|
268 |
-
|
269 |
-
|
|
|
|
|
270 |
else:
|
271 |
-
logger.warning(f"Missing
|
272 |
|
273 |
-
accuracy = (
|
274 |
accuracy = round(accuracy, 2)
|
275 |
|
276 |
-
|
277 |
-
|
278 |
|
279 |
-
|
280 |
-
|
281 |
|
282 |
return render_template('completed.html',
|
283 |
accuracy=accuracy,
|
284 |
-
|
285 |
-
|
286 |
except Exception as e:
|
287 |
logger.exception(f"An error occurred in the completed route: {e}")
|
288 |
return "An error occurred", 500
|
|
|
101 |
def generate_random_string(length=8):
|
102 |
return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
|
103 |
|
104 |
+
|
105 |
@app.route('/', methods=['GET', 'POST'])
|
106 |
def index():
|
107 |
logger.info("Rendering index page.")
|
108 |
if request.method == 'POST':
|
109 |
username = request.form.get('username')
|
110 |
seed = request.form.get('seed')
|
|
|
111 |
|
112 |
+
if not username or not seed:
|
113 |
+
logger.error("Missing username or seed.")
|
114 |
+
return "Missing username or seed", 400
|
115 |
|
116 |
try:
|
117 |
seed = int(seed)
|
118 |
random.seed(seed)
|
119 |
+
|
120 |
+
# Use only one method (e.g., "Chain-of-Table")
|
121 |
+
method = "Chain-of-Table"
|
122 |
all_samples = load_samples(method)
|
123 |
selected_samples = select_balanced_samples(all_samples)
|
124 |
logger.info(f"Number of selected samples: {len(selected_samples)}")
|
|
|
147 |
logger.exception(f"Error in index route: {e}")
|
148 |
return "An error occurred", 500
|
149 |
return render_template('index.html')
|
|
|
150 |
@app.route('/experiment/<username>', methods=['GET', 'POST'])
|
151 |
def experiment(username):
|
152 |
try:
|
|
|
164 |
|
165 |
visualization_file = selected_samples[current_index]
|
166 |
|
167 |
+
vis_dir = 'htmls_COT' if method == "Chain-of-Table" else 'htmls_POS'
|
|
|
|
|
|
|
168 |
|
169 |
# Determine the correct visualization directory based on the category
|
170 |
+
for category, dir_path in VISUALIZATION_DIRS_CHAIN_OF_TABLE.items():
|
171 |
if visualization_file in os.listdir(dir_path):
|
172 |
visualization_path = os.path.join(vis_dir, category, visualization_file)
|
173 |
break
|
|
|
177 |
|
178 |
logger.info(f"Rendering experiment page with visualization: {visualization_path}")
|
179 |
|
|
|
180 |
statement = """
|
181 |
+
Based on the explanation provided, what do you think the AI model will predict?
|
182 |
+
Will it predict the statement as TRUE or FALSE?
|
|
|
183 |
"""
|
184 |
|
185 |
return render_template('experiment.html',
|
|
|
195 |
def feedback():
|
196 |
try:
|
197 |
username = request.form['username']
|
198 |
+
prediction = request.form['prediction']
|
199 |
|
200 |
session_data = load_session_data(username)
|
201 |
if not session_data:
|
202 |
logger.error(f"No session data found for user: {username}")
|
203 |
return redirect(url_for('index'))
|
204 |
|
205 |
+
# Store the user's prediction
|
206 |
session_data['responses'].append({
|
207 |
'sample_id': session_data['current_index'],
|
208 |
+
'user_prediction': prediction
|
209 |
})
|
210 |
|
211 |
# Move to the next sample
|
|
|
213 |
|
214 |
# Save updated session data
|
215 |
save_session_data(username, session_data)
|
216 |
+
logger.info(f"Prediction saved for user {username}, sample {session_data['current_index'] - 1}")
|
217 |
|
218 |
if session_data['current_index'] >= len(session_data['selected_samples']):
|
219 |
return redirect(url_for('completed', username=username))
|
|
|
223 |
logger.exception(f"Error in feedback route: {e}")
|
224 |
return "An error occurred", 500
|
225 |
|
226 |
+
|
227 |
@app.route('/completed/<username>')
|
228 |
def completed(username):
|
229 |
try:
|
|
|
235 |
responses = session_data['responses']
|
236 |
method = session_data['method']
|
237 |
|
238 |
+
json_file = 'Tabular_LLMs_human_study_vis_6_COT.json' if method == "Chain-of-Table" else 'Tabular_LLMs_human_study_vis_6_POS.json'
|
|
|
|
|
|
|
239 |
|
240 |
with open(json_file, 'r') as f:
|
241 |
ground_truth = json.load(f)
|
242 |
|
243 |
+
correct_predictions = 0
|
244 |
+
true_predictions = 0
|
245 |
+
false_predictions = 0
|
246 |
|
247 |
for response in responses:
|
248 |
sample_id = response['sample_id']
|
249 |
+
user_prediction = response['user_prediction']
|
250 |
visualization_file = session_data['selected_samples'][sample_id]
|
251 |
index = visualization_file.split('-')[1].split('.')[0] # Extract index from filename
|
252 |
|
253 |
+
ground_truth_key = f"COT_test-{index}.html" if method == "Chain-of-Table" else f"POS_test-{index}.html"
|
|
|
|
|
|
|
254 |
|
255 |
+
if ground_truth_key in ground_truth:
|
256 |
+
model_prediction = ground_truth[ground_truth_key]['answer'].upper()
|
257 |
+
if user_prediction.upper() == model_prediction:
|
258 |
+
correct_predictions += 1
|
259 |
|
260 |
+
if user_prediction.upper() == "TRUE":
|
261 |
+
true_predictions += 1
|
262 |
+
elif user_prediction.upper() == "FALSE":
|
263 |
+
false_predictions += 1
|
264 |
else:
|
265 |
+
logger.warning(f"Missing key in ground truth: {ground_truth_key}")
|
266 |
|
267 |
+
accuracy = (correct_predictions / len(responses)) * 100 if responses else 0
|
268 |
accuracy = round(accuracy, 2)
|
269 |
|
270 |
+
true_percentage = (true_predictions / len(responses)) * 100 if len(responses) else 0
|
271 |
+
false_percentage = (false_predictions / len(responses)) * 100 if len(responses) else 0
|
272 |
|
273 |
+
true_percentage = round(true_percentage, 2)
|
274 |
+
false_percentage = round(false_percentage, 2)
|
275 |
|
276 |
return render_template('completed.html',
|
277 |
accuracy=accuracy,
|
278 |
+
true_percentage=true_percentage,
|
279 |
+
false_percentage=false_percentage)
|
280 |
except Exception as e:
|
281 |
logger.exception(f"An error occurred in the completed route: {e}")
|
282 |
return "An error occurred", 500
|
templates/completed.html
CHANGED
@@ -3,64 +3,16 @@
|
|
3 |
<head>
|
4 |
<title>Experiment Completed</title>
|
5 |
<style>
|
6 |
-
|
7 |
-
font-family: 'Roboto', sans-serif;
|
8 |
-
background: url('/static/images/background.jpg') no-repeat center center fixed;
|
9 |
-
background-size: cover;
|
10 |
-
display: flex;
|
11 |
-
justify-content: center;
|
12 |
-
align-items: center;
|
13 |
-
height: 100vh;
|
14 |
-
margin: 0;
|
15 |
-
}
|
16 |
-
.container {
|
17 |
-
text-align: center;
|
18 |
-
background-color: #ffffff; /* Solid background color */
|
19 |
-
padding: 60px;
|
20 |
-
border-radius: 10px;
|
21 |
-
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
22 |
-
width: 60%;
|
23 |
-
}
|
24 |
-
h1 {
|
25 |
-
color: #000000; /* Black text color */
|
26 |
-
font-size: 48px; /* Larger text */
|
27 |
-
margin-bottom: 30px;
|
28 |
-
}
|
29 |
-
p {
|
30 |
-
color: #000000; /* Black text color */
|
31 |
-
font-size: 24px; /* Larger text */
|
32 |
-
margin-bottom: 30px;
|
33 |
-
}
|
34 |
-
.thank-you {
|
35 |
-
color: #4CAF50;
|
36 |
-
font-size: 24px; /* Larger text */
|
37 |
-
margin-top: 20px;
|
38 |
-
}
|
39 |
-
.button-container {
|
40 |
-
margin-top: 30px;
|
41 |
-
}
|
42 |
-
button {
|
43 |
-
background-color: #4CAF50;
|
44 |
-
color: white;
|
45 |
-
padding: 15px 30px;
|
46 |
-
border: none;
|
47 |
-
border-radius: 5px;
|
48 |
-
cursor: pointer;
|
49 |
-
font-size: 24px; /* Larger text */
|
50 |
-
transition: background-color 0.3s ease;
|
51 |
-
}
|
52 |
-
button:hover {
|
53 |
-
background-color: #45a049;
|
54 |
-
}
|
55 |
</style>
|
56 |
</head>
|
57 |
<body>
|
58 |
<div class="container">
|
59 |
<h1>Thank you! 😃😃😃</h1>
|
60 |
-
<p>Experiment completed! Your
|
61 |
-
<p>Accuracy: {{ accuracy }}%</p>
|
62 |
-
<p>
|
63 |
-
<p>
|
64 |
<div class="button-container">
|
65 |
<a href="/" style="text-decoration: none;">
|
66 |
<button>Back to Start Page</button>
|
@@ -68,4 +20,4 @@
|
|
68 |
</div>
|
69 |
</div>
|
70 |
</body>
|
71 |
-
</html>
|
|
|
3 |
<head>
|
4 |
<title>Experiment Completed</title>
|
5 |
<style>
|
6 |
+
/* ... (keep existing styles) ... */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
</style>
|
8 |
</head>
|
9 |
<body>
|
10 |
<div class="container">
|
11 |
<h1>Thank you! 😃😃😃</h1>
|
12 |
+
<p>Experiment completed! Your predictions have been recorded.</p>
|
13 |
+
<p>Prediction Accuracy: {{ accuracy }}%</p>
|
14 |
+
<p>Predicted TRUE: {{ true_percentage }}%</p>
|
15 |
+
<p>Predicted FALSE: {{ false_percentage }}%</p>
|
16 |
<div class="button-container">
|
17 |
<a href="/" style="text-decoration: none;">
|
18 |
<button>Back to Start Page</button>
|
|
|
20 |
</div>
|
21 |
</div>
|
22 |
</body>
|
23 |
+
</html>
|
templates/experiment.html
CHANGED
@@ -3,83 +3,11 @@
|
|
3 |
<head>
|
4 |
<title>Experiment</title>
|
5 |
<style>
|
6 |
-
|
7 |
-
font-family: 'Roboto', sans-serif;
|
8 |
-
background: url('/static/images/background.jpg') no-repeat center center fixed;
|
9 |
-
background-size: cover;
|
10 |
-
margin: 0;
|
11 |
-
padding: 0;
|
12 |
-
display: flex;
|
13 |
-
justify-content: center;
|
14 |
-
align-items: center;
|
15 |
-
height: 100vh;
|
16 |
-
}
|
17 |
-
.container {
|
18 |
-
background-color: #ffffff; /* Solid background color */
|
19 |
-
padding: 20px;
|
20 |
-
border-radius: 10px;
|
21 |
-
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
22 |
-
width: 100%;
|
23 |
-
height: 100%;
|
24 |
-
margin: 0;
|
25 |
-
text-align: center;
|
26 |
-
display: flex;
|
27 |
-
flex-direction: column;
|
28 |
-
}
|
29 |
-
h1, h2 {
|
30 |
-
color: #000000; /* Black text color */
|
31 |
-
font-size: 24px; /* Smaller text */
|
32 |
-
}
|
33 |
-
p {
|
34 |
-
color: #000000; /* Black text color */
|
35 |
-
font-size: 18px; /* Smaller text */
|
36 |
-
}
|
37 |
-
.visualization-container {
|
38 |
-
flex: 1;
|
39 |
-
margin: 20px 0;
|
40 |
-
}
|
41 |
-
.buttons {
|
42 |
-
display: flex;
|
43 |
-
justify-content: space-between;
|
44 |
-
}
|
45 |
-
button {
|
46 |
-
background-color: #4CAF50;
|
47 |
-
color: white;
|
48 |
-
padding: 15px 30px;
|
49 |
-
border: none;
|
50 |
-
border-radius: 5px;
|
51 |
-
cursor: pointer;
|
52 |
-
font-size: 18px; /* Smaller text */
|
53 |
-
margin-top: 20px;
|
54 |
-
width: 48%;
|
55 |
-
transition: background-color 0.3s ease;
|
56 |
-
}
|
57 |
-
button:hover {
|
58 |
-
background-color: #45a049;
|
59 |
-
}
|
60 |
-
button.reject {
|
61 |
-
background-color: #f44336;
|
62 |
-
}
|
63 |
-
button.reject:hover {
|
64 |
-
background-color: #e53935;
|
65 |
-
}
|
66 |
-
iframe {
|
67 |
-
width: 100%;
|
68 |
-
height: calc(100vh - 150px); /* Adjust height to fit the screen */
|
69 |
-
border: none;
|
70 |
-
}
|
71 |
</style>
|
72 |
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
|
73 |
<script>
|
74 |
-
|
75 |
-
var iframe = document.querySelector('iframe');
|
76 |
-
iframe.onload = function() {
|
77 |
-
var iframeDocument = iframe.contentDocument || iframe.contentWindow.document;
|
78 |
-
var style = document.createElement('style');
|
79 |
-
style.innerHTML = 'body { font-size: 14px; }'; /* Adjust this value as needed */
|
80 |
-
iframeDocument.head.appendChild(style);
|
81 |
-
};
|
82 |
-
};
|
83 |
</script>
|
84 |
</head>
|
85 |
<body>
|
@@ -92,11 +20,11 @@
|
|
92 |
<div class="buttons">
|
93 |
<form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
|
94 |
<input type="hidden" name="username" value="{{ username }}">
|
95 |
-
<button type="submit" name="
|
96 |
</form>
|
97 |
<form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
|
98 |
<input type="hidden" name="username" value="{{ username }}">
|
99 |
-
<button type="submit" name="
|
100 |
</form>
|
101 |
</div>
|
102 |
</div>
|
|
|
3 |
<head>
|
4 |
<title>Experiment</title>
|
5 |
<style>
|
6 |
+
/* ... (keep existing styles) ... */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
</style>
|
8 |
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
|
9 |
<script>
|
10 |
+
// ... (keep existing script) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
</script>
|
12 |
</head>
|
13 |
<body>
|
|
|
20 |
<div class="buttons">
|
21 |
<form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
|
22 |
<input type="hidden" name="username" value="{{ username }}">
|
23 |
+
<button type="submit" name="prediction" value="TRUE">Predict TRUE</button>
|
24 |
</form>
|
25 |
<form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
|
26 |
<input type="hidden" name="username" value="{{ username }}">
|
27 |
+
<button type="submit" name="prediction" value="FALSE" class="reject">Predict FALSE</button>
|
28 |
</form>
|
29 |
</div>
|
30 |
</div>
|