luulinh90s committed on
Commit
4faaa69
·
1 Parent(s): abf035a
Files changed (3) hide show
  1. app.py +38 -44
  2. templates/completed.html +6 -54
  3. templates/experiment.html +4 -76
app.py CHANGED
@@ -101,21 +101,24 @@ def select_balanced_samples(samples):
101
  def generate_random_string(length=8):
102
  return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
103
 
 
104
  @app.route('/', methods=['GET', 'POST'])
105
  def index():
106
  logger.info("Rendering index page.")
107
  if request.method == 'POST':
108
  username = request.form.get('username')
109
  seed = request.form.get('seed')
110
- method = request.form.get('method')
111
 
112
- if not username or not seed or not method:
113
- logger.error("Missing username, seed, or method.")
114
- return "Missing username, seed, or method", 400
115
 
116
  try:
117
  seed = int(seed)
118
  random.seed(seed)
 
 
 
119
  all_samples = load_samples(method)
120
  selected_samples = select_balanced_samples(all_samples)
121
  logger.info(f"Number of selected samples: {len(selected_samples)}")
@@ -144,7 +147,6 @@ def index():
144
  logger.exception(f"Error in index route: {e}")
145
  return "An error occurred", 500
146
  return render_template('index.html')
147
-
148
  @app.route('/experiment/<username>', methods=['GET', 'POST'])
149
  def experiment(username):
150
  try:
@@ -162,13 +164,10 @@ def experiment(username):
162
 
163
  visualization_file = selected_samples[current_index]
164
 
165
- if method == "Chain-of-Table":
166
- vis_dir = 'htmls_COT'
167
- else:
168
- vis_dir = 'htmls_POS'
169
 
170
  # Determine the correct visualization directory based on the category
171
- for category, dir_path in (VISUALIZATION_DIRS_CHAIN_OF_TABLE if method == "Chain-of-Table" else VISUALIZATION_DIRS_PLAN_OF_SQLS).items():
172
  if visualization_file in os.listdir(dir_path):
173
  visualization_path = os.path.join(vis_dir, category, visualization_file)
174
  break
@@ -178,11 +177,9 @@ def experiment(username):
178
 
179
  logger.info(f"Rendering experiment page with visualization: {visualization_path}")
180
 
181
- # statement = "Please make a decision to Accept/Reject the AI prediction based on the explanation."
182
  statement = """
183
- A Table Question Answering model is working on Table Fact Verification task (TabFact dataset), verifying if a given Statement is TRUE or FALSE on a given input Table.
184
- You are given an explanation that describes the reasoning process of the Table QA model.
185
- Please carefully analyze the explanation and determine whether you should Accept or Reject the AI prediction.
186
  """
187
 
188
  return render_template('experiment.html',
@@ -198,17 +195,17 @@ Please carefully analyze the explanation and determine whether you should Accept
198
  def feedback():
199
  try:
200
  username = request.form['username']
201
- feedback = request.form['feedback']
202
 
203
  session_data = load_session_data(username)
204
  if not session_data:
205
  logger.error(f"No session data found for user: {username}")
206
  return redirect(url_for('index'))
207
 
208
- # Store the feedback
209
  session_data['responses'].append({
210
  'sample_id': session_data['current_index'],
211
- 'feedback': feedback
212
  })
213
 
214
  # Move to the next sample
@@ -216,7 +213,7 @@ def feedback():
216
 
217
  # Save updated session data
218
  save_session_data(username, session_data)
219
- logger.info(f"Feedback saved for user {username}, sample {session_data['current_index'] - 1}")
220
 
221
  if session_data['current_index'] >= len(session_data['selected_samples']):
222
  return redirect(url_for('completed', username=username))
@@ -226,6 +223,7 @@ def feedback():
226
  logger.exception(f"Error in feedback route: {e}")
227
  return "An error occurred", 500
228
 
 
229
  @app.route('/completed/<username>')
230
  def completed(username):
231
  try:
@@ -237,52 +235,48 @@ def completed(username):
237
  responses = session_data['responses']
238
  method = session_data['method']
239
 
240
- if method == "Chain-of-Table":
241
- json_file = 'Tabular_LLMs_human_study_vis_6_COT.json'
242
- else: # Default to Plan-of-SQLs
243
- json_file = 'Tabular_LLMs_human_study_vis_6_POS.json'
244
 
245
  with open(json_file, 'r') as f:
246
  ground_truth = json.load(f)
247
 
248
- correct_responses = 0
249
- accept_count = 0
250
- reject_count = 0
251
 
252
  for response in responses:
253
  sample_id = response['sample_id']
254
- feedback = response['feedback']
255
  visualization_file = session_data['selected_samples'][sample_id]
256
  index = visualization_file.split('-')[1].split('.')[0] # Extract index from filename
257
 
258
- if feedback.upper() == "TRUE":
259
- accept_count += 1
260
- elif feedback.upper() == "FALSE":
261
- reject_count += 1
262
 
263
- if method == "Chain-of-Table":
264
- ground_truth_key = f"COT_test-{index}.html"
265
- else:
266
- ground_truth_key = f"POS_test-{index}.html"
267
 
268
- if ground_truth_key in ground_truth and ground_truth[ground_truth_key]['answer'].upper() == feedback.upper():
269
- correct_responses += 1
 
 
270
  else:
271
- logger.warning(f"Missing or mismatched key: {ground_truth_key}")
272
 
273
- accuracy = (correct_responses / len(responses)) * 100 if responses else 0
274
  accuracy = round(accuracy, 2)
275
 
276
- accept_percentage = (accept_count / len(responses)) * 100 if len(responses) else 0
277
- reject_percentage = (reject_count / len(responses)) * 100 if len(responses) else 0
278
 
279
- accept_percentage = round(accept_percentage, 2)
280
- reject_percentage = round(reject_percentage, 2)
281
 
282
  return render_template('completed.html',
283
  accuracy=accuracy,
284
- accept_percentage=accept_percentage,
285
- reject_percentage=reject_percentage)
286
  except Exception as e:
287
  logger.exception(f"An error occurred in the completed route: {e}")
288
  return "An error occurred", 500
 
101
  def generate_random_string(length=8):
102
  return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
103
 
104
+
105
  @app.route('/', methods=['GET', 'POST'])
106
  def index():
107
  logger.info("Rendering index page.")
108
  if request.method == 'POST':
109
  username = request.form.get('username')
110
  seed = request.form.get('seed')
 
111
 
112
+ if not username or not seed:
113
+ logger.error("Missing username or seed.")
114
+ return "Missing username or seed", 400
115
 
116
  try:
117
  seed = int(seed)
118
  random.seed(seed)
119
+
120
+ # Use only one method (e.g., "Chain-of-Table")
121
+ method = "Chain-of-Table"
122
  all_samples = load_samples(method)
123
  selected_samples = select_balanced_samples(all_samples)
124
  logger.info(f"Number of selected samples: {len(selected_samples)}")
 
147
  logger.exception(f"Error in index route: {e}")
148
  return "An error occurred", 500
149
  return render_template('index.html')
 
150
  @app.route('/experiment/<username>', methods=['GET', 'POST'])
151
  def experiment(username):
152
  try:
 
164
 
165
  visualization_file = selected_samples[current_index]
166
 
167
+ vis_dir = 'htmls_COT' if method == "Chain-of-Table" else 'htmls_POS'
 
 
 
168
 
169
  # Determine the correct visualization directory based on the category
170
+ for category, dir_path in VISUALIZATION_DIRS_CHAIN_OF_TABLE.items():
171
  if visualization_file in os.listdir(dir_path):
172
  visualization_path = os.path.join(vis_dir, category, visualization_file)
173
  break
 
177
 
178
  logger.info(f"Rendering experiment page with visualization: {visualization_path}")
179
 
 
180
  statement = """
181
+ Based on the explanation provided, what do you think the AI model will predict?
182
+ Will it predict the statement as TRUE or FALSE?
 
183
  """
184
 
185
  return render_template('experiment.html',
 
195
  def feedback():
196
  try:
197
  username = request.form['username']
198
+ prediction = request.form['prediction']
199
 
200
  session_data = load_session_data(username)
201
  if not session_data:
202
  logger.error(f"No session data found for user: {username}")
203
  return redirect(url_for('index'))
204
 
205
+ # Store the user's prediction
206
  session_data['responses'].append({
207
  'sample_id': session_data['current_index'],
208
+ 'user_prediction': prediction
209
  })
210
 
211
  # Move to the next sample
 
213
 
214
  # Save updated session data
215
  save_session_data(username, session_data)
216
+ logger.info(f"Prediction saved for user {username}, sample {session_data['current_index'] - 1}")
217
 
218
  if session_data['current_index'] >= len(session_data['selected_samples']):
219
  return redirect(url_for('completed', username=username))
 
223
  logger.exception(f"Error in feedback route: {e}")
224
  return "An error occurred", 500
225
 
226
+
227
  @app.route('/completed/<username>')
228
  def completed(username):
229
  try:
 
235
  responses = session_data['responses']
236
  method = session_data['method']
237
 
238
+ json_file = 'Tabular_LLMs_human_study_vis_6_COT.json' if method == "Chain-of-Table" else 'Tabular_LLMs_human_study_vis_6_POS.json'
 
 
 
239
 
240
  with open(json_file, 'r') as f:
241
  ground_truth = json.load(f)
242
 
243
+ correct_predictions = 0
244
+ true_predictions = 0
245
+ false_predictions = 0
246
 
247
  for response in responses:
248
  sample_id = response['sample_id']
249
+ user_prediction = response['user_prediction']
250
  visualization_file = session_data['selected_samples'][sample_id]
251
  index = visualization_file.split('-')[1].split('.')[0] # Extract index from filename
252
 
253
+ ground_truth_key = f"COT_test-{index}.html" if method == "Chain-of-Table" else f"POS_test-{index}.html"
 
 
 
254
 
255
+ if ground_truth_key in ground_truth:
256
+ model_prediction = ground_truth[ground_truth_key]['answer'].upper()
257
+ if user_prediction.upper() == model_prediction:
258
+ correct_predictions += 1
259
 
260
+ if user_prediction.upper() == "TRUE":
261
+ true_predictions += 1
262
+ elif user_prediction.upper() == "FALSE":
263
+ false_predictions += 1
264
  else:
265
+ logger.warning(f"Missing key in ground truth: {ground_truth_key}")
266
 
267
+ accuracy = (correct_predictions / len(responses)) * 100 if responses else 0
268
  accuracy = round(accuracy, 2)
269
 
270
+ true_percentage = (true_predictions / len(responses)) * 100 if len(responses) else 0
271
+ false_percentage = (false_predictions / len(responses)) * 100 if len(responses) else 0
272
 
273
+ true_percentage = round(true_percentage, 2)
274
+ false_percentage = round(false_percentage, 2)
275
 
276
  return render_template('completed.html',
277
  accuracy=accuracy,
278
+ true_percentage=true_percentage,
279
+ false_percentage=false_percentage)
280
  except Exception as e:
281
  logger.exception(f"An error occurred in the completed route: {e}")
282
  return "An error occurred", 500
templates/completed.html CHANGED
@@ -3,64 +3,16 @@
3
  <head>
4
  <title>Experiment Completed</title>
5
  <style>
6
- body {
7
- font-family: 'Roboto', sans-serif;
8
- background: url('/static/images/background.jpg') no-repeat center center fixed;
9
- background-size: cover;
10
- display: flex;
11
- justify-content: center;
12
- align-items: center;
13
- height: 100vh;
14
- margin: 0;
15
- }
16
- .container {
17
- text-align: center;
18
- background-color: #ffffff; /* Solid background color */
19
- padding: 60px;
20
- border-radius: 10px;
21
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
22
- width: 60%;
23
- }
24
- h1 {
25
- color: #000000; /* Black text color */
26
- font-size: 48px; /* Larger text */
27
- margin-bottom: 30px;
28
- }
29
- p {
30
- color: #000000; /* Black text color */
31
- font-size: 24px; /* Larger text */
32
- margin-bottom: 30px;
33
- }
34
- .thank-you {
35
- color: #4CAF50;
36
- font-size: 24px; /* Larger text */
37
- margin-top: 20px;
38
- }
39
- .button-container {
40
- margin-top: 30px;
41
- }
42
- button {
43
- background-color: #4CAF50;
44
- color: white;
45
- padding: 15px 30px;
46
- border: none;
47
- border-radius: 5px;
48
- cursor: pointer;
49
- font-size: 24px; /* Larger text */
50
- transition: background-color 0.3s ease;
51
- }
52
- button:hover {
53
- background-color: #45a049;
54
- }
55
  </style>
56
  </head>
57
  <body>
58
  <div class="container">
59
  <h1>Thank you! 😃😃😃</h1>
60
- <p>Experiment completed! Your responses have been recorded.</p>
61
- <p>Accuracy: {{ accuracy }}%</p>
62
- <p>Accepted: {{ accept_percentage }}%</p>
63
- <p>Rejected: {{ reject_percentage }}%</p>
64
  <div class="button-container">
65
  <a href="/" style="text-decoration: none;">
66
  <button>Back to Start Page</button>
@@ -68,4 +20,4 @@
68
  </div>
69
  </div>
70
  </body>
71
- </html>
 
3
  <head>
4
  <title>Experiment Completed</title>
5
  <style>
6
+ /* ... (keep existing styles) ... */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  </style>
8
  </head>
9
  <body>
10
  <div class="container">
11
  <h1>Thank you! 😃😃😃</h1>
12
+ <p>Experiment completed! Your predictions have been recorded.</p>
13
+ <p>Prediction Accuracy: {{ accuracy }}%</p>
14
+ <p>Predicted TRUE: {{ true_percentage }}%</p>
15
+ <p>Predicted FALSE: {{ false_percentage }}%</p>
16
  <div class="button-container">
17
  <a href="/" style="text-decoration: none;">
18
  <button>Back to Start Page</button>
 
20
  </div>
21
  </div>
22
  </body>
23
+ </html>
templates/experiment.html CHANGED
@@ -3,83 +3,11 @@
3
  <head>
4
  <title>Experiment</title>
5
  <style>
6
- body {
7
- font-family: 'Roboto', sans-serif;
8
- background: url('/static/images/background.jpg') no-repeat center center fixed;
9
- background-size: cover;
10
- margin: 0;
11
- padding: 0;
12
- display: flex;
13
- justify-content: center;
14
- align-items: center;
15
- height: 100vh;
16
- }
17
- .container {
18
- background-color: #ffffff; /* Solid background color */
19
- padding: 20px;
20
- border-radius: 10px;
21
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
22
- width: 100%;
23
- height: 100%;
24
- margin: 0;
25
- text-align: center;
26
- display: flex;
27
- flex-direction: column;
28
- }
29
- h1, h2 {
30
- color: #000000; /* Black text color */
31
- font-size: 24px; /* Smaller text */
32
- }
33
- p {
34
- color: #000000; /* Black text color */
35
- font-size: 18px; /* Smaller text */
36
- }
37
- .visualization-container {
38
- flex: 1;
39
- margin: 20px 0;
40
- }
41
- .buttons {
42
- display: flex;
43
- justify-content: space-between;
44
- }
45
- button {
46
- background-color: #4CAF50;
47
- color: white;
48
- padding: 15px 30px;
49
- border: none;
50
- border-radius: 5px;
51
- cursor: pointer;
52
- font-size: 18px; /* Smaller text */
53
- margin-top: 20px;
54
- width: 48%;
55
- transition: background-color 0.3s ease;
56
- }
57
- button:hover {
58
- background-color: #45a049;
59
- }
60
- button.reject {
61
- background-color: #f44336;
62
- }
63
- button.reject:hover {
64
- background-color: #e53935;
65
- }
66
- iframe {
67
- width: 100%;
68
- height: calc(100vh - 150px); /* Adjust height to fit the screen */
69
- border: none;
70
- }
71
  </style>
72
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
73
  <script>
74
- window.onload = function() {
75
- var iframe = document.querySelector('iframe');
76
- iframe.onload = function() {
77
- var iframeDocument = iframe.contentDocument || iframe.contentWindow.document;
78
- var style = document.createElement('style');
79
- style.innerHTML = 'body { font-size: 14px; }'; /* Adjust this value as needed */
80
- iframeDocument.head.appendChild(style);
81
- };
82
- };
83
  </script>
84
  </head>
85
  <body>
@@ -92,11 +20,11 @@
92
  <div class="buttons">
93
  <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
94
  <input type="hidden" name="username" value="{{ username }}">
95
- <button type="submit" name="feedback" value="TRUE">Accept</button>
96
  </form>
97
  <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
98
  <input type="hidden" name="username" value="{{ username }}">
99
- <button type="submit" name="feedback" value="FALSE" class="reject">Reject</button>
100
  </form>
101
  </div>
102
  </div>
 
3
  <head>
4
  <title>Experiment</title>
5
  <style>
6
+ /* ... (keep existing styles) ... */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  </style>
8
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
9
  <script>
10
+ // ... (keep existing script) ...
 
 
 
 
 
 
 
 
11
  </script>
12
  </head>
13
  <body>
 
20
  <div class="buttons">
21
  <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
22
  <input type="hidden" name="username" value="{{ username }}">
23
+ <button type="submit" name="prediction" value="TRUE">Predict TRUE</button>
24
  </form>
25
  <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
26
  <input type="hidden" name="username" value="{{ username }}">
27
+ <button type="submit" name="prediction" value="FALSE" class="reject">Predict FALSE</button>
28
  </form>
29
  </div>
30
  </div>