Tabular-LLM-Study-Forward-Simulation

Sleeping

App Files Files Community

luulinh90s committed on Sep 9, 2024

Commit

4faaa69

1 Parent(s): abf035a

update

Browse files

Files changed (3) hide show

app.py +38 -44
templates/completed.html +6 -54
templates/experiment.html +4 -76

app.py CHANGED Viewed

@@ -101,21 +101,24 @@ def select_balanced_samples(samples):
 def generate_random_string(length=8):
     """Return a random alphanumeric string of ``length`` characters.

     Characters are sampled with replacement from the ASCII letters and
     digits using the module-level ``random`` generator, so the result is
     reproducible after a call to ``random.seed``.
     """
     alphabet = string.ascii_letters + string.digits
     picked = random.choices(alphabet, k=length)
     return ''.join(picked)
 @app.route('/', methods=['GET', 'POST'])
 def index():
     logger.info("Rendering index page.")
     if request.method == 'POST':
         username = request.form.get('username')
         seed = request.form.get('seed')
-        method = request.form.get('method')
-        if not username or not seed or not method:
-            logger.error("Missing username, seed, or method.")
-            return "Missing username, seed, or method", 400
         try:
             seed = int(seed)
             random.seed(seed)
             all_samples = load_samples(method)
             selected_samples = select_balanced_samples(all_samples)
             logger.info(f"Number of selected samples: {len(selected_samples)}")
@@ -144,7 +147,6 @@ def index():
             logger.exception(f"Error in index route: {e}")
             return "An error occurred", 500
     return render_template('index.html')
 @app.route('/experiment/<username>', methods=['GET', 'POST'])
 def experiment(username):
     try:
@@ -162,13 +164,10 @@ def experiment(username):
         visualization_file = selected_samples[current_index]
-        if method == "Chain-of-Table":
-            vis_dir = 'htmls_COT'
-        else:
-            vis_dir = 'htmls_POS'
         # Determine the correct visualization directory based on the category
-        for category, dir_path in (VISUALIZATION_DIRS_CHAIN_OF_TABLE if method == "Chain-of-Table" else VISUALIZATION_DIRS_PLAN_OF_SQLS).items():
             if visualization_file in os.listdir(dir_path):
                 visualization_path = os.path.join(vis_dir, category, visualization_file)
                 break
@@ -178,11 +177,9 @@ def experiment(username):
         logger.info(f"Rendering experiment page with visualization: {visualization_path}")
-        # statement = "Please make a decision to Accept/Reject the AI prediction based on the explanation."
         statement = """
-A Table Question Answering model is working on Table Fact Verification task (TabFact dataset), verifying if a given Statement is TRUE or FALSE on a given input Table.
-You are given an explanation that describes the reasoning process of the Table QA model.
-Please carefully analyze the explanation and determine whether you should Accept or Reject the AI prediction.
         """
         return render_template('experiment.html',
@@ -198,17 +195,17 @@ Please carefully analyze the explanation and determine whether you should Accept
 def feedback():
     try:
         username = request.form['username']
-        feedback = request.form['feedback']
         session_data = load_session_data(username)
         if not session_data:
             logger.error(f"No session data found for user: {username}")
             return redirect(url_for('index'))
-        # Store the feedback
         session_data['responses'].append({
             'sample_id': session_data['current_index'],
-            'feedback': feedback
         })
         # Move to the next sample
@@ -216,7 +213,7 @@ def feedback():
         # Save updated session data
         save_session_data(username, session_data)
-        logger.info(f"Feedback saved for user {username}, sample {session_data['current_index'] - 1}")
         if session_data['current_index'] >= len(session_data['selected_samples']):
             return redirect(url_for('completed', username=username))
@@ -226,6 +223,7 @@ def feedback():
         logger.exception(f"Error in feedback route: {e}")
         return "An error occurred", 500
 @app.route('/completed/<username>')
 def completed(username):
     try:
@@ -237,52 +235,48 @@ def completed(username):
         responses = session_data['responses']
         method = session_data['method']
-        if method == "Chain-of-Table":
-            json_file = 'Tabular_LLMs_human_study_vis_6_COT.json'
-        else:  # Default to Plan-of-SQLs
-            json_file = 'Tabular_LLMs_human_study_vis_6_POS.json'
         with open(json_file, 'r') as f:
             ground_truth = json.load(f)
-        correct_responses = 0
-        accept_count = 0
-        reject_count = 0
         for response in responses:
             sample_id = response['sample_id']
-            feedback = response['feedback']
             visualization_file = session_data['selected_samples'][sample_id]
             index = visualization_file.split('-')[1].split('.')[0]  # Extract index from filename
-            if feedback.upper() == "TRUE":
-                accept_count += 1
-            elif feedback.upper() == "FALSE":
-                reject_count += 1
-            if method == "Chain-of-Table":
-                ground_truth_key = f"COT_test-{index}.html"
-            else:
-                ground_truth_key = f"POS_test-{index}.html"
-            if ground_truth_key in ground_truth and ground_truth[ground_truth_key]['answer'].upper() == feedback.upper():
-                correct_responses += 1
             else:
-                logger.warning(f"Missing or mismatched key: {ground_truth_key}")
-        accuracy = (correct_responses / len(responses)) * 100 if responses else 0
         accuracy = round(accuracy, 2)
-        accept_percentage = (accept_count / len(responses)) * 100 if len(responses) else 0
-        reject_percentage = (reject_count / len(responses)) * 100 if len(responses) else 0
-        accept_percentage = round(accept_percentage, 2)
-        reject_percentage = round(reject_percentage, 2)
         return render_template('completed.html',
                                accuracy=accuracy,
-                               accept_percentage=accept_percentage,
-                               reject_percentage=reject_percentage)
     except Exception as e:
         logger.exception(f"An error occurred in the completed route: {e}")
         return "An error occurred", 500

 def generate_random_string(length=8):
     """Return a random alphanumeric string of ``length`` characters.

     Characters are sampled with replacement from the ASCII letters and
     digits using the module-level ``random`` generator, so the result is
     reproducible after a call to ``random.seed``.
     """
     alphabet = string.ascii_letters + string.digits
     picked = random.choices(alphabet, k=length)
     return ''.join(picked)
 @app.route('/', methods=['GET', 'POST'])
 def index():
     logger.info("Rendering index page.")
     if request.method == 'POST':
         username = request.form.get('username')
         seed = request.form.get('seed')
+        if not username or not seed:
+            logger.error("Missing username or seed.")
+            return "Missing username or seed", 400
         try:
             seed = int(seed)
             random.seed(seed)
+            # Use only one method (e.g., "Chain-of-Table")
+            method = "Chain-of-Table"
             all_samples = load_samples(method)
             selected_samples = select_balanced_samples(all_samples)
             logger.info(f"Number of selected samples: {len(selected_samples)}")
             logger.exception(f"Error in index route: {e}")
             return "An error occurred", 500
     return render_template('index.html')
 @app.route('/experiment/<username>', methods=['GET', 'POST'])
 def experiment(username):
     try:
         visualization_file = selected_samples[current_index]
+        vis_dir = 'htmls_COT' if method == "Chain-of-Table" else 'htmls_POS'
         # Determine the correct visualization directory based on the category
+        for category, dir_path in VISUALIZATION_DIRS_CHAIN_OF_TABLE.items():
             if visualization_file in os.listdir(dir_path):
                 visualization_path = os.path.join(vis_dir, category, visualization_file)
                 break
         logger.info(f"Rendering experiment page with visualization: {visualization_path}")
         statement = """
+Based on the explanation provided, what do you think the AI model will predict?
+Will it predict the statement as TRUE or FALSE?
         """
         return render_template('experiment.html',
 def feedback():
     try:
         username = request.form['username']
+        prediction = request.form['prediction']
         session_data = load_session_data(username)
         if not session_data:
             logger.error(f"No session data found for user: {username}")
             return redirect(url_for('index'))
+        # Store the user's prediction
         session_data['responses'].append({
             'sample_id': session_data['current_index'],
+            'user_prediction': prediction
         })
         # Move to the next sample
         # Save updated session data
         save_session_data(username, session_data)
+        logger.info(f"Prediction saved for user {username}, sample {session_data['current_index'] - 1}")
         if session_data['current_index'] >= len(session_data['selected_samples']):
             return redirect(url_for('completed', username=username))
         logger.exception(f"Error in feedback route: {e}")
         return "An error occurred", 500
 @app.route('/completed/<username>')
 def completed(username):
     try:
         responses = session_data['responses']
         method = session_data['method']
+        json_file = 'Tabular_LLMs_human_study_vis_6_COT.json' if method == "Chain-of-Table" else 'Tabular_LLMs_human_study_vis_6_POS.json'
         with open(json_file, 'r') as f:
             ground_truth = json.load(f)
+        correct_predictions = 0
+        true_predictions = 0
+        false_predictions = 0
         for response in responses:
             sample_id = response['sample_id']
+            user_prediction = response['user_prediction']
             visualization_file = session_data['selected_samples'][sample_id]
             index = visualization_file.split('-')[1].split('.')[0]  # Extract index from filename
+            ground_truth_key = f"COT_test-{index}.html" if method == "Chain-of-Table" else f"POS_test-{index}.html"
+            if ground_truth_key in ground_truth:
+                model_prediction = ground_truth[ground_truth_key]['answer'].upper()
+                if user_prediction.upper() == model_prediction:
+                    correct_predictions += 1
+                if user_prediction.upper() == "TRUE":
+                    true_predictions += 1
+                elif user_prediction.upper() == "FALSE":
+                    false_predictions += 1
             else:
+                logger.warning(f"Missing key in ground truth: {ground_truth_key}")
+        accuracy = (correct_predictions / len(responses)) * 100 if responses else 0
         accuracy = round(accuracy, 2)
+        true_percentage = (true_predictions / len(responses)) * 100 if len(responses) else 0
+        false_percentage = (false_predictions / len(responses)) * 100 if len(responses) else 0
+        true_percentage = round(true_percentage, 2)
+        false_percentage = round(false_percentage, 2)
         return render_template('completed.html',
                                accuracy=accuracy,
+                               true_percentage=true_percentage,
+                               false_percentage=false_percentage)
     except Exception as e:
         logger.exception(f"An error occurred in the completed route: {e}")
         return "An error occurred", 500

templates/completed.html CHANGED Viewed

@@ -3,64 +3,16 @@
 <head>
     <title>Experiment Completed</title>
     <style>
-        body {
-            font-family: 'Roboto', sans-serif;
-            background: url('/static/images/background.jpg') no-repeat center center fixed;
-            background-size: cover;
-            display: flex;
-            justify-content: center;
-            align-items: center;
-            height: 100vh;
-            margin: 0;
-        }
-        .container {
-            text-align: center;
-            background-color: #ffffff; /* Solid background color */
-            padding: 60px;
-            border-radius: 10px;
-            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-            width: 60%;
-        }
-        h1 {
-            color: #000000; /* Black text color */
-            font-size: 48px; /* Larger text */
-            margin-bottom: 30px;
-        }
-        p {
-            color: #000000; /* Black text color */
-            font-size: 24px; /* Larger text */
-            margin-bottom: 30px;
-        }
-        .thank-you {
-            color: #4CAF50;
-            font-size: 24px; /* Larger text */
-            margin-top: 20px;
-        }
-        .button-container {
-            margin-top: 30px;
-        }
-        button {
-            background-color: #4CAF50;
-            color: white;
-            padding: 15px 30px;
-            border: none;
-            border-radius: 5px;
-            cursor: pointer;
-            font-size: 24px; /* Larger text */
-            transition: background-color 0.3s ease;
-        }
-        button:hover {
-            background-color: #45a049;
-        }
     </style>
 </head>
 <body>
     <div class="container">
         <h1>Thank you! 😃😃😃</h1>
-        <p>Experiment completed! Your responses have been recorded.</p>
-        <p>Accuracy: {{ accuracy }}%</p>
-        <p>Accepted: {{ accept_percentage }}%</p>
-        <p>Rejected: {{ reject_percentage }}%</p>
         <div class="button-container">
             <a href="/" style="text-decoration: none;">
                 <button>Back to Start Page</button>
@@ -68,4 +20,4 @@
         </div>
     </div>
 </body>
-</html>

 <head>
     <title>Experiment Completed</title>
     <style>
+        /* ... (keep existing styles) ... */
     </style>
 </head>
 <body>
     <div class="container">
         <h1>Thank you! 😃😃😃</h1>
+        <p>Experiment completed! Your predictions have been recorded.</p>
+        <p>Prediction Accuracy: {{ accuracy }}%</p>
+        <p>Predicted TRUE: {{ true_percentage }}%</p>
+        <p>Predicted FALSE: {{ false_percentage }}%</p>
         <div class="button-container">
             <a href="/" style="text-decoration: none;">
                 <button>Back to Start Page</button>
         </div>
     </div>
 </body>
+</html>

templates/experiment.html CHANGED Viewed

@@ -3,83 +3,11 @@
 <head>
     <title>Experiment</title>
     <style>
-        body {
-            font-family: 'Roboto', sans-serif;
-            background: url('/static/images/background.jpg') no-repeat center center fixed;
-            background-size: cover;
-            margin: 0;
-            padding: 0;
-            display: flex;
-            justify-content: center;
-            align-items: center;
-            height: 100vh;
-        }
-        .container {
-            background-color: #ffffff; /* Solid background color */
-            padding: 20px;
-            border-radius: 10px;
-            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
-            width: 100%;
-            height: 100%;
-            margin: 0;
-            text-align: center;
-            display: flex;
-            flex-direction: column;
-        }
-        h1, h2 {
-            color: #000000; /* Black text color */
-            font-size: 24px; /* Smaller text */
-        }
-        p {
-            color: #000000; /* Black text color */
-            font-size: 18px; /* Smaller text */
-        }
-        .visualization-container {
-            flex: 1;
-            margin: 20px 0;
-        }
-        .buttons {
-            display: flex;
-            justify-content: space-between;
-        }
-        button {
-            background-color: #4CAF50;
-            color: white;
-            padding: 15px 30px;
-            border: none;
-            border-radius: 5px;
-            cursor: pointer;
-            font-size: 18px; /* Smaller text */
-            margin-top: 20px;
-            width: 48%;
-            transition: background-color 0.3s ease;
-        }
-        button:hover {
-            background-color: #45a049;
-        }
-        button.reject {
-            background-color: #f44336;
-        }
-        button.reject:hover {
-            background-color: #e53935;
-        }
-        iframe {
-            width: 100%;
-            height: calc(100vh - 150px); /* Adjust height to fit the screen */
-            border: none;
-        }
     </style>
     <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
     <script>
-        window.onload = function() {
-            var iframe = document.querySelector('iframe');
-            iframe.onload = function() {
-                var iframeDocument = iframe.contentDocument || iframe.contentWindow.document;
-                var style = document.createElement('style');
-                style.innerHTML = 'body { font-size: 14px; }'; /* Adjust this value as needed */
-                iframeDocument.head.appendChild(style);
-            };
-        };
     </script>
 </head>
 <body>
@@ -92,11 +20,11 @@
         <div class="buttons">
             <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
                 <input type="hidden" name="username" value="{{ username }}">
-                <button type="submit" name="feedback" value="TRUE">Accept</button>
             </form>
             <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
                 <input type="hidden" name="username" value="{{ username }}">
-                <button type="submit" name="feedback" value="FALSE" class="reject">Reject</button>
             </form>
         </div>
     </div>

 <head>
     <title>Experiment</title>
     <style>
+        /* ... (keep existing styles) ... */
     </style>
     <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
     <script>
+        // ... (keep existing script) ...
     </script>
 </head>
 <body>
         <div class="buttons">
             <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
                 <input type="hidden" name="username" value="{{ username }}">
+                <button type="submit" name="prediction" value="TRUE">Predict TRUE</button>
             </form>
             <form action="{{ url_for('feedback') }}" method="post" style="width: 48%;">
                 <input type="hidden" name="username" value="{{ username }}">
+                <button type="submit" name="prediction" value="FALSE" class="reject">Predict FALSE</button>
             </form>
         </div>
     </div>