Jimin Huang
committed on
Commit
·
2f1ff79
1
Parent(s):
b44eb8b
feat: modify leaderboard
Browse files
- app.py +7 -0
- task1_result.csv +9 -0
app.py
CHANGED
@@ -10,6 +10,7 @@ TASK1_COLS = [
|
|
10 |
("Acc", "number"),
|
11 |
("F1", "number"),
|
12 |
("MCC", "number"),
|
|
|
13 |
]
|
14 |
|
15 |
TASK2_COLS = [
|
@@ -19,6 +20,7 @@ TASK2_COLS = [
|
|
19 |
("Rouge-L", "number"),
|
20 |
("BertScore", "number"),
|
21 |
("BartScore", "number"),
|
|
|
22 |
]
|
23 |
|
24 |
TASK3_COLS = [
|
@@ -88,12 +90,17 @@ Our leaderboard incorporates a comprehensive evaluation using diverse metrics li
|
|
88 |
- **Dataset:** 291 data points.
|
89 |
- **Evaluation Metrics:** Sharpe Ratio (final ranking metric), Cumulative Return, Daily and Annualized Volatility, Maximum Drawdown.
|
90 |
|
|
|
|
|
|
|
|
|
91 |
For more details, refer to our [Challenge page](https://sites.google.com/nlg.csie.ntu.edu.tw/finnlp-agentscen/shared-task-finllm?authuser=0).
|
92 |
"""
|
93 |
|
94 |
|
95 |
def create_data_interface(df):
|
96 |
headers = df.columns
|
|
|
97 |
types = ["str"] + ["number"] * (len(headers) - 1)
|
98 |
|
99 |
return gr.components.Dataframe(
|
|
|
10 |
("Acc", "number"),
|
11 |
("F1", "number"),
|
12 |
("MCC", "number"),
|
13 |
+
("DTL", "number"),
|
14 |
]
|
15 |
|
16 |
TASK2_COLS = [
|
|
|
20 |
("Rouge-L", "number"),
|
21 |
("BertScore", "number"),
|
22 |
("BartScore", "number"),
|
23 |
+
("DTL", "number"),
|
24 |
]
|
25 |
|
26 |
TASK3_COLS = [
|
|
|
90 |
- **Dataset:** 291 data points.
|
91 |
- **Evaluation Metrics:** Sharpe Ratio (final ranking metric), Cumulative Return, Daily and Annualized Volatility, Maximum Drawdown.
|
92 |
|
93 |
+
**Model Cheating Detection: Data Leakage Test (DLT)**
|
94 |
+
|
95 |
+
To measure the risk that test-set data leaked into a model's training data, we introduce the Data Leakage Test (DLT). The DLT calculates the difference in the model's perplexity between the training set and the test set. A larger difference indicates a lower likelihood of model cheating, while a smaller difference suggests a higher likelihood.
|
96 |
+
|
97 |
For more details, refer to our [Challenge page](https://sites.google.com/nlg.csie.ntu.edu.tw/finnlp-agentscen/shared-task-finllm?authuser=0).
|
98 |
"""
|
99 |
|
100 |
|
101 |
def create_data_interface(df):
|
102 |
headers = df.columns
|
103 |
+
print (headers)
|
104 |
types = ["str"] + ["number"] * (len(headers) - 1)
|
105 |
|
106 |
return gr.components.Dataframe(
|
task1_result.csv
CHANGED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[email protected],0.7626,0.5237,0.7427,38.9031
|
2 |
+
[email protected],0.7575,0.5174,0.7555
|
3 |
+
[email protected],0.7544,0.5149,0.7581,2.2565
|
4 |
+
[email protected],0.7513,0.5018,0.7406
|
5 |
+
[email protected],0.7286,0.4554,0.7008
|
6 |
+
catmemo,0.711,0.4199,0.6818
|
7 |
+
[email protected],0.709,0.4166,0.6941
|
8 |
+
[email protected],0.7079,0.4141,0.69
|
9 |
+
[email protected],0.4933,0.0141,0.5905
|