Chen42 committed on
Commit
5a9888f
·
verified ·
1 Parent(s): 41404b8

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. make_comet_hyp_and_ref.py +9 -6
make_comet_hyp_and_ref.py CHANGED
@@ -1,17 +1,20 @@
1
  from datasets import load_dataset
2
  import pandas as pd
3
 
4
- text_src_jsonl = '/home/zychen/hwproject/my_modeling_phase_1/mytest/text_src.jsonl'
5
- dataset = load_dataset("json", data_files=text_src_jsonl)["train"]
6
- print(f"Number of examples: {len(dataset)}")
7
- text_src_df = dataset.to_pandas()
8
 
9
- decoding_res = '/home/zychen/hwproject/my_modeling_phase_1/mytest_3600_test5k/decoding_res.json'
 
10
  dataset2 = load_dataset("json", data_files=decoding_res)["train"]
11
  print(f"Number of examples: {len(dataset2)}")
12
  decoding_df = dataset2.to_pandas()
13
 
14
- df_merged = pd.concat([text_src_df, decoding_df], axis=1)
 
 
15
  print(df_merged.columns.tolist(), df_merged.iloc[4500])
16
 
17
 
 
1
  from datasets import load_dataset
2
  import pandas as pd
3
 
4
+ # text_src_jsonl = '/home/zychen/hwproject/my_modeling_phase_1/mytest/text_src.jsonl'
5
+ # dataset = load_dataset("json", data_files=text_src_jsonl)["train"]
6
+ # print(f"Number of examples: {len(dataset)}")
7
+ # text_src_df = dataset.to_pandas()
8
 
9
+ # decoding_res = '/home/zychen/hwproject/my_modeling_phase_1/mytest_3600_test5k/decoding_res.json'
10
+ decoding_res = '/home/zychen/hwproject/my_modeling_phase_1/mytest_from56k+64k/decoding_res.json'
11
  dataset2 = load_dataset("json", data_files=decoding_res)["train"]
12
  print(f"Number of examples: {len(dataset2)}")
13
  decoding_df = dataset2.to_pandas()
14
 
15
+ # df_merged = pd.concat([text_src_df, decoding_df], axis=1)
16
+
17
+ df_merged = decoding_df
18
  print(df_merged.columns.tolist(), df_merged.iloc[4500])
19
 
20