Ubuntu
committed on
Commit
·
d0702fa
1
Parent(s):
ed39e1a
finetuned the bert model again to classify things right
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- data_categories/Computers_and_Electronics.csv +2 -2
- data_categories/Final_Category_Data_With_Labels.csv +2 -2
- data_categories/Food_and_Drink.csv +2 -2
- data_categories/Pets_and_Animals.csv +2 -2
- data_categories/Real Estate.csv +2 -2
- data_categories/Reference.csv +2 -2
- data_categories/Sensitive Subjects.csv +2 -2
- data_categories/Shopping.csv +2 -2
- data_test/keywords-2.csv +3 -0
- data_test/labelled_data.csv +3 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/added_tokens.json +0 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/config.json +53 -53
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/optimizer.pt +1 -1
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/pytorch_model.bin +1 -1
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/rng_state.pth +0 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/scheduler.pt +1 -1
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/special_tokens_map.json +0 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/tokenizer.json +0 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/tokenizer_config.json +0 -0
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/trainer_state.json +21 -21
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/training_args.bin +1 -1
- finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/vocab.txt +0 -0
- finetuned_entity_categorical_classification/checkpoint-3346/added_tokens.json +7 -0
- finetuned_entity_categorical_classification/checkpoint-3346/config.json +83 -0
- finetuned_entity_categorical_classification/checkpoint-3346/optimizer.pt +3 -0
- finetuned_entity_categorical_classification/checkpoint-3346/pytorch_model.bin +3 -0
- finetuned_entity_categorical_classification/checkpoint-3346/rng_state.pth +0 -0
- finetuned_entity_categorical_classification/checkpoint-3346/scheduler.pt +3 -0
- finetuned_entity_categorical_classification/checkpoint-3346/special_tokens_map.json +7 -0
- finetuned_entity_categorical_classification/checkpoint-3346/tokenizer.json +0 -0
- finetuned_entity_categorical_classification/checkpoint-3346/tokenizer_config.json +56 -0
- finetuned_entity_categorical_classification/checkpoint-3346/trainer_state.json +73 -0
- finetuned_entity_categorical_classification/checkpoint-3346/training_args.bin +3 -0
- finetuned_entity_categorical_classification/checkpoint-3346/vocab.txt +0 -0
- finetuned_entity_categorical_classification/checkpoint-3362/added_tokens.json +7 -0
- finetuned_entity_categorical_classification/checkpoint-3362/config.json +83 -0
- finetuned_entity_categorical_classification/checkpoint-3362/optimizer.pt +3 -0
- finetuned_entity_categorical_classification/checkpoint-3362/pytorch_model.bin +3 -0
- finetuned_entity_categorical_classification/checkpoint-3362/rng_state.pth +0 -0
- finetuned_entity_categorical_classification/checkpoint-3362/scheduler.pt +3 -0
- finetuned_entity_categorical_classification/checkpoint-3362/special_tokens_map.json +7 -0
- finetuned_entity_categorical_classification/checkpoint-3362/tokenizer.json +0 -0
- finetuned_entity_categorical_classification/checkpoint-3362/tokenizer_config.json +56 -0
- finetuned_entity_categorical_classification/checkpoint-3362/trainer_state.json +73 -0
- finetuned_entity_categorical_classification/checkpoint-3362/training_args.bin +3 -0
- finetuned_entity_categorical_classification/checkpoint-3362/vocab.txt +0 -0
- finetuned_entity_categorical_classification/runs/Oct12_11-19-39_ip-172-31-95-165/events.out.tfevents.1697109579.ip-172-31-95-165.128350.0 +0 -0
- finetuned_entity_categorical_classification/runs/Oct12_11-43-16_ip-172-31-95-165/events.out.tfevents.1697110996.ip-172-31-95-165.128941.0 +0 -0
- finetuned_entity_categorical_classification/runs/Oct12_11-59-06_ip-172-31-95-165/events.out.tfevents.1697111947.ip-172-31-95-165.129502.0 +0 -0
- research/08_organizing_entire_datacategories.ipynb +222 -222
data_categories/Computers_and_Electronics.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bfb344e958c0625df92cecfd61ee937ad46f4ae3c1fe7b4a43d64bc66ea025b
|
3 |
+
size 53312
|
data_categories/Final_Category_Data_With_Labels.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e98ec1b4ff3e48cf46b76010bdb651b013e29b73a2ba8afde2691ff2c7ffd89
|
3 |
+
size 1755664
|
data_categories/Food_and_Drink.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f93635d1d2a6cb1bcce6f246efe265a22a21f9b0f9e09ad64ff4f4135e9a873
|
3 |
+
size 50513
|
data_categories/Pets_and_Animals.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f02553cb1d35b2874332bdc31f355f85f17bf22d93024ddbe3ed174897c5c60
|
3 |
+
size 60136
|
data_categories/Real Estate.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d2c95f708a885aa30ff47132f588c8b1f69ea65587a9ff2dcdb9012e11754c3
|
3 |
+
size 40030
|
data_categories/Reference.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4f2e6a675d15a45ca557f267488141912d0898e231e890f40075b2dad1bf1ce
|
3 |
+
size 57698
|
data_categories/Sensitive Subjects.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a2594ab9518c2def44acaef1c8661194ae16c95755face04b58d255bac1b33f
|
3 |
+
size 11256
|
data_categories/Shopping.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc8a3ba5ed07db6b06b247d33d1b91bded1647f55b9ebcb1d50b1072a51eeecf
|
3 |
+
size 56890
|
data_test/keywords-2.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc87cd18c79ecaa87b16dca63019b9577f72073c987ac18c624b059252e32d0f
|
3 |
+
size 8356
|
data_test/labelled_data.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f208650b882688e9a5cd9e0f2b3787dfd81ca7f5cb524a98c6d1e75d4aadfbf5
|
3 |
+
size 19932
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/added_tokens.json
RENAMED
File without changes
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/config.json
RENAMED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation": "gelu",
|
4 |
"architectures": [
|
5 |
"DistilBertForSequenceClassification"
|
@@ -9,63 +9,63 @@
|
|
9 |
"dropout": 0.1,
|
10 |
"hidden_dim": 3072,
|
11 |
"id2label": {
|
12 |
-
"0": "
|
13 |
-
"1": "
|
14 |
-
"2": "
|
15 |
-
"3": "
|
16 |
-
"4": "
|
17 |
-
"5": "
|
18 |
-
"6": "
|
19 |
-
"7": "
|
20 |
-
"8": "
|
21 |
-
"9": "
|
22 |
-
"10": "
|
23 |
-
"11": "
|
24 |
-
"12": "
|
25 |
-
"13": "
|
26 |
-
"14": "
|
27 |
-
"15": "
|
28 |
-
"16": "
|
29 |
-
"17": "
|
30 |
-
"18": "
|
31 |
-
"19": "
|
32 |
-
"20": "
|
33 |
-
"21": "
|
34 |
"22": "Arts_and_Entertainment",
|
35 |
-
"23": "
|
36 |
-
"24": "
|
37 |
-
"25": "
|
38 |
-
"26": "
|
39 |
},
|
40 |
"initializer_range": 0.02,
|
41 |
"label2id": {
|
42 |
-
"Adult":
|
43 |
"Arts_and_Entertainment": 22,
|
44 |
-
"Autos_and_Vehicles":
|
45 |
-
"Beauty_and_Fitness":
|
46 |
-
"Books_and_Literature":
|
47 |
-
"Business_and_Industrial":
|
48 |
-
"Computers_and_Electronics":
|
49 |
-
"Finance":
|
50 |
-
"Food_and_Drink":
|
51 |
-
"Games":
|
52 |
-
"Health":
|
53 |
-
"Hobbies_and_Leisure":
|
54 |
-
"Home_and_Garden":
|
55 |
-
"Internet_and_Telecom":
|
56 |
-
"Jobs_and_Education":
|
57 |
-
"Law_and_Government":
|
58 |
-
"News":
|
59 |
-
"Online Communities":
|
60 |
-
"People_and_Society":
|
61 |
-
"Pets_and_Animals":
|
62 |
-
"Real Estate":
|
63 |
-
"Reference":
|
64 |
-
"Science":
|
65 |
-
"Sensitive Subjects":
|
66 |
-
"Shopping":
|
67 |
-
"Sports":
|
68 |
-
"Travel_and_Transportation":
|
69 |
},
|
70 |
"max_position_embeddings": 512,
|
71 |
"model_type": "distilbert",
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "finetuned_entity_categorical_classification/checkpoint-3346",
|
3 |
"activation": "gelu",
|
4 |
"architectures": [
|
5 |
"DistilBertForSequenceClassification"
|
|
|
9 |
"dropout": 0.1,
|
10 |
"hidden_dim": 3072,
|
11 |
"id2label": {
|
12 |
+
"0": "Hobbies_and_Leisure",
|
13 |
+
"1": "News",
|
14 |
+
"2": "Science",
|
15 |
+
"3": "Autos_and_Vehicles",
|
16 |
+
"4": "Health",
|
17 |
+
"5": "Pets_and_Animals",
|
18 |
+
"6": "Adult",
|
19 |
+
"7": "Computers_and_Electronics",
|
20 |
+
"8": "Online Communities",
|
21 |
+
"9": "Beauty_and_Fitness",
|
22 |
+
"10": "People_and_Society",
|
23 |
+
"11": "Business_and_Industrial",
|
24 |
+
"12": "Reference",
|
25 |
+
"13": "Shopping",
|
26 |
+
"14": "Travel_and_Transportation",
|
27 |
+
"15": "Food_and_Drink",
|
28 |
+
"16": "Law_and_Government",
|
29 |
+
"17": "Books_and_Literature",
|
30 |
+
"18": "Finance",
|
31 |
+
"19": "Games",
|
32 |
+
"20": "Home_and_Garden",
|
33 |
+
"21": "Jobs_and_Education",
|
34 |
"22": "Arts_and_Entertainment",
|
35 |
+
"23": "Sensitive Subjects",
|
36 |
+
"24": "Real Estate",
|
37 |
+
"25": "Internet_and_Telecom",
|
38 |
+
"26": "Sports"
|
39 |
},
|
40 |
"initializer_range": 0.02,
|
41 |
"label2id": {
|
42 |
+
"Adult": 6,
|
43 |
"Arts_and_Entertainment": 22,
|
44 |
+
"Autos_and_Vehicles": 3,
|
45 |
+
"Beauty_and_Fitness": 9,
|
46 |
+
"Books_and_Literature": 17,
|
47 |
+
"Business_and_Industrial": 11,
|
48 |
+
"Computers_and_Electronics": 7,
|
49 |
+
"Finance": 18,
|
50 |
+
"Food_and_Drink": 15,
|
51 |
+
"Games": 19,
|
52 |
+
"Health": 4,
|
53 |
+
"Hobbies_and_Leisure": 0,
|
54 |
+
"Home_and_Garden": 20,
|
55 |
+
"Internet_and_Telecom": 25,
|
56 |
+
"Jobs_and_Education": 21,
|
57 |
+
"Law_and_Government": 16,
|
58 |
+
"News": 1,
|
59 |
+
"Online Communities": 8,
|
60 |
+
"People_and_Society": 10,
|
61 |
+
"Pets_and_Animals": 5,
|
62 |
+
"Real Estate": 24,
|
63 |
+
"Reference": 12,
|
64 |
+
"Science": 2,
|
65 |
+
"Sensitive Subjects": 23,
|
66 |
+
"Shopping": 13,
|
67 |
+
"Sports": 26,
|
68 |
+
"Travel_and_Transportation": 14
|
69 |
},
|
70 |
"max_position_embeddings": 512,
|
71 |
"model_type": "distilbert",
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535881018
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7482411d85a2d5cf5f632c997d2e07449fe4217bcf4b1aad0b38f9138d1acd0a
|
3 |
size 535881018
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267932842
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f30aacfea59fa26f3b7edc0f510fe6d083c82c0a92e3118f80f0b13f375cb74e
|
3 |
size 267932842
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/rng_state.pth
RENAMED
Binary files a/finetuned_entity_categorical_classification/checkpoint-1576/rng_state.pth and b/finetuned_entity_categorical_classification/checkpoint-1681/rng_state.pth differ
|
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c77a82e248c93cca9760dd3358cd21c9eded35e9713e3141aaaa12789322001
|
3 |
size 1064
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/special_tokens_map.json
RENAMED
File without changes
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/tokenizer.json
RENAMED
File without changes
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/tokenizer_config.json
RENAMED
File without changes
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/trainer_state.json
RENAMED
@@ -1,46 +1,46 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"learning_rate": 1.
|
14 |
-
"loss":
|
15 |
"step": 500
|
16 |
},
|
17 |
{
|
18 |
-
"epoch": 0.
|
19 |
-
"learning_rate": 1.
|
20 |
-
"loss": 0.
|
21 |
"step": 1000
|
22 |
},
|
23 |
{
|
24 |
-
"epoch": 0.
|
25 |
-
"learning_rate": 1.
|
26 |
-
"loss": 0.
|
27 |
"step": 1500
|
28 |
},
|
29 |
{
|
30 |
"epoch": 1.0,
|
31 |
-
"eval_accuracy": 0.
|
32 |
-
"eval_loss": 0.
|
33 |
-
"eval_runtime": 2.
|
34 |
-
"eval_samples_per_second":
|
35 |
-
"eval_steps_per_second":
|
36 |
-
"step":
|
37 |
}
|
38 |
],
|
39 |
"logging_steps": 500,
|
40 |
-
"max_steps":
|
41 |
-
"num_train_epochs":
|
42 |
"save_steps": 500,
|
43 |
-
"total_flos":
|
44 |
"trial_name": null,
|
45 |
"trial_params": null
|
46 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.10296357423067093,
|
3 |
+
"best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-1681",
|
4 |
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1681,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.3,
|
13 |
+
"learning_rate": 1.7025580011897683e-05,
|
14 |
+
"loss": 0.1045,
|
15 |
"step": 500
|
16 |
},
|
17 |
{
|
18 |
+
"epoch": 0.59,
|
19 |
+
"learning_rate": 1.405116002379536e-05,
|
20 |
+
"loss": 0.1056,
|
21 |
"step": 1000
|
22 |
},
|
23 |
{
|
24 |
+
"epoch": 0.89,
|
25 |
+
"learning_rate": 1.1076740035693041e-05,
|
26 |
+
"loss": 0.1041,
|
27 |
"step": 1500
|
28 |
},
|
29 |
{
|
30 |
"epoch": 1.0,
|
31 |
+
"eval_accuracy": 0.9721850364420646,
|
32 |
+
"eval_loss": 0.10296357423067093,
|
33 |
+
"eval_runtime": 2.316,
|
34 |
+
"eval_samples_per_second": 2902.854,
|
35 |
+
"eval_steps_per_second": 181.779,
|
36 |
+
"step": 1681
|
37 |
}
|
38 |
],
|
39 |
"logging_steps": 500,
|
40 |
+
"max_steps": 3362,
|
41 |
+
"num_train_epochs": 2,
|
42 |
"save_steps": 500,
|
43 |
+
"total_flos": 108413372385396.0,
|
44 |
"trial_name": null,
|
45 |
"trial_params": null
|
46 |
}
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4600
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2de83bc1893d1870cbe886f5287e02f718e1fe0be09dba843ccfc561aeb95ec6
|
3 |
size 4600
|
finetuned_entity_categorical_classification/{checkpoint-1576 → checkpoint-1681}/vocab.txt
RENAMED
File without changes
|
finetuned_entity_categorical_classification/checkpoint-3346/added_tokens.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[CLS]": 101,
|
3 |
+
"[MASK]": 103,
|
4 |
+
"[PAD]": 0,
|
5 |
+
"[SEP]": 102,
|
6 |
+
"[UNK]": 100
|
7 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3346/config.json
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "finetuned_entity_categorical_classification/checkpoint-3338",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "Hobbies_and_Leisure",
|
13 |
+
"1": "News",
|
14 |
+
"2": "Science",
|
15 |
+
"3": "Autos_and_Vehicles",
|
16 |
+
"4": "Health",
|
17 |
+
"5": "Pets_and_Animals",
|
18 |
+
"6": "Adult",
|
19 |
+
"7": "Computers_and_Electronics",
|
20 |
+
"8": "Online Communities",
|
21 |
+
"9": "Beauty_and_Fitness",
|
22 |
+
"10": "People_and_Society",
|
23 |
+
"11": "Business_and_Industrial",
|
24 |
+
"12": "Reference",
|
25 |
+
"13": "Shopping",
|
26 |
+
"14": "Travel_and_Transportation",
|
27 |
+
"15": "Food_and_Drink",
|
28 |
+
"16": "Law_and_Government",
|
29 |
+
"17": "Books_and_Literature",
|
30 |
+
"18": "Finance",
|
31 |
+
"19": "Games",
|
32 |
+
"20": "Home_and_Garden",
|
33 |
+
"21": "Jobs_and_Education",
|
34 |
+
"22": "Arts_and_Entertainment",
|
35 |
+
"23": "Sensitive Subjects",
|
36 |
+
"24": "Real Estate",
|
37 |
+
"25": "Internet_and_Telecom",
|
38 |
+
"26": "Sports"
|
39 |
+
},
|
40 |
+
"initializer_range": 0.02,
|
41 |
+
"label2id": {
|
42 |
+
"Adult": 6,
|
43 |
+
"Arts_and_Entertainment": 22,
|
44 |
+
"Autos_and_Vehicles": 3,
|
45 |
+
"Beauty_and_Fitness": 9,
|
46 |
+
"Books_and_Literature": 17,
|
47 |
+
"Business_and_Industrial": 11,
|
48 |
+
"Computers_and_Electronics": 7,
|
49 |
+
"Finance": 18,
|
50 |
+
"Food_and_Drink": 15,
|
51 |
+
"Games": 19,
|
52 |
+
"Health": 4,
|
53 |
+
"Hobbies_and_Leisure": 0,
|
54 |
+
"Home_and_Garden": 20,
|
55 |
+
"Internet_and_Telecom": 25,
|
56 |
+
"Jobs_and_Education": 21,
|
57 |
+
"Law_and_Government": 16,
|
58 |
+
"News": 1,
|
59 |
+
"Online Communities": 8,
|
60 |
+
"People_and_Society": 10,
|
61 |
+
"Pets_and_Animals": 5,
|
62 |
+
"Real Estate": 24,
|
63 |
+
"Reference": 12,
|
64 |
+
"Science": 2,
|
65 |
+
"Sensitive Subjects": 23,
|
66 |
+
"Shopping": 13,
|
67 |
+
"Sports": 26,
|
68 |
+
"Travel_and_Transportation": 14
|
69 |
+
},
|
70 |
+
"max_position_embeddings": 512,
|
71 |
+
"model_type": "distilbert",
|
72 |
+
"n_heads": 12,
|
73 |
+
"n_layers": 6,
|
74 |
+
"pad_token_id": 0,
|
75 |
+
"problem_type": "single_label_classification",
|
76 |
+
"qa_dropout": 0.1,
|
77 |
+
"seq_classif_dropout": 0.2,
|
78 |
+
"sinusoidal_pos_embds": false,
|
79 |
+
"tie_weights_": true,
|
80 |
+
"torch_dtype": "float32",
|
81 |
+
"transformers_version": "4.34.0",
|
82 |
+
"vocab_size": 30522
|
83 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3346/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f7fd88a2fd3f16fd9c954418fb3e47832af4a6e96026f465481de95dd8e4b99
|
3 |
+
size 535881018
|
finetuned_entity_categorical_classification/checkpoint-3346/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0316f1198be89b32f1ee6ecde222febe6895b798b64d12f8e12b0f5bdaba754
|
3 |
+
size 267932842
|
finetuned_entity_categorical_classification/checkpoint-3346/rng_state.pth
ADDED
Binary file (14.2 kB). View file
|
|
finetuned_entity_categorical_classification/checkpoint-3346/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:836b9ae7a26190d3866515097d559222a1c62e5f96c298c8360e09e55b2cf8a4
|
3 |
+
size 1064
|
finetuned_entity_categorical_classification/checkpoint-3346/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3346/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
finetuned_entity_categorical_classification/checkpoint-3346/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"additional_special_tokens": [],
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "[CLS]",
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"pad_token": "[PAD]",
|
51 |
+
"sep_token": "[SEP]",
|
52 |
+
"strip_accents": null,
|
53 |
+
"tokenize_chinese_chars": true,
|
54 |
+
"tokenizer_class": "DistilBertTokenizer",
|
55 |
+
"unk_token": "[UNK]"
|
56 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3346/trainer_state.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.10625720769166946,
|
3 |
+
"best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-1673",
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 3346,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.3,
|
13 |
+
"learning_rate": 1.7011356843992828e-05,
|
14 |
+
"loss": 0.1126,
|
15 |
+
"step": 500
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.6,
|
19 |
+
"learning_rate": 1.4022713687985656e-05,
|
20 |
+
"loss": 0.1165,
|
21 |
+
"step": 1000
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 0.9,
|
25 |
+
"learning_rate": 1.1034070531978483e-05,
|
26 |
+
"loss": 0.117,
|
27 |
+
"step": 1500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.0,
|
31 |
+
"eval_accuracy": 0.9715951562266407,
|
32 |
+
"eval_loss": 0.10625720769166946,
|
33 |
+
"eval_runtime": 2.3554,
|
34 |
+
"eval_samples_per_second": 2839.909,
|
35 |
+
"eval_steps_per_second": 177.892,
|
36 |
+
"step": 1673
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.2,
|
40 |
+
"learning_rate": 8.04542737597131e-06,
|
41 |
+
"loss": 0.0894,
|
42 |
+
"step": 2000
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 1.49,
|
46 |
+
"learning_rate": 5.056784219964137e-06,
|
47 |
+
"loss": 0.0827,
|
48 |
+
"step": 2500
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"epoch": 1.79,
|
52 |
+
"learning_rate": 2.068141063956964e-06,
|
53 |
+
"loss": 0.0755,
|
54 |
+
"step": 3000
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 2.0,
|
58 |
+
"eval_accuracy": 0.9706981611601136,
|
59 |
+
"eval_loss": 0.1135576069355011,
|
60 |
+
"eval_runtime": 2.4092,
|
61 |
+
"eval_samples_per_second": 2776.427,
|
62 |
+
"eval_steps_per_second": 173.916,
|
63 |
+
"step": 3346
|
64 |
+
}
|
65 |
+
],
|
66 |
+
"logging_steps": 500,
|
67 |
+
"max_steps": 3346,
|
68 |
+
"num_train_epochs": 2,
|
69 |
+
"save_steps": 500,
|
70 |
+
"total_flos": 209706294909150.0,
|
71 |
+
"trial_name": null,
|
72 |
+
"trial_params": null
|
73 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3346/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7813e2902539bc577c8459ca958658172359e179c4ef494972d6db5de3ada53e
|
3 |
+
size 4600
|
finetuned_entity_categorical_classification/checkpoint-3346/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
finetuned_entity_categorical_classification/checkpoint-3362/added_tokens.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[CLS]": 101,
|
3 |
+
"[MASK]": 103,
|
4 |
+
"[PAD]": 0,
|
5 |
+
"[SEP]": 102,
|
6 |
+
"[UNK]": 100
|
7 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3362/config.json
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "finetuned_entity_categorical_classification/checkpoint-3346",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "Hobbies_and_Leisure",
|
13 |
+
"1": "News",
|
14 |
+
"2": "Science",
|
15 |
+
"3": "Autos_and_Vehicles",
|
16 |
+
"4": "Health",
|
17 |
+
"5": "Pets_and_Animals",
|
18 |
+
"6": "Adult",
|
19 |
+
"7": "Computers_and_Electronics",
|
20 |
+
"8": "Online Communities",
|
21 |
+
"9": "Beauty_and_Fitness",
|
22 |
+
"10": "People_and_Society",
|
23 |
+
"11": "Business_and_Industrial",
|
24 |
+
"12": "Reference",
|
25 |
+
"13": "Shopping",
|
26 |
+
"14": "Travel_and_Transportation",
|
27 |
+
"15": "Food_and_Drink",
|
28 |
+
"16": "Law_and_Government",
|
29 |
+
"17": "Books_and_Literature",
|
30 |
+
"18": "Finance",
|
31 |
+
"19": "Games",
|
32 |
+
"20": "Home_and_Garden",
|
33 |
+
"21": "Jobs_and_Education",
|
34 |
+
"22": "Arts_and_Entertainment",
|
35 |
+
"23": "Sensitive Subjects",
|
36 |
+
"24": "Real Estate",
|
37 |
+
"25": "Internet_and_Telecom",
|
38 |
+
"26": "Sports"
|
39 |
+
},
|
40 |
+
"initializer_range": 0.02,
|
41 |
+
"label2id": {
|
42 |
+
"Adult": 6,
|
43 |
+
"Arts_and_Entertainment": 22,
|
44 |
+
"Autos_and_Vehicles": 3,
|
45 |
+
"Beauty_and_Fitness": 9,
|
46 |
+
"Books_and_Literature": 17,
|
47 |
+
"Business_and_Industrial": 11,
|
48 |
+
"Computers_and_Electronics": 7,
|
49 |
+
"Finance": 18,
|
50 |
+
"Food_and_Drink": 15,
|
51 |
+
"Games": 19,
|
52 |
+
"Health": 4,
|
53 |
+
"Hobbies_and_Leisure": 0,
|
54 |
+
"Home_and_Garden": 20,
|
55 |
+
"Internet_and_Telecom": 25,
|
56 |
+
"Jobs_and_Education": 21,
|
57 |
+
"Law_and_Government": 16,
|
58 |
+
"News": 1,
|
59 |
+
"Online Communities": 8,
|
60 |
+
"People_and_Society": 10,
|
61 |
+
"Pets_and_Animals": 5,
|
62 |
+
"Real Estate": 24,
|
63 |
+
"Reference": 12,
|
64 |
+
"Science": 2,
|
65 |
+
"Sensitive Subjects": 23,
|
66 |
+
"Shopping": 13,
|
67 |
+
"Sports": 26,
|
68 |
+
"Travel_and_Transportation": 14
|
69 |
+
},
|
70 |
+
"max_position_embeddings": 512,
|
71 |
+
"model_type": "distilbert",
|
72 |
+
"n_heads": 12,
|
73 |
+
"n_layers": 6,
|
74 |
+
"pad_token_id": 0,
|
75 |
+
"problem_type": "single_label_classification",
|
76 |
+
"qa_dropout": 0.1,
|
77 |
+
"seq_classif_dropout": 0.2,
|
78 |
+
"sinusoidal_pos_embds": false,
|
79 |
+
"tie_weights_": true,
|
80 |
+
"torch_dtype": "float32",
|
81 |
+
"transformers_version": "4.34.0",
|
82 |
+
"vocab_size": 30522
|
83 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3362/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d015879f29a2744736a3ba7748885a4ec943584a74c779bc00637389c2d90ccd
|
3 |
+
size 535881018
|
finetuned_entity_categorical_classification/checkpoint-3362/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2f9ac5b4263d73b4fe5715bd69766cb18cb5925f401945d0c67275a65364524
|
3 |
+
size 267932842
|
finetuned_entity_categorical_classification/checkpoint-3362/rng_state.pth
ADDED
Binary file (14.2 kB). View file
|
|
finetuned_entity_categorical_classification/checkpoint-3362/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8af53710b40243eb9329cc845f9ef3a957c0e1972618f070ad4cc3c95bc43973
|
3 |
+
size 1064
|
finetuned_entity_categorical_classification/checkpoint-3362/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3362/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
finetuned_entity_categorical_classification/checkpoint-3362/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"additional_special_tokens": [],
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "[CLS]",
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"pad_token": "[PAD]",
|
51 |
+
"sep_token": "[SEP]",
|
52 |
+
"strip_accents": null,
|
53 |
+
"tokenize_chinese_chars": true,
|
54 |
+
"tokenizer_class": "DistilBertTokenizer",
|
55 |
+
"unk_token": "[UNK]"
|
56 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3362/trainer_state.json
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.10296357423067093,
|
3 |
+
"best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-1681",
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 3362,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.3,
|
13 |
+
"learning_rate": 1.7025580011897683e-05,
|
14 |
+
"loss": 0.1045,
|
15 |
+
"step": 500
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"epoch": 0.59,
|
19 |
+
"learning_rate": 1.405116002379536e-05,
|
20 |
+
"loss": 0.1056,
|
21 |
+
"step": 1000
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 0.89,
|
25 |
+
"learning_rate": 1.1076740035693041e-05,
|
26 |
+
"loss": 0.1041,
|
27 |
+
"step": 1500
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"epoch": 1.0,
|
31 |
+
"eval_accuracy": 0.9721850364420646,
|
32 |
+
"eval_loss": 0.10296357423067093,
|
33 |
+
"eval_runtime": 2.316,
|
34 |
+
"eval_samples_per_second": 2902.854,
|
35 |
+
"eval_steps_per_second": 181.779,
|
36 |
+
"step": 1681
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 1.19,
|
40 |
+
"learning_rate": 8.10232004759072e-06,
|
41 |
+
"loss": 0.0776,
|
42 |
+
"step": 2000
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 1.49,
|
46 |
+
"learning_rate": 5.1279000594884e-06,
|
47 |
+
"loss": 0.0675,
|
48 |
+
"step": 2500
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"epoch": 1.78,
|
52 |
+
"learning_rate": 2.1534800713860798e-06,
|
53 |
+
"loss": 0.0773,
|
54 |
+
"step": 3000
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 2.0,
|
58 |
+
"eval_accuracy": 0.9708463483563885,
|
59 |
+
"eval_loss": 0.11056160181760788,
|
60 |
+
"eval_runtime": 2.2742,
|
61 |
+
"eval_samples_per_second": 2956.182,
|
62 |
+
"eval_steps_per_second": 185.119,
|
63 |
+
"step": 3362
|
64 |
+
}
|
65 |
+
],
|
66 |
+
"logging_steps": 500,
|
67 |
+
"max_steps": 3362,
|
68 |
+
"num_train_epochs": 2,
|
69 |
+
"save_steps": 500,
|
70 |
+
"total_flos": 216609059710134.0,
|
71 |
+
"trial_name": null,
|
72 |
+
"trial_params": null
|
73 |
+
}
|
finetuned_entity_categorical_classification/checkpoint-3362/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2de83bc1893d1870cbe886f5287e02f718e1fe0be09dba843ccfc561aeb95ec6
|
3 |
+
size 4600
|
finetuned_entity_categorical_classification/checkpoint-3362/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
finetuned_entity_categorical_classification/runs/Oct12_11-19-39_ip-172-31-95-165/events.out.tfevents.1697109579.ip-172-31-95-165.128350.0
ADDED
Binary file (7.68 kB). View file
|
|
finetuned_entity_categorical_classification/runs/Oct12_11-43-16_ip-172-31-95-165/events.out.tfevents.1697110996.ip-172-31-95-165.128941.0
ADDED
Binary file (7.68 kB). View file
|
|
finetuned_entity_categorical_classification/runs/Oct12_11-59-06_ip-172-31-95-165/events.out.tfevents.1697111947.ip-172-31-95-165.129502.0
ADDED
Binary file (7.68 kB). View file
|
|
research/08_organizing_entire_datacategories.ipynb
CHANGED
@@ -438,173 +438,173 @@
|
|
438 |
" </thead>\n",
|
439 |
" <tbody>\n",
|
440 |
" <tr>\n",
|
441 |
-
" <th>
|
442 |
-
" <td>Virtual reality in therapy 2025</td>\n",
|
443 |
-
" <td>Computers_and_Electronics</td>\n",
|
444 |
-
" <td>7</td>\n",
|
445 |
-
" </tr>\n",
|
446 |
-
" <tr>\n",
|
447 |
-
" <th>368</th>\n",
|
448 |
-
" <td>Real estate networking tips</td>\n",
|
449 |
-
" <td>Real Estate</td>\n",
|
450 |
-
" <td>24</td>\n",
|
451 |
-
" </tr>\n",
|
452 |
-
" <tr>\n",
|
453 |
-
" <th>173</th>\n",
|
454 |
-
" <td>Real estate market outlook</td>\n",
|
455 |
-
" <td>Real Estate</td>\n",
|
456 |
-
" <td>24</td>\n",
|
457 |
-
" </tr>\n",
|
458 |
-
" <tr>\n",
|
459 |
-
" <th>1045</th>\n",
|
460 |
" <td>Plus-size clothing stores and shops</td>\n",
|
461 |
" <td>Shopping</td>\n",
|
462 |
" <td>13</td>\n",
|
463 |
" </tr>\n",
|
464 |
" <tr>\n",
|
465 |
-
" <th>
|
466 |
-
" <td>
|
467 |
-
" <td>
|
468 |
-
" <td>
|
469 |
" </tr>\n",
|
470 |
" <tr>\n",
|
471 |
-
" <th>
|
472 |
-
" <td>
|
473 |
-
" <td>
|
474 |
-
" <td>
|
475 |
" </tr>\n",
|
476 |
" <tr>\n",
|
477 |
-
" <th>
|
478 |
-
" <td>
|
479 |
-
" <td>
|
480 |
-
" <td>
|
481 |
" </tr>\n",
|
482 |
" <tr>\n",
|
483 |
-
" <th>
|
484 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
485 |
" <td>Law_and_Government</td>\n",
|
486 |
" <td>16</td>\n",
|
487 |
" </tr>\n",
|
488 |
" <tr>\n",
|
489 |
-
" <th>
|
490 |
-
" <td>
|
491 |
" <td>Home_and_Garden</td>\n",
|
492 |
" <td>20</td>\n",
|
493 |
" </tr>\n",
|
494 |
" <tr>\n",
|
495 |
-
" <th>
|
496 |
-
" <td>
|
497 |
-
" <td>
|
498 |
-
" <td>
|
499 |
" </tr>\n",
|
500 |
" <tr>\n",
|
501 |
-
" <th>
|
502 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
" <td>Health</td>\n",
|
504 |
" <td>4</td>\n",
|
505 |
" </tr>\n",
|
506 |
" <tr>\n",
|
507 |
-
" <th>
|
508 |
-
" <td>
|
509 |
-
" <td>
|
510 |
-
" <td>
|
511 |
" </tr>\n",
|
512 |
" <tr>\n",
|
513 |
-
" <th>
|
514 |
-
" <td>
|
515 |
-
" <td>
|
516 |
-
" <td>
|
517 |
" </tr>\n",
|
518 |
" <tr>\n",
|
519 |
-
" <th>
|
520 |
-
" <td>
|
521 |
-
" <td>
|
522 |
-
" <td>
|
523 |
" </tr>\n",
|
524 |
" <tr>\n",
|
525 |
-
" <th>
|
526 |
-
" <td>
|
527 |
-
" <td>
|
528 |
-
" <td>
|
529 |
" </tr>\n",
|
530 |
" <tr>\n",
|
531 |
-
" <th>
|
532 |
-
" <td>
|
533 |
-
" <td>
|
534 |
-
" <td>
|
535 |
" </tr>\n",
|
536 |
" <tr>\n",
|
537 |
-
" <th>
|
538 |
-
" <td>Real estate
|
539 |
" <td>Real Estate</td>\n",
|
540 |
" <td>24</td>\n",
|
541 |
" </tr>\n",
|
542 |
" <tr>\n",
|
543 |
-
" <th>
|
544 |
-
" <td>
|
545 |
-
" <td>
|
546 |
-
" <td>
|
547 |
" </tr>\n",
|
548 |
" <tr>\n",
|
549 |
-
" <th>
|
550 |
-
" <td>
|
551 |
-
" <td>
|
552 |
-
" <td>
|
553 |
" </tr>\n",
|
554 |
" <tr>\n",
|
555 |
-
" <th>
|
556 |
-
" <td>
|
557 |
-
" <td>
|
558 |
-
" <td>
|
|
|
|
|
|
|
|
|
|
|
|
|
559 |
" </tr>\n",
|
560 |
" </tbody>\n",
|
561 |
"</table>\n",
|
562 |
"</div>"
|
563 |
],
|
564 |
"text/plain": [
|
565 |
-
"
|
566 |
-
"
|
567 |
-
"
|
568 |
-
"
|
569 |
-
"
|
570 |
-
"
|
571 |
-
"
|
572 |
-
"
|
573 |
-
"
|
574 |
-
"
|
575 |
-
"
|
576 |
-
"
|
577 |
-
"
|
578 |
-
"
|
579 |
-
"
|
580 |
-
"
|
581 |
-
"
|
582 |
-
"
|
583 |
-
"
|
584 |
-
"
|
585 |
-
"
|
586 |
"\n",
|
587 |
" label label_id \n",
|
588 |
-
"
|
589 |
-
"
|
590 |
-
"
|
591 |
-
"
|
592 |
-
"
|
593 |
-
"
|
594 |
-
"
|
595 |
-
"
|
596 |
-
"
|
597 |
-
"
|
598 |
-
"
|
599 |
-
"
|
600 |
-
"
|
601 |
-
"
|
602 |
-
"
|
603 |
-
"
|
604 |
-
"
|
605 |
-
"
|
606 |
-
"
|
607 |
-
"
|
608 |
]
|
609 |
},
|
610 |
"execution_count": 10,
|
@@ -625,21 +625,22 @@
|
|
625 |
"data": {
|
626 |
"text/plain": [
|
627 |
"label\n",
|
628 |
-
"
|
629 |
-
"Shopping
|
630 |
-
"
|
|
|
631 |
"Sports 1399\n",
|
632 |
"Online Communities 1396\n",
|
633 |
"Travel_and_Transportation 1355\n",
|
634 |
"Internet_and_Telecom 1353\n",
|
635 |
-
"
|
636 |
"Beauty_and_Fitness 1259\n",
|
637 |
"People_and_Society 1250\n",
|
638 |
-
"Pets_and_Animals 1228\n",
|
639 |
"Law_and_Government 1226\n",
|
640 |
"Home_and_Garden 1200\n",
|
641 |
"News 1199\n",
|
642 |
"Jobs_and_Education 1188\n",
|
|
|
643 |
"Arts_and_Entertainment 1162\n",
|
644 |
"Business_and_Industrial 1124\n",
|
645 |
"Adult 1100\n",
|
@@ -647,11 +648,10 @@
|
|
647 |
"Autos_and_Vehicles 1072\n",
|
648 |
"Science 1055\n",
|
649 |
"Hobbies_and_Leisure 1049\n",
|
650 |
-
"Finance 1000\n",
|
651 |
-
"Real Estate 1000\n",
|
652 |
"Books_and_Literature 1000\n",
|
|
|
|
|
653 |
"Games 700\n",
|
654 |
-
"Sensitive Subjects 688\n",
|
655 |
"Name: count, dtype: int64"
|
656 |
]
|
657 |
},
|
@@ -698,121 +698,121 @@
|
|
698 |
" <tbody>\n",
|
699 |
" <tr>\n",
|
700 |
" <th>0</th>\n",
|
701 |
-
" <td>
|
702 |
-
" <td>
|
703 |
-
" <td>
|
704 |
" </tr>\n",
|
705 |
" <tr>\n",
|
706 |
" <th>1</th>\n",
|
707 |
-
" <td>
|
708 |
-
" <td>
|
709 |
-
" <td>
|
710 |
" </tr>\n",
|
711 |
" <tr>\n",
|
712 |
" <th>2</th>\n",
|
713 |
-
" <td>
|
714 |
-
" <td>
|
715 |
-
" <td>
|
716 |
" </tr>\n",
|
717 |
" <tr>\n",
|
718 |
" <th>3</th>\n",
|
719 |
-
" <td>
|
720 |
" <td>Food_and_Drink</td>\n",
|
721 |
" <td>15</td>\n",
|
722 |
" </tr>\n",
|
723 |
" <tr>\n",
|
724 |
" <th>4</th>\n",
|
725 |
-
" <td>
|
726 |
-
" <td>
|
727 |
-
" <td>
|
728 |
" </tr>\n",
|
729 |
" <tr>\n",
|
730 |
" <th>5</th>\n",
|
731 |
-
" <td>
|
732 |
-
" <td>
|
733 |
-
" <td>
|
734 |
" </tr>\n",
|
735 |
" <tr>\n",
|
736 |
" <th>6</th>\n",
|
737 |
-
" <td>
|
738 |
-
" <td>
|
739 |
-
" <td>
|
740 |
" </tr>\n",
|
741 |
" <tr>\n",
|
742 |
" <th>7</th>\n",
|
743 |
-
" <td>
|
744 |
-
" <td>
|
745 |
-
" <td>
|
746 |
" </tr>\n",
|
747 |
" <tr>\n",
|
748 |
" <th>8</th>\n",
|
749 |
-
" <td>
|
750 |
-
" <td>
|
751 |
-
" <td>
|
752 |
" </tr>\n",
|
753 |
" <tr>\n",
|
754 |
" <th>9</th>\n",
|
755 |
-
" <td>
|
756 |
-
" <td>
|
757 |
-
" <td>
|
758 |
" </tr>\n",
|
759 |
" <tr>\n",
|
760 |
" <th>10</th>\n",
|
761 |
-
" <td>
|
762 |
-
" <td>
|
763 |
-
" <td>
|
764 |
" </tr>\n",
|
765 |
" <tr>\n",
|
766 |
" <th>11</th>\n",
|
767 |
-
" <td>
|
768 |
-
" <td>
|
769 |
-
" <td>
|
770 |
" </tr>\n",
|
771 |
" <tr>\n",
|
772 |
" <th>12</th>\n",
|
773 |
-
" <td>
|
774 |
-
" <td>
|
775 |
-
" <td>
|
776 |
" </tr>\n",
|
777 |
" <tr>\n",
|
778 |
" <th>13</th>\n",
|
779 |
-
" <td>
|
780 |
-
" <td>
|
781 |
-
" <td>
|
782 |
" </tr>\n",
|
783 |
" <tr>\n",
|
784 |
" <th>14</th>\n",
|
785 |
-
" <td>
|
786 |
-
" <td>
|
787 |
-
" <td>
|
788 |
" </tr>\n",
|
789 |
" <tr>\n",
|
790 |
" <th>15</th>\n",
|
791 |
-
" <td>
|
792 |
-
" <td>
|
793 |
-
" <td>
|
794 |
" </tr>\n",
|
795 |
" <tr>\n",
|
796 |
" <th>16</th>\n",
|
797 |
-
" <td>
|
798 |
-
" <td>
|
799 |
-
" <td>
|
800 |
" </tr>\n",
|
801 |
" <tr>\n",
|
802 |
" <th>17</th>\n",
|
803 |
-
" <td>
|
804 |
-
" <td>
|
805 |
-
" <td>
|
806 |
" </tr>\n",
|
807 |
" <tr>\n",
|
808 |
" <th>18</th>\n",
|
809 |
-
" <td>
|
810 |
-
" <td>
|
811 |
-
" <td>
|
812 |
" </tr>\n",
|
813 |
" <tr>\n",
|
814 |
" <th>19</th>\n",
|
815 |
-
" <td>eSports Game
|
816 |
" <td>Sports</td>\n",
|
817 |
" <td>26</td>\n",
|
818 |
" </tr>\n",
|
@@ -821,49 +821,49 @@
|
|
821 |
"</div>"
|
822 |
],
|
823 |
"text/plain": [
|
824 |
-
" category
|
825 |
-
"0
|
826 |
-
"1
|
827 |
-
"2
|
828 |
-
"3
|
829 |
-
"4
|
830 |
-
"5
|
831 |
-
"6
|
832 |
-
"7
|
833 |
-
"8
|
834 |
-
"9
|
835 |
-
"10
|
836 |
-
"11
|
837 |
-
"12
|
838 |
-
"13
|
839 |
-
"14
|
840 |
-
"15
|
841 |
-
"16
|
842 |
-
"17
|
843 |
-
"18
|
844 |
-
"19
|
845 |
"\n",
|
846 |
-
"
|
847 |
-
"0
|
848 |
-
"1
|
849 |
-
"2
|
850 |
-
"3
|
851 |
-
"4
|
852 |
-
"5
|
853 |
-
"6
|
854 |
-
"7
|
855 |
-
"8
|
856 |
-
"9
|
857 |
-
"10
|
858 |
-
"11
|
859 |
-
"12
|
860 |
-
"13
|
861 |
-
"14
|
862 |
-
"15
|
863 |
-
"16
|
864 |
-
"17
|
865 |
-
"18
|
866 |
-
"19 26 "
|
867 |
]
|
868 |
},
|
869 |
"execution_count": 12,
|
|
|
438 |
" </thead>\n",
|
439 |
" <tbody>\n",
|
440 |
" <tr>\n",
|
441 |
+
" <th>1201</th>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
442 |
" <td>Plus-size clothing stores and shops</td>\n",
|
443 |
" <td>Shopping</td>\n",
|
444 |
" <td>13</td>\n",
|
445 |
" </tr>\n",
|
446 |
" <tr>\n",
|
447 |
+
" <th>853</th>\n",
|
448 |
+
" <td>Citation context extraction techniques</td>\n",
|
449 |
+
" <td>Reference</td>\n",
|
450 |
+
" <td>12</td>\n",
|
451 |
" </tr>\n",
|
452 |
" <tr>\n",
|
453 |
+
" <th>1034</th>\n",
|
454 |
+
" <td>Cat ear headphones with aux</td>\n",
|
455 |
+
" <td>Computers_and_Electronics</td>\n",
|
456 |
+
" <td>7</td>\n",
|
457 |
" </tr>\n",
|
458 |
" <tr>\n",
|
459 |
+
" <th>632</th>\n",
|
460 |
+
" <td>promote such behavior</td>\n",
|
461 |
+
" <td>Sensitive Subjects</td>\n",
|
462 |
+
" <td>23</td>\n",
|
463 |
" </tr>\n",
|
464 |
" <tr>\n",
|
465 |
+
" <th>91</th>\n",
|
466 |
+
" <td>Literature review references</td>\n",
|
467 |
+
" <td>Reference</td>\n",
|
468 |
+
" <td>12</td>\n",
|
469 |
+
" </tr>\n",
|
470 |
+
" <tr>\n",
|
471 |
+
" <th>168</th>\n",
|
472 |
+
" <td>Freedom of speech cases</td>\n",
|
473 |
" <td>Law_and_Government</td>\n",
|
474 |
" <td>16</td>\n",
|
475 |
" </tr>\n",
|
476 |
" <tr>\n",
|
477 |
+
" <th>1111</th>\n",
|
478 |
+
" <td>French country kitchen design inspiration DIY</td>\n",
|
479 |
" <td>Home_and_Garden</td>\n",
|
480 |
" <td>20</td>\n",
|
481 |
" </tr>\n",
|
482 |
" <tr>\n",
|
483 |
+
" <th>492</th>\n",
|
484 |
+
" <td>Credit score improvement techniques overview</td>\n",
|
485 |
+
" <td>Finance</td>\n",
|
486 |
+
" <td>18</td>\n",
|
487 |
" </tr>\n",
|
488 |
" <tr>\n",
|
489 |
+
" <th>657</th>\n",
|
490 |
+
" <td>regulated by laws</td>\n",
|
491 |
+
" <td>Sensitive Subjects</td>\n",
|
492 |
+
" <td>23</td>\n",
|
493 |
+
" </tr>\n",
|
494 |
+
" <tr>\n",
|
495 |
+
" <th>1037</th>\n",
|
496 |
+
" <td>Health Education for Seniors</td>\n",
|
497 |
" <td>Health</td>\n",
|
498 |
" <td>4</td>\n",
|
499 |
" </tr>\n",
|
500 |
" <tr>\n",
|
501 |
+
" <th>109</th>\n",
|
502 |
+
" <td>Quantum mechanics experiments</td>\n",
|
503 |
+
" <td>Science</td>\n",
|
504 |
+
" <td>2</td>\n",
|
505 |
" </tr>\n",
|
506 |
" <tr>\n",
|
507 |
+
" <th>538</th>\n",
|
508 |
+
" <td>Healthcare AI applications</td>\n",
|
509 |
+
" <td>Science</td>\n",
|
510 |
+
" <td>2</td>\n",
|
511 |
" </tr>\n",
|
512 |
" <tr>\n",
|
513 |
+
" <th>1386</th>\n",
|
514 |
+
" <td>AirPods Pro Case</td>\n",
|
515 |
+
" <td>Computers_and_Electronics</td>\n",
|
516 |
+
" <td>7</td>\n",
|
517 |
" </tr>\n",
|
518 |
" <tr>\n",
|
519 |
+
" <th>844</th>\n",
|
520 |
+
" <td>DIY home electrical repairs</td>\n",
|
521 |
+
" <td>Home_and_Garden</td>\n",
|
522 |
+
" <td>20</td>\n",
|
523 |
" </tr>\n",
|
524 |
" <tr>\n",
|
525 |
+
" <th>439</th>\n",
|
526 |
+
" <td>tube sex</td>\n",
|
527 |
+
" <td>Adult</td>\n",
|
528 |
+
" <td>6</td>\n",
|
529 |
" </tr>\n",
|
530 |
" <tr>\n",
|
531 |
+
" <th>231</th>\n",
|
532 |
+
" <td>Real estate sales tactics</td>\n",
|
533 |
" <td>Real Estate</td>\n",
|
534 |
" <td>24</td>\n",
|
535 |
" </tr>\n",
|
536 |
" <tr>\n",
|
537 |
+
" <th>610</th>\n",
|
538 |
+
" <td>Home solar panel cleaning and maintenance</td>\n",
|
539 |
+
" <td>Home_and_Garden</td>\n",
|
540 |
+
" <td>20</td>\n",
|
541 |
" </tr>\n",
|
542 |
" <tr>\n",
|
543 |
+
" <th>422</th>\n",
|
544 |
+
" <td>Real estate legal issues</td>\n",
|
545 |
+
" <td>Real Estate</td>\n",
|
546 |
+
" <td>24</td>\n",
|
547 |
" </tr>\n",
|
548 |
" <tr>\n",
|
549 |
+
" <th>222</th>\n",
|
550 |
+
" <td>Film industry news</td>\n",
|
551 |
+
" <td>Arts_and_Entertainment</td>\n",
|
552 |
+
" <td>22</td>\n",
|
553 |
+
" </tr>\n",
|
554 |
+
" <tr>\n",
|
555 |
+
" <th>1077</th>\n",
|
556 |
+
" <td>Cat ear headphones for PS4</td>\n",
|
557 |
+
" <td>Computers_and_Electronics</td>\n",
|
558 |
+
" <td>7</td>\n",
|
559 |
" </tr>\n",
|
560 |
" </tbody>\n",
|
561 |
"</table>\n",
|
562 |
"</div>"
|
563 |
],
|
564 |
"text/plain": [
|
565 |
+
" category \\\n",
|
566 |
+
"1201 Plus-size clothing stores and shops \n",
|
567 |
+
"853 Citation context extraction techniques \n",
|
568 |
+
"1034 Cat ear headphones with aux \n",
|
569 |
+
"632 promote such behavior \n",
|
570 |
+
"91 Literature review references \n",
|
571 |
+
"168 Freedom of speech cases \n",
|
572 |
+
"1111 French country kitchen design inspiration DIY \n",
|
573 |
+
"492 Credit score improvement techniques overview \n",
|
574 |
+
"657 regulated by laws \n",
|
575 |
+
"1037 Health Education for Seniors \n",
|
576 |
+
"109 Quantum mechanics experiments \n",
|
577 |
+
"538 Healthcare AI applications \n",
|
578 |
+
"1386 AirPods Pro Case \n",
|
579 |
+
"844 DIY home electrical repairs \n",
|
580 |
+
"439 tube sex \n",
|
581 |
+
"231 Real estate sales tactics \n",
|
582 |
+
"610 Home solar panel cleaning and maintenance \n",
|
583 |
+
"422 Real estate legal issues \n",
|
584 |
+
"222 Film industry news \n",
|
585 |
+
"1077 Cat ear headphones for PS4 \n",
|
586 |
"\n",
|
587 |
" label label_id \n",
|
588 |
+
"1201 Shopping 13 \n",
|
589 |
+
"853 Reference 12 \n",
|
590 |
+
"1034 Computers_and_Electronics 7 \n",
|
591 |
+
"632 Sensitive Subjects 23 \n",
|
592 |
+
"91 Reference 12 \n",
|
593 |
+
"168 Law_and_Government 16 \n",
|
594 |
+
"1111 Home_and_Garden 20 \n",
|
595 |
+
"492 Finance 18 \n",
|
596 |
+
"657 Sensitive Subjects 23 \n",
|
597 |
+
"1037 Health 4 \n",
|
598 |
+
"109 Science 2 \n",
|
599 |
+
"538 Science 2 \n",
|
600 |
+
"1386 Computers_and_Electronics 7 \n",
|
601 |
+
"844 Home_and_Garden 20 \n",
|
602 |
+
"439 Adult 6 \n",
|
603 |
+
"231 Real Estate 24 \n",
|
604 |
+
"610 Home_and_Garden 20 \n",
|
605 |
+
"422 Real Estate 24 \n",
|
606 |
+
"222 Arts_and_Entertainment 22 \n",
|
607 |
+
"1077 Computers_and_Electronics 7 "
|
608 |
]
|
609 |
},
|
610 |
"execution_count": 10,
|
|
|
625 |
"data": {
|
626 |
"text/plain": [
|
627 |
"label\n",
|
628 |
+
"Computers_and_Electronics 1959\n",
|
629 |
+
"Shopping 1912\n",
|
630 |
+
"Food_and_Drink 1851\n",
|
631 |
+
"Reference 1453\n",
|
632 |
"Sports 1399\n",
|
633 |
"Online Communities 1396\n",
|
634 |
"Travel_and_Transportation 1355\n",
|
635 |
"Internet_and_Telecom 1353\n",
|
636 |
+
"Pets_and_Animals 1324\n",
|
637 |
"Beauty_and_Fitness 1259\n",
|
638 |
"People_and_Society 1250\n",
|
|
|
639 |
"Law_and_Government 1226\n",
|
640 |
"Home_and_Garden 1200\n",
|
641 |
"News 1199\n",
|
642 |
"Jobs_and_Education 1188\n",
|
643 |
+
"Real Estate 1166\n",
|
644 |
"Arts_and_Entertainment 1162\n",
|
645 |
"Business_and_Industrial 1124\n",
|
646 |
"Adult 1100\n",
|
|
|
648 |
"Autos_and_Vehicles 1072\n",
|
649 |
"Science 1055\n",
|
650 |
"Hobbies_and_Leisure 1049\n",
|
|
|
|
|
651 |
"Books_and_Literature 1000\n",
|
652 |
+
"Finance 1000\n",
|
653 |
+
"Sensitive Subjects 762\n",
|
654 |
"Games 700\n",
|
|
|
655 |
"Name: count, dtype: int64"
|
656 |
]
|
657 |
},
|
|
|
698 |
" <tbody>\n",
|
699 |
" <tr>\n",
|
700 |
" <th>0</th>\n",
|
701 |
+
" <td>Internet usage monitoring</td>\n",
|
702 |
+
" <td>Internet_and_Telecom</td>\n",
|
703 |
+
" <td>25</td>\n",
|
704 |
" </tr>\n",
|
705 |
" <tr>\n",
|
706 |
" <th>1</th>\n",
|
707 |
+
" <td>Food safety guidelines and regulations</td>\n",
|
708 |
+
" <td>Food_and_Drink</td>\n",
|
709 |
+
" <td>15</td>\n",
|
710 |
" </tr>\n",
|
711 |
" <tr>\n",
|
712 |
" <th>2</th>\n",
|
713 |
+
" <td>Internet protocols and edge computing in finance</td>\n",
|
714 |
+
" <td>Internet_and_Telecom</td>\n",
|
715 |
+
" <td>25</td>\n",
|
716 |
" </tr>\n",
|
717 |
" <tr>\n",
|
718 |
" <th>3</th>\n",
|
719 |
+
" <td>Online grocery shopping</td>\n",
|
720 |
" <td>Food_and_Drink</td>\n",
|
721 |
" <td>15</td>\n",
|
722 |
" </tr>\n",
|
723 |
" <tr>\n",
|
724 |
" <th>4</th>\n",
|
725 |
+
" <td>Writing retreats for poets and novelists</td>\n",
|
726 |
+
" <td>Books_and_Literature</td>\n",
|
727 |
+
" <td>17</td>\n",
|
728 |
" </tr>\n",
|
729 |
" <tr>\n",
|
730 |
" <th>5</th>\n",
|
731 |
+
" <td>Unicorn cat ear headphones</td>\n",
|
732 |
+
" <td>Computers_and_Electronics</td>\n",
|
733 |
+
" <td>7</td>\n",
|
734 |
" </tr>\n",
|
735 |
" <tr>\n",
|
736 |
" <th>6</th>\n",
|
737 |
+
" <td>Reference citation context tagging techniques</td>\n",
|
738 |
+
" <td>Reference</td>\n",
|
739 |
+
" <td>12</td>\n",
|
740 |
" </tr>\n",
|
741 |
" <tr>\n",
|
742 |
" <th>7</th>\n",
|
743 |
+
" <td>Motorcycle riding tips for beginners gear chec...</td>\n",
|
744 |
+
" <td>Autos_and_Vehicles</td>\n",
|
745 |
+
" <td>3</td>\n",
|
746 |
" </tr>\n",
|
747 |
" <tr>\n",
|
748 |
" <th>8</th>\n",
|
749 |
+
" <td>Space agency missions</td>\n",
|
750 |
+
" <td>Science</td>\n",
|
751 |
+
" <td>2</td>\n",
|
752 |
" </tr>\n",
|
753 |
" <tr>\n",
|
754 |
" <th>9</th>\n",
|
755 |
+
" <td>Game streaming self-promotion and growth tactics</td>\n",
|
756 |
+
" <td>Games</td>\n",
|
757 |
+
" <td>19</td>\n",
|
758 |
" </tr>\n",
|
759 |
" <tr>\n",
|
760 |
" <th>10</th>\n",
|
761 |
+
" <td>sex videos movies</td>\n",
|
762 |
+
" <td>Adult</td>\n",
|
763 |
+
" <td>6</td>\n",
|
764 |
" </tr>\n",
|
765 |
" <tr>\n",
|
766 |
" <th>11</th>\n",
|
767 |
+
" <td>Citation context organization methods</td>\n",
|
768 |
+
" <td>Reference</td>\n",
|
769 |
+
" <td>12</td>\n",
|
770 |
" </tr>\n",
|
771 |
" <tr>\n",
|
772 |
" <th>12</th>\n",
|
773 |
+
" <td>Healthy office snacks</td>\n",
|
774 |
+
" <td>Health</td>\n",
|
775 |
+
" <td>4</td>\n",
|
776 |
" </tr>\n",
|
777 |
" <tr>\n",
|
778 |
" <th>13</th>\n",
|
779 |
+
" <td>Indigenous rights advocacy</td>\n",
|
780 |
+
" <td>People_and_Society</td>\n",
|
781 |
+
" <td>10</td>\n",
|
782 |
" </tr>\n",
|
783 |
" <tr>\n",
|
784 |
" <th>14</th>\n",
|
785 |
+
" <td>News talk shows</td>\n",
|
786 |
+
" <td>News</td>\n",
|
787 |
+
" <td>1</td>\n",
|
788 |
" </tr>\n",
|
789 |
" <tr>\n",
|
790 |
" <th>15</th>\n",
|
791 |
+
" <td>Best facial cleansers</td>\n",
|
792 |
+
" <td>Hobbies_and_Leisure</td>\n",
|
793 |
+
" <td>0</td>\n",
|
794 |
" </tr>\n",
|
795 |
" <tr>\n",
|
796 |
" <th>16</th>\n",
|
797 |
+
" <td>Letter of recommendation</td>\n",
|
798 |
+
" <td>Reference</td>\n",
|
799 |
+
" <td>12</td>\n",
|
800 |
" </tr>\n",
|
801 |
" <tr>\n",
|
802 |
" <th>17</th>\n",
|
803 |
+
" <td>Fossil preservation techniques</td>\n",
|
804 |
+
" <td>Science</td>\n",
|
805 |
+
" <td>2</td>\n",
|
806 |
" </tr>\n",
|
807 |
" <tr>\n",
|
808 |
" <th>18</th>\n",
|
809 |
+
" <td>Marriage equality</td>\n",
|
810 |
+
" <td>People_and_Society</td>\n",
|
811 |
+
" <td>10</td>\n",
|
812 |
" </tr>\n",
|
813 |
" <tr>\n",
|
814 |
" <th>19</th>\n",
|
815 |
+
" <td>eSports Game Esports Player Fan Engagement Ini...</td>\n",
|
816 |
" <td>Sports</td>\n",
|
817 |
" <td>26</td>\n",
|
818 |
" </tr>\n",
|
|
|
821 |
"</div>"
|
822 |
],
|
823 |
"text/plain": [
|
824 |
+
" category \\\n",
|
825 |
+
"0 Internet usage monitoring \n",
|
826 |
+
"1 Food safety guidelines and regulations \n",
|
827 |
+
"2 Internet protocols and edge computing in finance \n",
|
828 |
+
"3 Online grocery shopping \n",
|
829 |
+
"4 Writing retreats for poets and novelists \n",
|
830 |
+
"5 Unicorn cat ear headphones \n",
|
831 |
+
"6 Reference citation context tagging techniques \n",
|
832 |
+
"7 Motorcycle riding tips for beginners gear chec... \n",
|
833 |
+
"8 Space agency missions \n",
|
834 |
+
"9 Game streaming self-promotion and growth tactics \n",
|
835 |
+
"10 sex videos movies \n",
|
836 |
+
"11 Citation context organization methods \n",
|
837 |
+
"12 Healthy office snacks \n",
|
838 |
+
"13 Indigenous rights advocacy \n",
|
839 |
+
"14 News talk shows \n",
|
840 |
+
"15 Best facial cleansers \n",
|
841 |
+
"16 Letter of recommendation \n",
|
842 |
+
"17 Fossil preservation techniques \n",
|
843 |
+
"18 Marriage equality \n",
|
844 |
+
"19 eSports Game Esports Player Fan Engagement Ini... \n",
|
845 |
"\n",
|
846 |
+
" label label_id \n",
|
847 |
+
"0 Internet_and_Telecom 25 \n",
|
848 |
+
"1 Food_and_Drink 15 \n",
|
849 |
+
"2 Internet_and_Telecom 25 \n",
|
850 |
+
"3 Food_and_Drink 15 \n",
|
851 |
+
"4 Books_and_Literature 17 \n",
|
852 |
+
"5 Computers_and_Electronics 7 \n",
|
853 |
+
"6 Reference 12 \n",
|
854 |
+
"7 Autos_and_Vehicles 3 \n",
|
855 |
+
"8 Science 2 \n",
|
856 |
+
"9 Games 19 \n",
|
857 |
+
"10 Adult 6 \n",
|
858 |
+
"11 Reference 12 \n",
|
859 |
+
"12 Health 4 \n",
|
860 |
+
"13 People_and_Society 10 \n",
|
861 |
+
"14 News 1 \n",
|
862 |
+
"15 Hobbies_and_Leisure 0 \n",
|
863 |
+
"16 Reference 12 \n",
|
864 |
+
"17 Science 2 \n",
|
865 |
+
"18 People_and_Society 10 \n",
|
866 |
+
"19 Sports 26 "
|
867 |
]
|
868 |
},
|
869 |
"execution_count": 12,
|