celise88 committed on
Commit 88a5ae5 · 1 Parent(s): 23fc651

add model shards to reduce memory consumption

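For reference, a minimal sketch of how a checkpoint can be split into shards like the ones added below, assuming the fine-tuned model is pulled from the Hub; the "50MB" shard size is an assumed value chosen only to roughly match the shard files in this commit, not the exact command that was run.

# Sketch only: save_pretrained() with max_shard_size writes
# pytorch_model-0000X-of-0000N.bin shards plus pytorch_model.bin.index.json,
# so no single large weight file has to be read whole at load time.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "celise88/distilbert-base-uncased-finetuned-binary-classifier"
model = AutoModelForSequenceClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

model.save_pretrained("static/model_shards", max_shard_size="50MB")  # "50MB" is an assumption
tokenizer.save_pretrained("static/tokenizer_shards")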
.gitattributes CHANGED
@@ -1 +1,2 @@
- static/*.csv filter=lfs diff=lfs merge=lfs -text
+ static/*.csv filter=lfs diff=lfs merge=lfs -text
+ static/model_shards/*.bin filter=lfs diff=lfs merge=lfs -text
main.py CHANGED
@@ -25,7 +25,7 @@ templates = Jinja2Templates(directory="templates/")
  onet = pd.read_csv('static/ONET_JobTitles.csv')
  simdat = pd.read_csv('static/cohere_embeddings.csv')
 
- classifier = pipeline('text-classification', model='celise88/distilbert-base-uncased-finetuned-binary-classifier', tokenizer='celise88/distilbert-base-uncased-finetuned-binary-classifier')
+ classifier = pipeline('text-classification', model = 'static/model_shards', tokenizer = 'static/tokenizer_shards')
 
  ### job information center ###
  # get
@@ -87,7 +87,6 @@ def get_resume(request: Request, resume: UploadFile = File(...)):
  with open(path, 'wb') as buffer:
      buffer.write(resume.file.read())
  file = Document(path)
- print(file)
  text = []
  for para in file.paragraphs:
      text.append(para.text)
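The pipeline now points at local directories. A minimal sketch of the equivalent explicit loading, which reads pytorch_model.bin.index.json and materializes the weights shard by shard rather than from one large pytorch_model.bin; the sample input string is made up.

from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Loading from the sharded directory keeps peak memory lower: weights are
# copied in shard by shard instead of from a single full state dict.
model = AutoModelForSequenceClassification.from_pretrained("static/model_shards")
tokenizer = AutoTokenizer.from_pretrained("static/tokenizer_shards")
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

print(classifier("Managed a team of five engineers"))  # made-up example input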
static/model_shards/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "initializer_range": 0.02,
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "problem_type": "single_label_classification",
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "vocab_size": 30522
+ }
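A quick sanity check that the shard directory is usable: a sketch that only reads the config added above.

from transformers import AutoConfig

# Reads static/model_shards/config.json added in this commit.
config = AutoConfig.from_pretrained("static/model_shards")
print(config.model_type, config.n_layers, config.dim)  # distilbert 6 768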
static/model_shards/pytorch_model-00001-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b71425a895e228378ca2e132485db2027a2d04fa588241bbe3c91d7557167be
+ size 537
static/model_shards/pytorch_model-00002-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6912a218252c3bd43d77edb6a94f9baea358e0ef3b0cbb1d7c565dff7317f67c
+ size 93764522
static/model_shards/pytorch_model-00003-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d000e5c4c8e8d61e178943368d71e1a9d2fc6c3ea9d9f58ade1668599ace06ed
+ size 48846141
static/model_shards/pytorch_model-00004-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a3e308faaa42d27babfd6e8b29d65deeb66b109d0477a9e0c9f76a70af3ce3f
+ size 47263787
static/model_shards/pytorch_model-00005-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1951867aacee99f76ec6374166ff03869c8d5cca4004cfe496cd36e948b8c745
+ size 49618047
static/model_shards/pytorch_model-00006-of-00006.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae443170da3a02b133bfd6f8bc2c8b2c205b1e89e7c38950886eeb920cb9f406
+ size 28363923
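The .bin files above are stored as Git LFS pointers, three key-value lines each. A small sketch for reading one pointer when the LFS content has not been fetched; the helper name is made up.

def read_lfs_pointer(path):
    # Each non-empty line is "key value", e.g. "size 93764522".
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

ptr = read_lfs_pointer("static/model_shards/pytorch_model-00002-of-00006.bin")
print(ptr["oid"], ptr["size"])  # sha256:6912a2... 93764522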
static/model_shards/pytorch_model.bin.index.json ADDED
@@ -0,0 +1,111 @@
+ {
+   "metadata": {
+     "total_size": 267820040
+   },
+   "weight_map": {
+     "classifier.bias": "pytorch_model-00006-of-00006.bin",
+     "classifier.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.embeddings.LayerNorm.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.embeddings.LayerNorm.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.embeddings.position_embeddings.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.embeddings.word_embeddings.weight": "pytorch_model-00002-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.ffn.lin2.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.ffn.lin2.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.output_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.output_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.0.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.k_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.k_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.out_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.out_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.q_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.q_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.v_lin.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.attention.v_lin.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.ffn.lin1.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.ffn.lin1.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.1.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.1.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.1.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.1.sa_layer_norm.bias": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.1.sa_layer_norm.weight": "pytorch_model-00003-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.ffn.lin1.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.ffn.lin1.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.ffn.lin2.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.ffn.lin2.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.output_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.output_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.2.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.k_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.k_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.out_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.out_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.q_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.q_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.v_lin.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.attention.v_lin.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.3.sa_layer_norm.bias": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.3.sa_layer_norm.weight": "pytorch_model-00004-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.k_lin.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.k_lin.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.out_lin.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.out_lin.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.v_lin.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.attention.v_lin.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.ffn.lin1.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.ffn.lin1.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.ffn.lin2.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.ffn.lin2.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.output_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.output_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.sa_layer_norm.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.4.sa_layer_norm.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.k_lin.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.k_lin.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.out_lin.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.out_lin.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.q_lin.bias": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.q_lin.weight": "pytorch_model-00005-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.v_lin.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.attention.v_lin.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.ffn.lin1.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.ffn.lin1.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.ffn.lin2.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.ffn.lin2.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.output_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.output_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.sa_layer_norm.bias": "pytorch_model-00006-of-00006.bin",
+     "distilbert.transformer.layer.5.sa_layer_norm.weight": "pytorch_model-00006-of-00006.bin",
+     "pre_classifier.bias": "pytorch_model-00006-of-00006.bin",
+     "pre_classifier.weight": "pytorch_model-00006-of-00006.bin"
+   }
+ }
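A small sketch for inspecting the index added above, for example to see which shard holds a given tensor and how many tensors each shard carries:

import json
from collections import Counter

with open("static/model_shards/pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])           # 267820040 bytes (~268 MB)
print(index["weight_map"]["classifier.weight"])  # pytorch_model-00006-of-00006.bin
print(Counter(index["weight_map"].values()).most_common(3))  # tensors per shard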
static/tokenizer_shards/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
static/tokenizer_shards/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
static/tokenizer_shards/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "name_or_path": "celise88/distilbert-base-uncased-finetuned-binary-classifier",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
static/tokenizer_shards/vocab.txt ADDED
The diff for this file is too large to render. See raw diff