Update README.md
Browse files
README.md
CHANGED
@@ -10,7 +10,7 @@ ProtST for binary localization
|
|
10 |
## Running script
|
11 |
```python
|
12 |
from transformers import AutoModel, AutoTokenizer, HfArgumentParser, TrainingArguments, Trainer
|
13 |
-
from transformers.data.data_collator import
|
14 |
from transformers.trainer_pt_utils import get_parameter_names
|
15 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
16 |
from datasets import load_dataset
|
@@ -125,8 +125,6 @@ if __name__ == "__main__":
|
|
125 |
for split in ["train", "validation", "test"]:
|
126 |
raw_dataset[split] = raw_dataset[split].map(func_tokenize_protein, batched=False, remove_columns=["Unnamed: 0", "prot_seq", "localization"])
|
127 |
|
128 |
-
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.0)
|
129 |
-
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
|
130 |
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
|
131 |
|
132 |
transformers.utils.logging.set_verbosity_info()
|
|
|
10 |
## Running script
|
11 |
```python
|
12 |
from transformers import AutoModel, AutoTokenizer, HfArgumentParser, TrainingArguments, Trainer
|
13 |
+
from transformers.data.data_collator import DataCollatorWithPadding
|
14 |
from transformers.trainer_pt_utils import get_parameter_names
|
15 |
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
|
16 |
from datasets import load_dataset
|
|
|
125 |
for split in ["train", "validation", "test"]:
|
126 |
raw_dataset[split] = raw_dataset[split].map(func_tokenize_protein, batched=False, remove_columns=["Unnamed: 0", "prot_seq", "localization"])
|
127 |
|
|
|
|
|
128 |
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
|
129 |
|
130 |
transformers.utils.logging.set_verbosity_info()
|