Upload config
Browse files
- README.md +2 -2
- config.json +2 -2
README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
---
|
|
|
|
|
2 |
license: mit
|
3 |
datasets:
|
4 |
- Salesforce/wikitext
|
5 |
-
language:
|
6 |
-
- en
|
7 |
---
|
8 |
|
9 |
This is a custom implementation of GPT-2 in which we replace the attention module with our own implementation. Currently, we do not replace softmax, but in future commits we would like to replace the softmax function in attention with other softmax variants.
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
license: mit
|
5 |
datasets:
|
6 |
- Salesforce/wikitext
|
|
|
|
|
7 |
---
|
8 |
|
9 |
This is a custom implementation of GPT-2 in which we replace the attention module with our own implementation. Currently, we do not replace softmax, but in future commits we would like to replace the softmax function in attention with other softmax variants.
|
config.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "gpt2
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
-
"
|
6 |
],
|
7 |
"attn_pdrop": 0.1,
|
8 |
"bos_token_id": 50256,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "gpt2",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
],
|
7 |
"attn_pdrop": 0.1,
|
8 |
"bos_token_id": 50256,
|