VERSIL91 committed on
Commit 89f45b2 · verified · 1 Parent(s): f62f98a

End of training

README.md ADDED
@@ -0,0 +1,163 @@
+ ---
+ library_name: peft
+ license: apache-2.0
+ base_model: unsloth/zephyr-sft
+ tags:
+ - axolotl
+ - generated_from_trainer
+ model-index:
+ - name: ba53034e-6d32-456a-8582-80f09fcde818
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ adapter: lora
+ base_model: unsloth/zephyr-sft
+ bf16: true
+ chat_template: llama3
+ data_processes: 16
+ dataset_prepared_path: null
+ datasets:
+ - data_files:
+   - fa96d1d99a0c1392_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fa96d1d99a0c1392_train_data.json
+   type:
+     field_instruction: article
+     field_output: lead
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ debug: null
+ deepspeed: null
+ device_map: auto
+ do_eval: true
+ early_stopping_patience: 5
+ eval_batch_size: 8
+ eval_max_new_tokens: 128
+ eval_steps: 25
+ eval_table_size: null
+ evals_per_epoch: null
+ flash_attention: true
+ fp16: false
+ fsdp: null
+ fsdp_config: null
+ gradient_accumulation_steps: 4
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: null
+ hub_repo: null
+ hub_strategy: checkpoint
+ hub_token: null
+ learning_rate: 0.0001
+ load_in_4bit: false
+ load_in_8bit: false
+ local_rank: null
+ logging_steps: 1
+ lora_alpha: 128
+ lora_dropout: 0.05
+ lora_fan_in_fan_out: null
+ lora_model_dir: null
+ lora_r: 64
+ lora_target_linear: true
+ lr_scheduler: cosine
+ max_grad_norm: 1.0
+ max_memory:
+   0: 75GB
+ max_steps: 50
+ micro_batch_size: 8
+ mlflow_experiment_name: /tmp/fa96d1d99a0c1392_train_data.json
+ model_type: AutoModelForCausalLM
+ num_epochs: 1
+ optim_args:
+   adam_beta1: 0.9
+   adam_beta2: 0.95
+   adam_epsilon: 1e-5
+ optimizer: adamw_bnb_8bit
+ output_dir: miner_id_24
+ pad_to_sequence_len: true
+ resume_from_checkpoint: null
+ s2_attention: null
+ sample_packing: false
+ save_steps: 25
+ saves_per_epoch: null
+ sequence_len: 512
+ strict: false
+ tf32: true
+ tokenizer_type: AutoTokenizer
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.05
+ wandb_entity: null
+ wandb_mode: online
+ wandb_name: f3b476d4-ee28-4fc3-bffb-3cb3d4410e01
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: f3b476d4-ee28-4fc3-bffb-3cb3d4410e01
+ warmup_steps: 5
+ weight_decay: 0.0
+ xformers_attention: null
+
+ ```
+
+ </details><br>
+
+ # ba53034e-6d32-456a-8582-80f09fcde818
+
+ This model is a LoRA fine-tune of [unsloth/zephyr-sft](https://huggingface.co/unsloth/zephyr-sft) on the custom JSON dataset `fa96d1d99a0c1392_train_data.json` described in the axolotl config above.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.6597
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0001
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 32 (8 per device × 4 accumulation steps)
+ - optimizer: adamw_bnb_8bit with betas=(0.9, 0.95) and epsilon=1e-05, set via optim_args (overriding the AdamW defaults betas=(0.9, 0.999), epsilon=1e-08)
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 5
+ - training_steps: 50
+
+ ### Training results
+
+ | Training Loss | Epoch  | Step | Validation Loss |
+ |:-------------:|:------:|:----:|:---------------:|
+ | 2.6599        | 0.0004 | 1    | 1.2003          |
+ | 2.3261        | 0.0099 | 25   | 0.6671          |
+ | 3.5126        | 0.0198 | 50   | 0.6597          |
+
+
+ ### Framework versions
+
+ - PEFT 0.13.2
+ - Transformers 4.46.0
+ - Pytorch 2.5.0+cu124
+ - Datasets 3.0.1
+ - Tokenizers 0.20.1
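
For orientation, here is a minimal inference sketch for the adapter this commit adds, using the framework versions listed above. The Hub repo id below is an assumption derived from the model-index name; substitute the actual repo path or a local checkpoint directory.

```python
# Minimal sketch: attach this LoRA adapter to the base model for inference.
# "VERSIL91/ba53034e-6d32-456a-8582-80f09fcde818" is an assumed repo id.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained(
    "unsloth/zephyr-sft",
    torch_dtype=torch.bfloat16,  # training ran in bf16
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("unsloth/zephyr-sft")
model = PeftModel.from_pretrained(base, "VERSIL91/ba53034e-6d32-456a-8582-80f09fcde818")

# Per the config, `article` is the instruction field and `lead` the output,
# i.e. the adapter was tuned to generate article leads.
inputs = tokenizer("Your article text here...", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```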
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "unsloth/zephyr-sft",
+   "bias": "none",
+   "fan_in_fan_out": null,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "down_proj",
+     "gate_proj",
+     "q_proj",
+     "up_proj",
+     "v_proj",
+     "k_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
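
This file is written automatically by PEFT when the adapter is saved; for readers who want to reproduce the adapter programmatically, here is a sketch of the equivalent `LoraConfig` (all values taken from the JSON above):

```python
# Equivalent LoraConfig to adapter_config.json above (a sketch; PEFT loads the
# JSON directly via PeftModel.from_pretrained, so this is only for reproduction).
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,                 # LoRA rank
    lora_alpha=128,       # scaling; effective scale alpha/r = 2.0
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
)
```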
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:360ce1146cae4a58d53e1d2026f3dea8c956abb2388df4f17839ddf2365e22d4
+ size 671250634
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dfb8ccccfc230d6a511d22df5a2b72e8dd412ed82a8c6a24d1688f2c3c1957bf
+ size 671149168
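
Both adapter weight files are committed as Git LFS pointers (spec v1): the repository stores only a sha256 `oid` and a `size` while the blob itself lives in LFS storage. A sketch for checking a downloaded copy against the pointer (local filename assumed):

```python
# Verify a downloaded LFS object against its pointer's sha256 (path assumed).
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "dfb8ccccfc230d6a511d22df5a2b72e8dd412ed82a8c6a24d1688f2c3c1957bf"
assert sha256_of("adapter_model.safetensors") == expected
```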
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_attn_implementation_autoset": true,
+   "_name_or_path": "unsloth/zephyr-sft",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.46.0",
+   "unsloth_version": "2024.9",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
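
This config pins the base model's Mistral-7B architecture, including grouped-query attention (32 query heads sharing 8 key/value heads). A quick sketch of inspecting these fields with transformers:

```python
# Inspect the architecture described by config.json.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("unsloth/zephyr-sft")
assert cfg.model_type == "mistral"
assert cfg.hidden_size == 4096 and cfg.num_hidden_layers == 32
# Grouped-query attention: 4 query heads per key/value head.
print(cfg.num_attention_heads // cfg.num_key_value_heads)  # -> 4
```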
last-checkpoint/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: unsloth/zephyr-sft
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.13.2
last-checkpoint/adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "unsloth/zephyr-sft",
+   "bias": "none",
+   "fan_in_fan_out": null,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "o_proj",
+     "down_proj",
+     "gate_proj",
+     "q_proj",
+     "up_proj",
+     "v_proj",
+     "k_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
last-checkpoint/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dfb8ccccfc230d6a511d22df5a2b72e8dd412ed82a8c6a24d1688f2c3c1957bf
+ size 671149168
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:771754dbb117e3d7e9a1c3319463225820330403b2a178afeff419356c334e3b
+ size 341314196
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c02f6fcbec44fad49149f2bd77ed83553ef9d96496188e4168cbdfe5ead7f43
+ size 14244
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
+ size 1064
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
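
A note on the pad token: the map reuses `<unk>` as `pad_token` (this tokenizer ships without a dedicated one) and, per tokenizer_config.json below, pads on the left. A small sketch, assuming the base-model tokenizer, of reproducing that setup:

```python
# Sketch: replicate the pad-token setup from special_tokens_map.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("unsloth/zephyr-sft")
tok.pad_token = tok.unk_token   # "<unk>", token id 0
tok.padding_side = "left"       # left padding, as configured for this checkpoint
batch = tok(["short", "a much longer prompt"], padding=True, return_tensors="pt")
print(batch["input_ids"][0])    # leading pad ids (0) on the shorter sequence
```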
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": "<s>",
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "padding_side": "left",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "truncation_side": "left",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
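
The `chat_template` field above is a Llama-3-style Jinja template, matching `chat_template: llama3` in the axolotl config, even though the tokenizer class is Mistral's `LlamaTokenizer`. A sketch of rendering it; the repo id is an assumption, but any tokenizer carrying this config will behave the same:

```python
# Render the llama3-style chat template above (repo id assumed; the tokenizer
# files in this commit carry the template shown).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("VERSIL91/ba53034e-6d32-456a-8582-80f09fcde818")
text = tok.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(text)
# <s><|start_header_id|>user<|end_header_id|>
#
# Hello<|eot_id|><|start_header_id|>assistant<|end_header_id|>
```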
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,416 @@
+ {
+   "best_metric": 0.6597008109092712,
+   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+   "epoch": 0.019807863721897592,
+   "eval_steps": 25,
+   "global_step": 50,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.00039615727443795186,
+       "grad_norm": 15.38939380645752,
+       "learning_rate": 2e-05,
+       "loss": 2.6599,
+       "step": 1
+     },
+     {
+       "epoch": 0.00039615727443795186,
+       "eval_loss": 1.200274109840393,
+       "eval_runtime": 154.9972,
+       "eval_samples_per_second": 27.433,
+       "eval_steps_per_second": 3.432,
+       "step": 1
+     },
+     {
+       "epoch": 0.0007923145488759037,
+       "grad_norm": 16.734960556030273,
+       "learning_rate": 4e-05,
+       "loss": 3.396,
+       "step": 2
+     },
+     {
+       "epoch": 0.0011884718233138556,
+       "grad_norm": 15.702166557312012,
+       "learning_rate": 6e-05,
+       "loss": 2.8498,
+       "step": 3
+     },
+     {
+       "epoch": 0.0015846290977518075,
+       "grad_norm": 13.420672416687012,
+       "learning_rate": 8e-05,
+       "loss": 2.7706,
+       "step": 4
+     },
+     {
+       "epoch": 0.0019807863721897595,
+       "grad_norm": 9.73044490814209,
+       "learning_rate": 0.0001,
+       "loss": 2.5057,
+       "step": 5
+     },
+     {
+       "epoch": 0.0023769436466277113,
+       "grad_norm": 11.664520263671875,
+       "learning_rate": 9.987820251299122e-05,
+       "loss": 2.7975,
+       "step": 6
+     },
+     {
+       "epoch": 0.002773100921065663,
+       "grad_norm": 8.877344131469727,
+       "learning_rate": 9.951340343707852e-05,
+       "loss": 2.5462,
+       "step": 7
+     },
+     {
+       "epoch": 0.003169258195503615,
+       "grad_norm": 9.045130729675293,
+       "learning_rate": 9.890738003669029e-05,
+       "loss": 2.6158,
+       "step": 8
+     },
+     {
+       "epoch": 0.0035654154699415667,
+       "grad_norm": 9.858509063720703,
+       "learning_rate": 9.806308479691595e-05,
+       "loss": 2.8669,
+       "step": 9
+     },
+     {
+       "epoch": 0.003961572744379519,
+       "grad_norm": 8.872286796569824,
+       "learning_rate": 9.698463103929542e-05,
+       "loss": 2.516,
+       "step": 10
+     },
+     {
+       "epoch": 0.004357730018817471,
+       "grad_norm": 9.686171531677246,
+       "learning_rate": 9.567727288213005e-05,
+       "loss": 2.5248,
+       "step": 11
+     },
+     {
+       "epoch": 0.004753887293255423,
+       "grad_norm": 10.663348197937012,
+       "learning_rate": 9.414737964294636e-05,
+       "loss": 3.0213,
+       "step": 12
+     },
+     {
+       "epoch": 0.005150044567693374,
+       "grad_norm": 9.221182823181152,
+       "learning_rate": 9.24024048078213e-05,
+       "loss": 2.4556,
+       "step": 13
+     },
+     {
+       "epoch": 0.005546201842131326,
+       "grad_norm": 9.011594772338867,
+       "learning_rate": 9.045084971874738e-05,
+       "loss": 2.2169,
+       "step": 14
+     },
+     {
+       "epoch": 0.005942359116569278,
+       "grad_norm": 9.134177207946777,
+       "learning_rate": 8.83022221559489e-05,
+       "loss": 2.6884,
+       "step": 15
+     },
+     {
+       "epoch": 0.00633851639100723,
+       "grad_norm": 8.743730545043945,
+       "learning_rate": 8.596699001693255e-05,
+       "loss": 2.4097,
+       "step": 16
+     },
+     {
+       "epoch": 0.006734673665445182,
+       "grad_norm": 9.52989673614502,
+       "learning_rate": 8.345653031794292e-05,
+       "loss": 2.7904,
+       "step": 17
+     },
+     {
+       "epoch": 0.0071308309398831335,
+       "grad_norm": 9.70661735534668,
+       "learning_rate": 8.07830737662829e-05,
+       "loss": 2.4424,
+       "step": 18
+     },
+     {
+       "epoch": 0.007526988214321085,
+       "grad_norm": 10.09250259399414,
+       "learning_rate": 7.795964517353735e-05,
+       "loss": 2.436,
+       "step": 19
+     },
+     {
+       "epoch": 0.007923145488759038,
+       "grad_norm": 8.261049270629883,
+       "learning_rate": 7.500000000000001e-05,
+       "loss": 2.318,
+       "step": 20
+     },
+     {
+       "epoch": 0.008319302763196989,
+       "grad_norm": 9.031270027160645,
+       "learning_rate": 7.191855733945387e-05,
+       "loss": 2.4178,
+       "step": 21
+     },
+     {
+       "epoch": 0.008715460037634942,
+       "grad_norm": 12.21824836730957,
+       "learning_rate": 6.873032967079561e-05,
+       "loss": 2.7133,
+       "step": 22
+     },
+     {
+       "epoch": 0.009111617312072893,
+       "grad_norm": 10.730268478393555,
+       "learning_rate": 6.545084971874738e-05,
+       "loss": 2.6344,
+       "step": 23
+     },
+     {
+       "epoch": 0.009507774586510845,
+       "grad_norm": 9.152085304260254,
+       "learning_rate": 6.209609477998338e-05,
+       "loss": 2.3463,
+       "step": 24
+     },
+     {
+       "epoch": 0.009903931860948796,
+       "grad_norm": 9.404061317443848,
+       "learning_rate": 5.868240888334653e-05,
+       "loss": 2.3261,
+       "step": 25
+     },
+     {
+       "epoch": 0.009903931860948796,
+       "eval_loss": 0.6671332120895386,
+       "eval_runtime": 156.6633,
+       "eval_samples_per_second": 27.141,
+       "eval_steps_per_second": 3.396,
+       "step": 25
+     },
+     {
+       "epoch": 0.010300089135386749,
+       "grad_norm": 12.529690742492676,
+       "learning_rate": 5.522642316338268e-05,
+       "loss": 2.8625,
+       "step": 26
+     },
+     {
+       "epoch": 0.0106962464098247,
+       "grad_norm": 10.027824401855469,
+       "learning_rate": 5.174497483512506e-05,
+       "loss": 2.5588,
+       "step": 27
+     },
+     {
+       "epoch": 0.011092403684262652,
+       "grad_norm": 10.082047462463379,
+       "learning_rate": 4.825502516487497e-05,
+       "loss": 2.4771,
+       "step": 28
+     },
+     {
+       "epoch": 0.011488560958700603,
+       "grad_norm": 11.15814208984375,
+       "learning_rate": 4.477357683661734e-05,
+       "loss": 2.4113,
+       "step": 29
+     },
+     {
+       "epoch": 0.011884718233138556,
+       "grad_norm": 10.759804725646973,
+       "learning_rate": 4.131759111665349e-05,
+       "loss": 2.8421,
+       "step": 30
+     },
+     {
+       "epoch": 0.012280875507576509,
+       "grad_norm": 10.599135398864746,
+       "learning_rate": 3.790390522001662e-05,
+       "loss": 2.6885,
+       "step": 31
+     },
+     {
+       "epoch": 0.01267703278201446,
+       "grad_norm": 11.816875457763672,
+       "learning_rate": 3.4549150281252636e-05,
+       "loss": 3.0024,
+       "step": 32
+     },
+     {
+       "epoch": 0.013073190056452412,
+       "grad_norm": 10.234508514404297,
+       "learning_rate": 3.12696703292044e-05,
+       "loss": 2.0345,
+       "step": 33
+     },
+     {
+       "epoch": 0.013469347330890363,
+       "grad_norm": 12.437875747680664,
+       "learning_rate": 2.8081442660546125e-05,
+       "loss": 2.7898,
+       "step": 34
+     },
+     {
+       "epoch": 0.013865504605328316,
+       "grad_norm": 10.455293655395508,
+       "learning_rate": 2.500000000000001e-05,
+       "loss": 2.2859,
+       "step": 35
+     },
+     {
+       "epoch": 0.014261661879766267,
+       "grad_norm": 10.511425971984863,
+       "learning_rate": 2.2040354826462668e-05,
+       "loss": 2.2994,
+       "step": 36
+     },
+     {
+       "epoch": 0.01465781915420422,
+       "grad_norm": 11.496809959411621,
+       "learning_rate": 1.9216926233717085e-05,
+       "loss": 2.7418,
+       "step": 37
+     },
+     {
+       "epoch": 0.01505397642864217,
+       "grad_norm": 13.628725051879883,
+       "learning_rate": 1.6543469682057106e-05,
+       "loss": 3.0457,
+       "step": 38
+     },
+     {
+       "epoch": 0.015450133703080123,
+       "grad_norm": 13.12936782836914,
+       "learning_rate": 1.4033009983067452e-05,
+       "loss": 2.5216,
+       "step": 39
+     },
+     {
+       "epoch": 0.015846290977518076,
+       "grad_norm": 10.766741752624512,
+       "learning_rate": 1.1697777844051105e-05,
+       "loss": 2.3881,
+       "step": 40
+     },
+     {
+       "epoch": 0.016242448251956025,
+       "grad_norm": 15.045173645019531,
+       "learning_rate": 9.549150281252633e-06,
+       "loss": 3.1368,
+       "step": 41
+     },
+     {
+       "epoch": 0.016638605526393978,
+       "grad_norm": 10.670278549194336,
+       "learning_rate": 7.597595192178702e-06,
+       "loss": 2.4484,
+       "step": 42
+     },
+     {
+       "epoch": 0.01703476280083193,
+       "grad_norm": 14.797165870666504,
+       "learning_rate": 5.852620357053651e-06,
+       "loss": 3.4172,
+       "step": 43
+     },
+     {
+       "epoch": 0.017430920075269883,
+       "grad_norm": 12.900824546813965,
+       "learning_rate": 4.322727117869951e-06,
+       "loss": 2.7403,
+       "step": 44
+     },
+     {
+       "epoch": 0.017827077349707832,
+       "grad_norm": 12.451089859008789,
+       "learning_rate": 3.0153689607045845e-06,
+       "loss": 3.1842,
+       "step": 45
+     },
+     {
+       "epoch": 0.018223234624145785,
+       "grad_norm": 14.523232460021973,
+       "learning_rate": 1.9369152030840556e-06,
+       "loss": 3.4595,
+       "step": 46
+     },
+     {
+       "epoch": 0.018619391898583738,
+       "grad_norm": 12.919535636901855,
+       "learning_rate": 1.0926199633097157e-06,
+       "loss": 3.0023,
+       "step": 47
+     },
+     {
+       "epoch": 0.01901554917302169,
+       "grad_norm": 13.515146255493164,
+       "learning_rate": 4.865965629214819e-07,
+       "loss": 2.8616,
+       "step": 48
+     },
+     {
+       "epoch": 0.019411706447459643,
+       "grad_norm": 13.035895347595215,
+       "learning_rate": 1.2179748700879012e-07,
+       "loss": 2.3172,
+       "step": 49
+     },
+     {
+       "epoch": 0.019807863721897592,
+       "grad_norm": 19.37661361694336,
+       "learning_rate": 0.0,
+       "loss": 3.5126,
+       "step": 50
+     },
+     {
+       "epoch": 0.019807863721897592,
+       "eval_loss": 0.6597008109092712,
+       "eval_runtime": 156.6523,
+       "eval_samples_per_second": 27.143,
+       "eval_steps_per_second": 3.396,
+       "step": 50
+     }
+   ],
+   "logging_steps": 1,
+   "max_steps": 50,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 25,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 3.5953824975814656e+16,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
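
The `learning_rate` values in `log_history` trace the configured schedule exactly: linear warmup to 1e-4 over 5 steps, then cosine decay to zero over the remaining 45. A small sketch that reproduces the logged values:

```python
# Reproduce the cosine-with-warmup schedule seen in log_history.
import math

def lr_at(step: int, lr_max: float = 1e-4, warmup: int = 5, total: int = 50) -> float:
    if step <= warmup:
        return lr_max * step / warmup           # linear warmup: steps 1-5
    progress = (step - warmup) / (total - warmup)
    return 0.5 * lr_max * (1.0 + math.cos(math.pi * progress))

print(lr_at(6))    # ~9.98782e-05, matches the step-6 entry
print(lr_at(49))   # ~1.21797e-07, matches the step-49 entry
print(lr_at(50))   # 0.0, matches the final step
```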
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51cdb209365988e5a86e5cc74573e4f2fb46ada332d8ddf9cb18285278fa5eec
+ size 6840
special_tokens_map.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<unk>",
+     "<s>",
+     "</s>"
+   ],
+   "bos_token": "<s>",
+   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "padding_side": "left",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "truncation_side": "left",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51cdb209365988e5a86e5cc74573e4f2fb46ada332d8ddf9cb18285278fa5eec
+ size 6840