Training in progress, step 100

Files changed (5) hide show

README.md CHANGED Viewed

@@ -34,11 +34,11 @@ This model was trained with SFT.
 ### Framework versions
-- TRL: 0.20.0
-- Transformers: 4.54.1
-- Pytorch: 2.7.1
 - Datasets: 4.0.0
-- Tokenizers: 0.21.4
 ## Citations

 ### Framework versions
+- TRL: 0.23.0
+- Transformers: 4.56.1
+- Pytorch: 2.8.0+cu126
 - Datasets: 4.0.0
+- Tokenizers: 0.22.0
 ## Citations

config.json CHANGED Viewed

@@ -5,6 +5,7 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
@@ -25,8 +26,7 @@
   "rope_scaling": null,
   "rope_theta": 100000,
   "tie_word_embeddings": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.54.1",
   "use_cache": true,
   "vocab_size": 49152
 }

   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
+  "dtype": "float32",
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
   "rope_scaling": null,
   "rope_theta": 100000,
   "tie_word_embeddings": true,
+  "transformers_version": "4.56.1",
   "use_cache": true,
   "vocab_size": 49152
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58d6c47302c54bc7f51f69c57db5f0c831f60df404f8c435b5fb749eec673c6d
 size 1447317080

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3826ec42120a85215511b04224dfe56a8b5170dbe6ae8c40d90ba2cc91eda4e
 size 1447317080

runs/Sep24_04-21-21_62965a392d91/events.out.tfevents.1758687701.62965a392d91.370.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:edf9be393551ee8b873d866a4a4a080680339dbd62b498ad9279715af96b421a
+size 14858

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4fc9ffb5168f56ceafff0215af4086600e4d2abeb752d24dedc23116ced0c79
-size 6161

 version https://git-lfs.github.com/spec/v1
+oid sha256:1830d1d35a4eb065e2964b6d8a6ab48c2bc754b251a399ea159fc1b22830a985
+size 6225