Spaces:
Sleeping
Sleeping
File size: 302 Bytes
1df0e33 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
# Model hyperparameters.
# NOTE(review): the section headings below are inferred from key names only;
# confirm each against the code that loads this file.

# Core sizes
vocab_size: 50257
d_model: 1600
n_layer: 48
d_ff: 4800
max_seq_len: 2048

# Expert routing (presumably mixture-of-experts settings; TODO confirm)
num_experts: 8
top_k: 2
load_balancing_coef: 0.01
router_z_loss_coef: 0.001

# ssm_* settings
ssm_d_state: 64
ssm_expand: 2

# Precision and memory/runtime switches
dtype: 'float16'
use_cpu_offload: false
gradient_checkpointing: true
checkpoint_ssm_layers: true
use_flash_attention: true
|