{ "aggregate_gradients_by_tokens": true, "alternate_pp_config": true, "async_batch_iterator": false, "async_batch_iterator_timeout_s": 300, "async_checkpointing": true, "async_eval_ngpus": -1, "attach_debugpy": false, "background_nccl_init": false, "batch_p2p_communication": false, "batch_size": 2, "cached_file_unique_prefix": "", "checkpoint": { "async_checkpointing_staging_method": "async_copy_async_serialize", "barrier_timeout_secs": 120, "checkpoint_barrier_type": "sc", "checkpoint_gc_use_rmdir": true, "checkpoint_groups": false, "checkpoint_server_max_attempts": 10, "checkpoint_server_num_chunks": 10, "checkpoint_server_num_threads": 20, "checkpoint_server_op_timeout_secs": 10.0, "checkpoint_server_threads": 10, "checkpoint_server_timeout_secs": 60.0, "dump_freq_ephemeral": -1, "eager_init_staging_buffer": true, "live_checkpointing": false, "on_demand_checkpointing": false, "sleep_interval": 10, "staging_block_every_n_tensors": -1, "timeout_all_shard_exists": 300, "timeout_barrier_init_secs": 300, "timeout_execution": 1800, "timeout_folder_exists": 300, "timeout_process_init_secs": 60, "use_checkpoint_barrier_tcpstore_libuv": true, "use_checkpoint_barrier_wait_for_all_files": true, "use_checkpoint_barrier_wait_for_dir": false, "use_checkpointing_process": true, "use_shm_manager_for_async_cp": false, "wait_for_tensor_timeout_s": 120 }, "checkpoint_dump_dir": "/mnt/wsfuse/outputs/T-draft-17bx128MoE-N_3-b6pd09653zgfw", "collect_et": false, "context_parallel_size": 1, "data": "", "dataloader": { "always_trim_text": true, "concurrency_timeout_s": 300, "concurrent": false, "datamix": "/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/sql_sft:0.0001140800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/scale_code_chunk:0.0000850100,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/sft_data_surge_tree_sitter_top_3:0.0933991500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/sft_data_scale_tree_sitter_top_3:0.0055914500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/turing_tree_sitter_top_3:0.0003080800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/olivier_synthetic_code:0.0095646900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/suchin_synthetic_code:0.0234710400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/olivier_synthetic_javascript:0.0020783500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/codeforces_few_shot_000:0.0006576000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/synth_data_cleaning_0920_online_judge_sft:0.0003063400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/sten_surge_coding_with_exec_mulitpl_synth_240613_v2_format_clean:0.0006246500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/yundi_codeforces_rewrite_filter_solve_coding_wrapper_sft_format_clean:0.0004503500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/olivier_coding_synthetic_stackoverflow_inspired_samll_multipl_translation_v2_format_clean:0.0026521600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/sten_coding_generated_problem_stack_overflow_L3_405B_self_healing_principled_tests_v8_good_v2_format_clean:0.0050457400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/iopairs_snippets_filtered_inductive_reasoning:0.0008189600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/iopairs_snippets_275k_cruxeval_output:0.0022750500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/iopairs_snippets_275k_cruxeval_input:0.0015900500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/coding/debug_v5:0.0009672300,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/general_helpfulness_english/bio_sft_data2:0.0000024200,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/general_helpfulness_english/bio_sft_data_systemprompt2:0.0000033400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/general_helpfulness_english/hard_legal_mcq_w_reasoning_sft:0.0011769300,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/autoif_filtered_prompt_v1_v2_filtered_pass_75_format_filter_valid_link_fix_code:0.0011995400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/general_helpfulness_english/surge_precise_if_critic_rewrite_iter1_perfect_frr_tone_filter_format_clean_chunk:0.0201362100,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/general_helpfulness_english/surge_precise_if_critic_rlhf6pt5_rs_perfect_frr_tone_filter_format_clean_chunk:0.0330838000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/scale_weak_areas:0.0004247400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/knn_mitigation_1shot_v1:0.0092553500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/surge_helpful_if_critic_format_original_perfect_response_format_clean_hard_chunked:0.0047733600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/surge_helpful_if_critic_format_original_perfect_response_format_clean_medium_chunked:0.0471786600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/surge_helpful_if_critic_format_sys_prompt_rewrite_iter1_format_clean_hard_chunked:0.0044165200,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/surge_helpful_if_critic_format_sys_prompt_rewrite_iter1_format_clean_medium_chunked:0.0264769000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/general_helpfulness_english/surge_general_steerability_2024_train_chunk:0.0018149600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R3/general_helpfulness_english/table_yonder_oss_helpfulness_syngen_preachy_tone_sft_partition_is_partition_eq_true:0.0000572800,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/surge_reasoning:0.0026246000,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/round3_3p_decontaminated_rlhf6_mcq_rscot_50_cjka_fix_nomath_nobio:0.0203449500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/up_synthetic_verbal_reasoning_405_highq185_few_shot_000:0.0011054200,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/legal_mbe_bar_few_shot_000_format_v3:0.0000727600,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/cpa_few_shot_000_cot_000_format_final_v3:0.0000278400,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/moral_decontaminate:0.0000427200,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/lovish_round6_v2_format_clean:0.0003930200,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/ctg_clean_downsampled0_5_mcq_no_geeks_v3:0.0002844500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_aime_esbs_100_0_75_M30_SCORED_DECONTAM:0.0003522600,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_harp_esbs_100_0_75_M30_SCORED_DECONTAM:0.0017419500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_math_train_esbs_100_0_75_M30_SCORED_DECONTAM:0.0045896500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_omni-math_esbs_100_0_75_M30_SCORED_DECONTAM:0.0011989900,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_scale-v1_esbs_100_0_75_M30_SCORED_DECONTAM:0.0139459000,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_scale-v2_esbs_100_0_75_M30_SCORED_DECONTAM:0.0175181900,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_scale-v3_esbs_100_0_75_M30_SCORED_DECONTAM:0.0095064700,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data-v2___70b_robert___t=1.2___top_p=1.0___min_p=0.03___top_k=30___250106_scale-v4_esbs_100_0_75_M30_SCORED_DECONTAM:0.0072784800,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_aime_N20_VERIFIED_SCORED_DECONTAM:0.0001480700,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_aops_N20_VERIFIED_SCORED_DECONTAM:0.0041684300,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_harp_N20_VERIFIED_SCORED_DECONTAM:0.0010190100,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_math_N20_VERIFIED_SCORED_DECONTAM:0.0030298100,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_omni_math_N20_VERIFIED_SCORED_DECONTAM:0.0005474500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_scale_v1_N20_VERIFIED_SCORED_DECONTAM:0.0098020300,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_scale_v2_N20_VERIFIED_SCORED_DECONTAM:0.0136693500,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_scale_v3_N20_VERIFIED_SCORED_DECONTAM:0.0079475700,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/___mnt___wsfuse___lovish___fennel___post-training___sft-data___viktorkerkez___arpg___unclustered___n20___250105_scale_v4_N20_VERIFIED_SCORED_DECONTAM:0.0047879200,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/250114_r1ab_data:0.0009782000,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/brainly_arpg_weak_area_mcq_mitigated_final:0.0006048700,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/reasoning_sft_mcq_final_sbs:0.0006742800,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/250106_r1_data_M10_RM=oprm-8nodes-lw6wdvf1clmv1c_shuffle=True:0.0064538600,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/careers360_M10_RM=oprm-8nodes-lw6wdvf1clmv1c_shuffle=True:0.0061167600,/mnt/wsfuse/finetune/shared/YonderPrune/sft_data/hierarchal_clustering_LT3R3_reasoning/sft_dataset_12M_d100_f0_m_rmlt3r2_dmix_l77_sel_on_rewardsf_lrc_2_code_patch_tool_clean_sf_15300/LT3R3_reasoning/reasoning/afanti_40k_M10_RM=oprm-8nodes-lw6wdvf1clmv1c_shuffle=True:0.0090247200,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/multilingual_r6_5_mix_rm_mathv1:0.0332281300,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/surge_sft_hindi_romanized:0.0003671600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/scale_sft_final_format_clean_chunk:0.0081132900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/bio:0.0002783100,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT3R2/multilingual/new_rs_souped_multilinugal_critic_rewrite_data_format_clean:0.0648830400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/rus_v7:0.0206462400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/multilingual/zho_v5:0.0224838300,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mhx_online_positives_exp_v1_msgv2_pii_vr_frr_filtered_formatting_linted_mk_partition_1_eq_1:0.0999299400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/factuality/table_mh20_2_message_v2_mhx_sft_train_mh17_rlhf2_preference_factuality_perturb_v2_partition_1_eq_1:0.0317194500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/factuality/table_mh20_2_message_v2_mhx_sft_train_mh16_crs_hallucination_partition_1_eq_1:0.0000143900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/factuality/table_mh20_2_message_v2_mhx_sft_train_mh15_synth_defamation_v0_diverse_partition_1_eq_1:0.0001121100,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/factuality/table_mh20_2_message_v2_synthetic_private_individuals_data_final_partitioned_partition_1_eq_1:0.0001191900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mh20_2_message_v2_meta_ai_mhx_capability_hallucination_for_option_2_sev_compliant_partitioned_partition_source_eq_capability_prompt_k_and_structured_v7_tone_transform_conserved:0.0000013000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mh20_2_message_v2_mhx_sft_train_mh16_adversarial_capability_hallucination_partition_1_eq_1:0.0000390300,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mh20_2_message_v2_mhx_sft_train_imagine_synthetic_v0_partition_1_eq_1:0.0021983400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/reasoning/table_mh20_2_message_v2_mhx_sft_train_aqua_synthetic_cot_sft_data_v1_selfchecked_partition_1_eq_1:0.0069630200,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_syngen_en_mh24_sft_fRM_by_jtbd_filtered_message_fixed_v2_partition_ds_eq_2024-11-18:0.0104362700,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mh22_formatting_nested_list_sft_message_v2_partition_ds_eq_2024-09-30:0.0000701200,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_mh22_formatting_nested_list_steerability_sft_message_v2_partition_ds_eq_2024-09-30:0.0000880400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_flyrs_en_essay_email_formatting_6_0_mh21_linted_message_v2_partitioned_partition_1_eq_1:0.0000523500,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/safety/table_mh20_2_message_v2_mhx_sft_train_scale_precise_instruct_off_the_shelf_expanded_partition_ds_eq_2024-08-05:0.0007582100,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_prod_if_rjs_sft_v1_partition_source_eq_prod_if_tool:0.0040049900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_prod_if_rjs_sft_v1_partition_source_eq_prod_if_non_tool:0.0053242800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_prod_if_rjs_sft_v1_partition_source_eq_prod_if_tool_hard_train:0.0000091400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_mh20_2_message_v2_mh6_exp_datasets_unified_multilingual_for_option_2_sev_compliant_partitioned_partition_source_eq_mh18_multilingual_redaction_fix:0.0000002000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_mh20_2_message_v2_mh6_exp_datasets_unified_multilingual_for_option_2_sev_compliant_partitioned_partition_source_eq_english_lmr_fix_mh18:0.0000005700,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/tooling/table_mh20_2_message_v2_fbsearch_followup_refusal_0703_v1_partition_source_eq_1p_search_summary_history_injection:0.0000243400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/tooling/table_mh20_2_message_v2_mhx_sft_train_reels_summarization_mh7_partition_1_eq_1:0.0044526900,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_p13n_mh_sft_11_11_mrspdp_msg_v2_partition_ds_eq_2024-11-11:0.0012578800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_p13n_mh_sft_11_11_memory_msg_v2_partition_ds_eq_2024-11-11:0.0005490800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_p13n_mh_sft_11_11_canonical_msg_v2_partition_ds_eq_2024-11-11:0.0005627400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_ai_memory_explic_intent_response_train_500_partition_1_eq_1:0.0005429000,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_research_sft_data_multilingual_oct_langs_only_with_sys_prompt_randomized_date_msg_v2_partition_ds_eq_2024-09-28:0.0088870400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_multilingual_prod_traffic_10_11_vn_th_id_ar_lang_v3_rjs_lmr_out_msg_v2_cs_fix_for_option_2_compliant_lid13_lmr_clean_partition_ds_eq_2024-10-14:0.0012500600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_multilingual_oct_lang_flywheel_refresh_rjs_ranked_lid15_cs_pii_filter_final_message_v2_partition_ds_eq_2024-11-15:0.0020233600,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_syngen_yonder0_data_replenish_sft_partition_1_eq_1:0.0158649800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_mh20_2_message_v2_mh6_exp_datasets_unified_multilingual_for_option_2_sev_compliant_partitioned_redaction_fix_partition_source_eq_mh18_multilingual_redaction_fix:0.0001173400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/multilingual/table_mh20_2_message_v2_mh6_exp_datasets_unified_multilingual_for_option_2_sev_compliant_partitioned_english_lmr_fix_partition_source_eq_english_lmr_fix_mh18:0.0000001400,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_yonder_flywheel_syngen_preachy_tone_sft_partition_is_partition_eq_true:0.0017173800,/mnt/wsfuse/finetune/shared/YonderTrains/12M/LT2R3/metaai/general_helpfulness_english/table_yonder_flywheel_syngen_preachy_tone_sft_privacy_safe_partition_is_partition_eq_true:0.0009091400,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/flyrs_en_essay_email_formatting_msgv2_v0:0.0000433000,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/flyrs_mlt_4_0_dpo_mh23_1_lmr_msgv2_v0:0.0001682800,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mh20_2_message_v2_mhx_sft_train_sg_capability_canned_responses_v1_msgv2_v0:0.0000356300,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mh20_2_message_v2_mhx_sft_train_sg_non_plugins_mh15_msgv2_v0:0.0001764900,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mh24_voice_data_flywheel_tts_filtering_top_candidates_msgv2_v0:0.0017267900,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mhx_non_user_sft_data_rlhf6_msgv2_v0:0.0546009800,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v2/mhx_non_user_sft_data_rlhf6_msgv2_v0_batch1:0.1165484600,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/multilingual_oct_lang_flywheel_refresh_rjs_msgv2_v0:0.0004449600,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/p13n_mh_sft_11_11_memory_msg_msgv2_v0:0.0002283200,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/factual_dpo_11_18_factuality_ace_preference_claim_support_v0:0.0029266800,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/factuality_ace_edit_preference_train_v1_join_enrich_1119_v0:0.0014881800,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mh20_2_message_v2_mhx_sft_train_aqua_synthetic_cot_sft_data_v1_selfchecked_v0:0.0012164800,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mh_safety_sft_data_safety_mlg_oct_lang_frr_0916_lid13_lmr_clean_v0:0.0005607500,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/mhx_online_positives_exp_v1_msgv2_pii_vr_frr_filtered_v0:0.0147447700,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/research_sft_data_multilingual_oct_langs_only_with_sys_prompt_randomized_date_msg_v2_v0:0.0015526100,/mnt/wsfuse/samuelcai/pd/training_data/mh24_eagle_v0/raw_data/v1/syngen_en_mh24_dpo_fRM_by_jtbd_filtered_final_message_v2_v0:0.0035373300", "enable_packing": true, "image": null, "load_only_tp_zero": false, "logging_config": { "log_buffer_size": false, "log_every_n_steps": 10, "log_first_batch": false, "log_full_dataloader_state": false, "log_metadata": false }, "max_world_size": null, "mix_mode": {}, "modality_datamix": null, "pad_mode": { "pad_value": 200018, "seq_len": 4096 }, "pin_memory": true, "prefetch_factor": null, "progress_reporter_log_interval": 0, "rng_mode": {}, "shuffle_seed": 1337, "simulate_training_budget": null, "speech": null, "split_mode": { "discard_text_only": false, "keep_interval": 2, "keep_strategy": "put_back" }, "tail_token_mode": {}, "video": null, "weights_update_config": null, "workers_per_gpu": 1 }, "dataset_iteration_limits": null, "deallocate_pipeline_outputs": true, "disable_logging": false, "disable_workers_print": false, "dtype": "bf16", "dummy_nccl_init": true, "dump_dir": "/mnt/wsfuse/outputs/T-draft-17bx128MoE-N_3-b6pd09653zgfw", "dump_dir_tree_type": "sharded", "dump_freq": 50, "dump_profile_traces": true, "eager_init": true, "enable_anomaly_detection": false, "enable_deterministic_training": false, "enable_loss_tracker": true, "enable_ods": true, "enable_pynvml": false, "et_end_itr": 15, "et_start_itr": 12, "eval_freq": -1, "exp_id": "", "exp_name": "", "expert_parallel_size": 32, "finetuning_dir": "", "fp32_reduce_scatter": "all", "gc_collect_freq": 1000, "gpu_check_level": -1, "increase_seq": null, "instruct": { "no_loss_prompt": false, "no_loss_truncated": false }, "instruct_data": "", "iter_jsonl": { "buffer_size": 64, "same_data": false }, "iter_multi": { "buffer_size": 64, "ignore_extra_chunks": true, "iterate_chunk_by_chunk": false, "max_precompute": 20, "multiprocess": true }, "iter_text_airstore": { "airstore_max_holding_bundles_limit": null, "airstore_max_resharding_factor": null, "airstore_sample_prefetch_limit": null, "airstore_seed": 727, "dataloader_workers_per_gpu": 0, "load_only_pp_zero": false, "load_only_tp_zero": false, "max_world_size": null, "pin_memory": true, "prefetch_factor": null, "simulate_training_budget": null, "unique_token_fraction": null }, "iter_type": "multi", "keep_eval_checkpoints": false, "keep_n_last_checkpoints": -1, "load_optimizer_on_finetuning": false, "log": { "disable_scalars_tb_write": false, "log_loss_tracker_to_scuba": false, "log_scalar_default_log_level": "INFO", "log_scalar_freq": 100, "log_scalar_freq_overrides": "fp8:1000, router/modality:1000, router/dataset:1000, params:0,grads_fsdpv2:0,debug:0,verbose_debug:0", "log_scalar_log_level_overrides": "", "log_scalar_version": 2.0, "log_scalars": false, "log_scalars_to_ods": false, "log_scalars_to_scuba": false, "log_tb": true, "log_tensors": false, "log_tensors_to_scuba": false, "online_wandb": false, "online_wandb_project": null, "online_wandb_team": null, "reduce_scalars": false }, "log_all_steps": true, "log_batch_checksum": true, "log_dataloader_state": false, "log_freq": 1, "log_position_in_data_queue": true, "log_updates": true, "logitwriter": { "compression_algo": "zstd", "enable": false, "index_dtype": "int32", "logit_dtype": "float32", "same_day_logits_backup": false, "speech_topk": 100, "topk": 100, "write_lse": true }, "loss_logging_freq": 10, "loss_rescaling": false, "max_image_tiles_per_gpu": 2000, "mb_recompute_attn": false, "mb_recompute_fc1_fc3": false, "mem_snapshot_max_entries": 100000, "mem_snapshot_profiling_duration": 3, "mem_snapshot_start_step": -1, "mem_snapshot_stop_step": -1, "memory_efficient_pipeline": false, "model": { "alpha_depth": "disabled", "alpha_lrm": 1.0, "alpha_on_resid": false, "alpha_separate": false, "alpha_wdm": 1.0, "attn_bias_type": "block_causal", "attn_dropout": 0, "attn_out_dropout": 0, "attn_temperature_tuning_floor_scale": null, "attn_temperature_tuning_layers": null, "attn_temperature_tuning_q_scale_constant": null, "attn_to_keep": "all", "batchify_local_attention_len": null, "cp_attn_perdoc": false, "cp_attn_save_global_kv": true, "custom_bwd": false, "custom_bwd_sum_first_then_comms": true, "dialog_len": null, "dim": 5120, "efficient_attn": "auto", "efficient_output": false, "enable_fsdpv2": true, "enable_tp_overlapping": false, "enable_weight_sharding_in_pp": false, "enable_wgrad_sharding_in_pp": false, "eos_id": 200001, "every_n_layers_nope": null, "experts_choice_moe": { "auto_scale_F": true, "capacity_factor": 1.5, "clamp_above_std": false, "compute_moe_in_fp64": false, "drop_and_pad": false, "enable_lb_free": false, "enable_lb_loss": false, "enable_router_zloss": false, "eval_threshold_std_mult": 0.0, "eval_with_expert_activation_model": false, "eval_with_saved_stats": true, "eval_with_top_k": false, "expert_act_grad_prop_coeff": 0, "expert_act_init_std": 0.5, "expert_act_loss_coeff": 0.0001, "expert_act_silu": false, "expert_act_threshold": 0, "expert_activation_model": false, "fc1_clamp": null, "fc2_clamp": null, "fc3_clamp": null, "fix_datasource_router_score": "", "fix_image_router_score": null, "fix_speech_router_score": null, "force_looped_impl": false, "fused_shuffle": true, "input_scaling": false, "input_scaling_max_clamp": 2.0, "input_scaling_min_clamp": -2.0, "interleave_moe_layer_step": 2, "is_enabled": false, "lb_free_coeff": 0.0, "lb_loss_coeff": 0.0, "max_experts_per_token": null, "moe_init_scale": 1.0, "mult_moe_weight_grads": null, "norm_expert_output": null, "num_experts": 128, "overlap_token_comm": true, "postgate_experts": false, "recompute_capacity_factor": null, "routed_dropout": 0.0, "router_clamp": null, "router_kld_reg": 0.0, "router_padding_coeff": null, "router_score_gating": "sigmoid", "router_zloss_coeff": 1.3143357982572078e-18, "running_stats_ema": 0.99, "running_stats_sync_freq": 100, "saved_thresholds_are_post_sigmoid": false, "sharding_strategy": "dp2ep-v1", "shuffle_before_assign": false, "shuffle_freq": 1, "shuffle_group_size": null, "shuffle_level": 3, "shuffle_with_random_order": true, "shuffle_within_dp": false, "sigmoid_in_fp32": true, "skip_local_shuffle": false, "std_margin": 15, "std_margin_skip_last": true, "std_penalty_coeff": 0.0, "top_k": 1, "use_fixed_topk": false, "use_fixed_topk_bsz": 1, "use_fsdp": true, "use_shared_expert": true, "use_te_in_moe": false, "use_token_choice": true, "zero_clamp_grads": true, "zero_router_grads": false }, "ffn_dim_multiplier": 1.2, "ffn_exp": 4.0, "ffn_in_dropout": 0, "ffn_out_dropout": 0, "flex_score_mod": "", "fp8_amax_compute_algo": "max", "fp8_amax_history_len": 1024, "fp8_early_bf16_weight_release": false, "fp8_fuse_wgrad_accumulation": false, "fp8_grad_output_dynamic_scale": false, "fp8_input_dynamic_scale": false, "fp8_interval": 1, "fp8_margin": 0, "fp8_rowwise": false, "fp8_wgrad": false, "freeze_decoder": false, "freeze_patterns": null, "freeze_vision_encoder": false, "fsdp_checkpoint_wrap_layer_frequency": 1, "fsdpv1_flatten_params": true, "fsdpv2_cast_root_forward_inputs": false, "fsdpv2_cpu_offload_percentage": null, "fsdpv2_enable_cpu_offload": false, "fsdpv2_use_per_pg_streams": true, "fsdpv2_wrap_pp_model_chunk_only": false, "fuse_sequence_parallel": true, "global_attn_cfg": "all", "head_dim": 128, "high_freq_factor": 32, "hsdp_replicate_num": 1, "init": { "coeff_std": null, "depth_last": false, "fixed_std": null, "no_init": false, "router_coeff_std": 0.1, "truncate_std_mult": 2.0, "use_depth": "current", "use_gaussian": true }, "layer_ckpt": "none", "lc_rope_len": 0, "lc_rope_prob": 0.0, "less_layer_first_pp_stage": 0, "less_layer_last_pp_stage": 0, "local_attention_window_len": null, "loss_parallel": false, "max_length": 2048, "metap": { "base_width": 1024.0, "coeff_std": 1.0, "m_emb": 1.0, "metap_mode": "ntp", "tie_router_bulk_coeff_std": false, "use_metap": false }, "modalities": { "freeze_llm": false, "image": { "enable_projection": true, "encoder_name": "llama4_flash_encoder", "encoder_params": null, "freeze_vision_encoder": true, "image_height": 336, "image_width": 336, "patch_height": 14, "patch_width": 14, "ps_ratio": 0.5, "recompute_transformer": true, "return_intermediate": null, "use_cached_embeddings": false, "use_dynamic_transform": true, "vision_adapter_type": "pixel_shuffle_mlp", "vision_encoder_ckpt_path": "/mnt/wsfuse/nextgen_mm/vision_encoders/llama4_flash_encoder_1016_338k", "vision_encoding_batch_size": null }, "speech": { "append_quantization_output": false, "data_format_args": { "disallow_text_free_seg": true, "emit_text_right_after_sys_start": true, "enable_speech_text_hybrid": false, "hybrid_generation_mode": "single_token_emit", "hybrid_understanding_mode": "streaming", "jitter_system_prompt": false, "jitter_system_prompt_today_date": false, "num_words_in_unit": 1, "speech_delay": 0, "system_text_lookahead": 0, "tool_token_delay_ms_max": 100, "tool_token_delay_ms_min": 0, "transfer_dates_to_template": false, "turn_start_with_white_space": false, "user_text_delay": 6 }, "discrete_codebooks_size": 8192, "enable_aux_user_output": false, "enable_full_duplex": false, "enable_output": false, "encoder_device": "cuda", "freeze_speech_encoder": true, "is_tokenizer": true, "load_tokenizer": false, "share_speech_emb": false, "speech_encoder": null, "speech_encoder_ckpt_dir": null, "speech_extend_vocab_size": 0, "speech_feature_dim": 320, "speech_output_control_format": "", "speech_projection_dim": 1536, "speech_separate_softmax": false, "speech_train_audio_end": false, "speech_train_audio_start": false, "target_speaker_table_size": 0, "use_discrete_codes": false, "use_embedding": false, "use_fp32_for_speech_output": true, "use_fp64": true, "use_projection": true, "user_embedding_by_concat": false, "user_embedding_by_permutation": true, "user_projection_use_mlp": false }, "use_image": false, "use_speech": false, "use_video": false }, "multiple_of": 2048, "n_heads": 40, "n_kv_heads": 8, "n_layers": 3, "non_linearity": "swiglu", "nope_no_qk_norm": true, "norm_affine": true, "norm_eps": 1e-05, "norm_type": "rmsnorm", "num_unfrozen_layers": 0, "output_size": 202048, "parallel_decoding": { "enable_fc_parallelism": true, "fc_with_bias": false, "first_and_last_norm_required": true, "has_parallel_decoding": true, "parallel_decoding_type": "EAGLE", "share_input_output_embed_with_target": true }, "parallel_output_norm": true, "peft_args": null, "pp_use_tensor_pool": false, "pre_norm": true, "prefetch_weight_latency": 1.0, "qat_args": null, "qk_norm_across_heads": false, "qk_norm_affine": false, "recompute_attn": true, "recompute_fc1_out": true, "recompute_fc3_out": true, "recompute_q_norm": false, "rope_attn_scale": false, "rope_scale_factor": 1, "rope_theta": 500000.0, "rope_use_fp32_in_outer_product": true, "sequence_parallel": false, "share_emb": false, "stochastic_depth_p_attn": 0, "stochastic_depth_p_ffn": 0, "te_use_fsdp_mixed_precision": true, "use_flex_attn": false, "use_fp8": false, "use_qk_norm": false, "use_rope": true, "use_scaled_rope": false, "use_te_layers": true, "vocab_parallel": true, "vocab_size": 202048 }, "model_parallel_size": 8, "model_precheck": false, "nan_detector_steps": 0, "no_final_ckpt": false, "num_layers_per_virtual_pipeline_stage": null, "num_microbatches_with_partial_activation_checkpoints": 1, "number_of_manifold_servers_per_host": 8, "old_mp": -1, "old_world_size": -1, "optim": { "annealing_step": 10000, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "cosine_theta": 1.0, "cycle_length": 1.0, "decay_length_fraction": 0.1, "epsilon": 1e-08, "exp_factor": 0.5, "fused": null, "grad_accumulate_steps": 1, "independent_weight_decay": false, "lr": 0.0002, "lr_min_ratio": 0.1, "modality_order": "text,vision,speech,speech_full_duplex", "non_nope_lr_mult": null, "nope_lr_mult": null, "scheduler": "constant", "start_annealing_step": -1, "use_fp32_copy_optim": true, "vision_encoder_lr": null, "vision_projection_lr": null, "warmup": 0, "weight_decay": 0.1 }, "optimize_backward_concat": false, "overlap_p2p_communication": true, "paft": { "all_reduce_timeout_grow_ms": 300000, "all_reduce_timeout_ms": 60000, "ctran_port_base": 18700, "enable": false, "ib_exchange_port_base": 18600, "max_quorum_num_retries": 5, "max_step_retries": 5, "min_replicas_to_run": null, "qp_connect_timeout_ms": null, "replica_collective_timeout_s": 600, "send_recv_timeout_ms": 5000, "startup_sleep_ms": 10000, "test_only_barrier_timeout_s": 180, "test_only_skip_ftar": false }, "periodic_gpu_check": false, "pg_tuning_options_from_yaml": "", "pipeline_parallel_microbatch_size": 1, "pipeline_parallel_size": 1, "pipeline_strategy": "dora-dfs", "power_consumer": { "enable": false, "run_delay_steps": 0, "run_duration_steps": 100, "run_freq": 1000, "run_mode": "periodic" }, "pp_num_warm_up_microbatch_ratio": null, "profile_acc_events": false, "profile_barrier_timeout_s": 0, "profile_freq": -1, "profile_num_steps_active": 1, "profile_record_shapes": true, "profile_with_stack": false, "py_spy_args": { "active_seconds": 600, "format": "flamegraph", "freq": -1, "rank0_only": true, "rate": 50, "start_offset": 10 }, "recompute_all_mb": false, "reshard_after_forward": false, "restore_dataloader_position": false, "root_dump_dir": "/mnt/wsfuse/outputs/xldumps", "runtime_nccl_timeout_s": 600, "sample_across_datasets": true, "seq_len": 4096, "skip_evals_during_training": true, "slurm": { "global_rank": 0, "is_slurm_job": false, "role_index": 0, "role_rank": 0, "role_replica_count": 1, "role_world_size": 256, "world_size": 256 }, "speech_loss": { "aux_aligned_text_loss": false, "aux_user_loss_weight": 0.9, "dual_channel_aux_user_loss_weight": 0, "enable": false, "force_simulated_sys_loss": true, "full_duplex_dual_loss_mode": "sample", "kind": "single_softmax", "log_logits": false, "maybe_tool_token_loss_weight": null, "perfect_silence_id": null, "speech_loss_weight": null, "system_floors_weight": null, "system_perfect_silence_weight": 0.0, "system_text_escape_audio_weight": null, "user_text_escape_audio_weight": null }, "steps": 1000000, "text_only_steps": null, "tokenizer": { "path": "/mnt/wsfuse/tokenizers/tiktoken/l4_200k_base", "version": "llama4_tiktoken_v6" }, "tokenizer_dir": "/mnt/wsfuse/tokenizers/tiktoken", "torch_seed": 0, "unlimited_steps": false, "use_sum_loss": false, "valid": { "batch_size": 32, "debug": false, "majority_voting": 0, "n_batches": 100, "ppl_files_str": "", "prompt_path": "", "random_fewshots": false, "seed": 42, "seq_len": 2048, "skip_sanity_check": false, "tasks_root_dir": "", "tasks_str": "", "temperature": 0.0, "top_k": 0, "top_p": 0.0, "use_sampling": false, "write_eval": false }, "z_loss_multiplier": 0.0 }