Cadence-Fast / config.json
{
"model_type": "cadence_punctuation",
"auto_map": {
"AutoConfig": "modeling_gemma3_punctuation.Gemma3PunctuationConfig",
"AutoModelForTokenClassification": "modeling_gemma3_punctuation.Gemma3ForTokenClassification",
"AutoModel": "modeling_gemma3_punctuation.Gemma3ForTokenClassification"
},
"_sliding_window_pattern": 6,
"architectures": [
"Gemma3ForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"bos_token_id": 2,
"cache_implementation": "hybrid",
"classifier_dropout_prob": 0.0,
"dtype": "float32",
"eos_token_id": 1,
"final_logit_softcapping": null,
"head_dim": 256,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 640,
"id2label": {
"0": "O",
"1": ".",
"10": "\"",
"11": "\u0964",
"12": "(",
"13": ")",
"14": ":",
"15": "\u066c",
"16": "\u06d4",
"17": "\u061f",
"18": ".\"",
"19": ").",
"2": ",",
"20": "),",
"21": "\",",
"22": "\".",
"23": "?\"",
"24": "\"?",
"25": "\u0964\"",
"26": "\"\u0964",
"27": "\u060c",
"28": "\u1c7e",
"29": "\u0965",
"3": "?",
"30": "\u1c7e\u0964",
"4": "-",
"5": ";",
"6": "_",
"7": "!",
"8": "'",
"9": "..."
},
"initializer_range": 0.02,
"intermediate_size": 2048,
"label2id": {
"!": 7,
"\"": 10,
"\",": 21,
"\".": 22,
"\"?": 24,
"\"\u0964": 26,
"'": 8,
"(": 12,
")": 13,
"),": 20,
").": 19,
",": 2,
"-": 4,
".": 1,
".\"": 18,
"...": 9,
":": 14,
";": 5,
"?": 3,
"?\"": 23,
"O": 0,
"_": 6,
"\u060c": 27,
"\u061f": 17,
"\u066c": 15,
"\u06d4": 16,
"\u0964": 11,
"\u0964\"": 25,
"\u0965": 29,
"\u1c7e": 28,
"\u1c7e\u0964": 30
},
"layer_types": [
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"sliding_attention",
"full_attention"
],
"max_position_embeddings": 32768,
"model_type": "cadence_punctuation",
"num_attention_heads": 4,
"num_hidden_layers": 18,
"num_key_value_heads": 1,
"pad_token_id": 0,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000.0,
"rope_scaling": null,
"rope_theta": 1000000.0,
"sliding_window": 512,
"sliding_window_pattern": 6,
"transformers_version": "4.57.1",
"use_bidirectional_attention": false,
"use_cache": false,
"use_non_causal_attention": true,
"vocab_size": 262144
}
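
The `auto_map` block routes the Auto classes to custom code (`modeling_gemma3_punctuation.py`) shipped alongside this config, so loading requires `trust_remote_code=True`. A minimal loading sketch, assuming the repo id `psidharth567/Cadence-Fast` (inferred from the page header; substitute the actual Hub path):

```python
from transformers import AutoConfig, AutoModelForTokenClassification, AutoTokenizer

repo = "psidharth567/Cadence-Fast"  # assumed repo id; adjust to the real Hub path

# auto_map points AutoConfig / AutoModelForTokenClassification at the custom
# Gemma3Punctuation classes bundled with the repo, hence trust_remote_code.
config = AutoConfig.from_pretrained(repo, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForTokenClassification.from_pretrained(repo, trust_remote_code=True)

print(config.num_labels)  # 31 punctuation classes, per id2label above
```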
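The `layer_types` list encodes the hybrid attention layout implied by `sliding_window_pattern: 6`: every sixth layer uses global attention, the rest a 512-token sliding window. A quick check of that invariant against the loaded config (continuing from the snippet above):

```python
# Layers 6, 12, and 18 (1-indexed) are full attention; all others slide
# over a 512-token window (config.sliding_window).
for i, kind in enumerate(config.layer_types):
    expected = "full_attention" if (i + 1) % 6 == 0 else "sliding_attention"
    assert kind == expected
```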
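`id2label` maps each class to the punctuation attached after a token, with `"O"` meaning no punctuation; the inventory covers Latin, Devanagari (danda/double danda), Arabic, and Ol Chiki marks plus quote combinations. A decoding sketch continuing from the loading snippet, assuming the custom head returns a standard `TokenClassifierOutput`; the exact pre- and post-processing shipped with the repo may differ, and subword merging and special tokens are glossed over here:

```python
import torch

text = "hello how are you"  # unpunctuated input
enc = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    logits = model(**enc).logits  # (1, seq_len, 31), assuming standard output
pred = logits.argmax(dim=-1)[0].tolist()

# Attach the predicted mark after each token; label "O" (id 0) means
# "no punctuation here". Detokenization below is approximate.
tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"][0].tolist())
pieces = []
for tok, label_id in zip(tokens, pred):
    label = model.config.id2label[label_id]
    pieces.append(tok + ("" if label == "O" else label))
print(tokenizer.convert_tokens_to_string(pieces))
```

Note that `use_non_causal_attention: true` lets the classifier attend to context on both sides of a token, which is why the whole sequence is scored in a single forward pass rather than autoregressively.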