{
| "model_type": "cadence_punctuation", | |
| "auto_map": { | |
| "AutoConfig": "modeling_gemma3_punctuation.Gemma3PunctuationConfig", | |
| "AutoModelForTokenClassification": "modeling_gemma3_punctuation.Gemma3ForTokenClassification", | |
| "AutoModel": "modeling_gemma3_punctuation.Gemma3ForTokenClassification" | |
| }, | |
| "_sliding_window_pattern": 6, | |
| "architectures": [ | |
| "Gemma3ForTokenClassification" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "attn_logit_softcapping": null, | |
| "bos_token_id": 2, | |
| "cache_implementation": "hybrid", | |
| "classifier_dropout_prob": 0.0, | |
| "dtype": "float32", | |
| "eos_token_id": 1, | |
| "final_logit_softcapping": null, | |
| "head_dim": 256, | |
| "hidden_activation": "gelu_pytorch_tanh", | |
| "hidden_size": 640, | |
| "id2label": { | |
| "0": "O", | |
| "1": ".", | |
| "10": "\"", | |
| "11": "\u0964", | |
| "12": "(", | |
| "13": ")", | |
| "14": ":", | |
| "15": "\u066c", | |
| "16": "\u06d4", | |
| "17": "\u061f", | |
| "18": ".\"", | |
| "19": ").", | |
| "2": ",", | |
| "20": "),", | |
| "21": "\",", | |
| "22": "\".", | |
| "23": "?\"", | |
| "24": "\"?", | |
| "25": "\u0964\"", | |
| "26": "\"\u0964", | |
| "27": "\u060c", | |
| "28": "\u1c7e", | |
| "29": "\u0965", | |
| "3": "?", | |
| "30": "\u1c7e\u0964", | |
| "4": "-", | |
| "5": ";", | |
| "6": "_", | |
| "7": "!", | |
| "8": "'", | |
| "9": "..." | |
| }, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 2048, | |
| "label2id": { | |
| "!": 7, | |
| "\"": 10, | |
| "\",": 21, | |
| "\".": 22, | |
| "\"?": 24, | |
| "\"\u0964": 26, | |
| "'": 8, | |
| "(": 12, | |
| ")": 13, | |
| "),": 20, | |
| ").": 19, | |
| ",": 2, | |
| "-": 4, | |
| ".": 1, | |
| ".\"": 18, | |
| "...": 9, | |
| ":": 14, | |
| ";": 5, | |
| "?": 3, | |
| "?\"": 23, | |
| "O": 0, | |
| "_": 6, | |
| "\u060c": 27, | |
| "\u061f": 17, | |
| "\u066c": 15, | |
| "\u06d4": 16, | |
| "\u0964": 11, | |
| "\u0964\"": 25, | |
| "\u0965": 29, | |
| "\u1c7e": 28, | |
| "\u1c7e\u0964": 30 | |
| }, | |
| "layer_types": [ | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "sliding_attention", | |
| "full_attention" | |
| ], | |
| "max_position_embeddings": 32768, | |
| "model_type": "cadence_punctuation", | |
| "num_attention_heads": 4, | |
| "num_hidden_layers": 18, | |
| "num_key_value_heads": 1, | |
| "pad_token_id": 0, | |
| "query_pre_attn_scalar": 256, | |
| "rms_norm_eps": 1e-06, | |
| "rope_local_base_freq": 10000.0, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "sliding_window": 512, | |
| "sliding_window_pattern": 6, | |
| "transformers_version": "4.57.1", | |
| "use_bidirectional_attention": false, | |
| "use_cache": false, | |
| "use_non_causal_attention": true, | |
| "vocab_size": 262144 | |
| } | |
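
This config describes a `Gemma3ForTokenClassification` checkpoint for punctuation restoration: an 18-layer, 640-hidden Gemma 3 variant with a 31-way classifier head (`id2label`) and an `auto_map` that routes the `Auto*` classes to `modeling_gemma3_punctuation.py` shipped inside the model repository. Below is a minimal loading-and-decoding sketch, assuming a Hub repo that contains this `config.json` next to that custom modeling file; the repo id `your-org/cadence-punctuation` is a placeholder, not the published name, and the `▁`-based detokenization assumes a SentencePiece-style tokenizer.

```python
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

repo_id = "your-org/cadence-punctuation"  # hypothetical repo id; replace with the real one

# trust_remote_code is required: auto_map points AutoConfig and
# AutoModelForTokenClassification at classes defined in
# modeling_gemma3_punctuation.py inside the repo, not in transformers itself.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForTokenClassification.from_pretrained(repo_id, trust_remote_code=True)
model.eval()

text = "hello how are you i am fine"
enc = tokenizer(text, return_tensors="pt")

with torch.no_grad():
    logits = model(**enc).logits  # (1, seq_len, 31): one score per label in id2label

pred_ids = logits.argmax(dim=-1)[0].tolist()
input_ids = enc["input_ids"][0].tolist()
tokens = tokenizer.convert_ids_to_tokens(input_ids)

# Re-insert punctuation: label "O" (id 0) means "no punctuation after this
# token"; every other label is the literal punctuation string to append.
pieces = []
for token, token_id, label_id in zip(tokens, input_ids, pred_ids):
    if token_id in tokenizer.all_special_ids:  # skip <bos>, <pad>, ...
        continue
    word = token.replace("\u2581", " ")  # SentencePiece word-boundary marker
    label = model.config.id2label[label_id]
    pieces.append(word if label == "O" else word + label)
print("".join(pieces).strip())
```

Note that `use_non_causal_attention: true` together with `use_cache: false` is consistent with bidirectional attention over the whole input, which is what a token-classification head wants and why generation-style KV caching is disabled here.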