Haziq-exe committed on
Commit
4abb940
·
verified ·
1 Parent(s): 49fe28f

Sync config token ids

Browse files
config.json CHANGED
@@ -135,19 +135,24 @@
135
  "torch_dtype": "float32",
136
  "vocab_size": 1024
137
  },
 
138
  "audio_pool_step": 2,
 
139
  "auto_map": {
140
  "AutoConfig": "configuration_minicpm.MiniCPMOConfig",
141
  "AutoModel": "modeling_minicpmo.MiniCPMO",
142
  "AutoModelForCausalLM": "modeling_minicpmo.MiniCPMO"
143
  },
144
  "batch_vision_input": true,
145
- "bos_token_id": 151643,
146
  "chunk_input": true,
147
  "drop_vision_last_layer": false,
148
- "eos_token_id": 151645,
149
  "hidden_act": "silu",
150
  "hidden_size": 48,
 
 
 
151
  "image_size": 448,
152
  "init_audio": true,
153
  "init_tts": true,
@@ -162,6 +167,7 @@
162
  "num_heads": 4,
163
  "num_hidden_layers": 2,
164
  "num_key_value_heads": 4,
 
165
  "patch_size": 14,
166
  "query_num": 64,
167
  "rms_norm_eps": 1e-06,
@@ -170,7 +176,9 @@
170
  "max_slice_nums": 9,
171
  "model_type": "minicpmv"
172
  },
 
173
  "slice_mode": true,
 
174
  "sliding_window": null,
175
  "stream_input": false,
176
  "tie_word_embeddings": false,
@@ -188,6 +196,7 @@
188
  "num_mel_bins": 10,
189
  "num_text_tokens": 1024
190
  },
 
191
  "use_cache": true,
192
  "use_image_id": true,
193
  "use_sliding_window": false,
 
135
  "torch_dtype": "float32",
136
  "vocab_size": 1024
137
  },
138
+ "audio_end_id": 45,
139
  "audio_pool_step": 2,
140
+ "audio_start_id": 43,
141
  "auto_map": {
142
  "AutoConfig": "configuration_minicpm.MiniCPMOConfig",
143
  "AutoModel": "modeling_minicpmo.MiniCPMO",
144
  "AutoModelForCausalLM": "modeling_minicpmo.MiniCPMO"
145
  },
146
  "batch_vision_input": true,
147
+ "bos_token_id": 1,
148
  "chunk_input": true,
149
  "drop_vision_last_layer": false,
150
+ "eos_token_id": 1,
151
  "hidden_act": "silu",
152
  "hidden_size": 48,
153
+ "im_end_id": 3,
154
+ "im_start_id": 2,
155
+ "image_id": 23,
156
  "image_size": 448,
157
  "init_audio": true,
158
  "init_tts": true,
 
167
  "num_heads": 4,
168
  "num_hidden_layers": 2,
169
  "num_key_value_heads": 4,
170
+ "pad_token_id": 1,
171
  "patch_size": 14,
172
  "query_num": 64,
173
  "rms_norm_eps": 1e-06,
 
176
  "max_slice_nums": 9,
177
  "model_type": "minicpmv"
178
  },
179
+ "slice_end_id": 34,
180
  "slice_mode": true,
181
+ "slice_start_id": 33,
182
  "sliding_window": null,
183
  "stream_input": false,
184
  "tie_word_embeddings": false,
 
196
  "num_mel_bins": 10,
197
  "num_text_tokens": 1024
198
  },
199
+ "unk_token_id": 0,
200
  "use_cache": true,
201
  "use_image_id": true,
202
  "use_sliding_window": false,
generation_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 151643,
4
- "eos_token_id": 151645,
 
5
  "transformers_version": "4.44.2"
6
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 1,
6
  "transformers_version": "4.44.2"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c738167284d94b6a41654596a1bb5cb419616d832085f2de67280df0ba3f51b
3
  size 5535352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a686f1801e1ed4d44e8e1ac7fc175dcae2b581570dea016e586c9f59e35c75
3
  size 5535352
modeling_minicpmo.py CHANGED
@@ -56,7 +56,7 @@ from transformers.cache_utils import StaticCache
56
  from transformers.modeling_outputs import BaseModelOutputWithPast
57
  from transformers.modeling_outputs import ModelOutput
58
  from transformers.models.whisper.modeling_whisper import ACT2FN
59
- from transformers.models.whisper.modeling_whisper import WhisperAttention
60
  from transformers.models.whisper.modeling_whisper import WhisperConfig
61
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
62
 
@@ -1890,7 +1890,7 @@ class MiniCPMWhisperEncoderLayer(nn.Module):
1890
  def __init__(self, config: WhisperConfig, layer_idx: int = None):
1891
  super().__init__()
1892
  self.embed_dim = config.d_model
1893
- self.self_attn = WhisperAttention(
1894
  embed_dim=self.embed_dim,
1895
  num_heads=config.encoder_attention_heads,
1896
  dropout=config.attention_dropout,
 
56
  from transformers.modeling_outputs import BaseModelOutputWithPast
57
  from transformers.modeling_outputs import ModelOutput
58
  from transformers.models.whisper.modeling_whisper import ACT2FN
59
+ from transformers.models.whisper.modeling_whisper import WHISPER_ATTENTION_CLASSES
60
  from transformers.models.whisper.modeling_whisper import WhisperConfig
61
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
62
 
 
1890
  def __init__(self, config: WhisperConfig, layer_idx: int = None):
1891
  super().__init__()
1892
  self.embed_dim = config.d_model
1893
+ self.self_attn = WHISPER_ATTENTION_CLASSES[config._attn_implementation](
1894
  embed_dim=self.embed_dim,
1895
  num_heads=config.encoder_attention_heads,
1896
  dropout=config.attention_dropout,