guanwenyu1995 committed
Commit 375563c · verified · 1 Parent(s): d90c25f

Upload config.json with huggingface_hub
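The commit message indicates the file was pushed with the huggingface_hub library. Below is a minimal sketch of such an upload, assuming prior authentication; the repo id is a hypothetical placeholder and is not taken from this page.

```python
# Hedged sketch: push a local config.json with huggingface_hub.
# "your-username/your-model" is a hypothetical placeholder repo id.
from huggingface_hub import HfApi

api = HfApi()  # assumes prior login, e.g. `huggingface-cli login`
api.upload_file(
    path_or_fileobj="config.json",       # local file to upload
    path_in_repo="config.json",          # destination path inside the repo
    repo_id="your-username/your-model",  # hypothetical placeholder
    commit_message="Upload config.json with huggingface_hub",
)
```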

Files changed (1)
  1. config.json +191 -0
config.json ADDED
@@ -0,0 +1,191 @@
+ {
+   "architectures": [
+     "MiniCPMForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "auto_map": {
+     "AutoConfig": "configuration_minicpm.MiniCPMConfig",
+     "AutoModel": "modeling_minicpm.MiniCPMModel",
+     "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
+     "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
+     "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
+   },
+   "bos_token_id": 1,
+   "dim_model_base": 256,
+   "eos_token_id": 73440,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.1,
+   "intermediate_size": 16384,
+   "max_position_embeddings": 65536,
+   "mlp_bias": false,
+   "model_type": "minicpm",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 2,
+   "pad_token_id": 73440,
+   "pretraining_tp": 1,
+   "quantization_config": {
+     "bits": 4,
+     "checkpoint_format": "gptq",
+     "damp_percent": 0.01,
+     "desc_act": false,
+     "group_size": 128,
+     "lm_head": false,
+     "model_file_base_name": null,
+     "model_name_or_path": null,
+     "quant_method": "gptq",
+     "static_groups": true,
+     "sym": true,
+     "true_sequential": true
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": {
+     "factor": 1.0,
+     "long_factor": [
+       0.9982316082870437,
+       1.033048153422584,
+       1.0749920956484724,
+       1.1255096879436193,
+       1.1863348602111476,
+       1.259543828902579,
+       1.3476188888731149,
+       1.4535223827776373,
+       1.5807816745852985,
+       1.7335856049489526,
+       1.9168922912975785,
+       2.1365471404135326,
+       2.3994084200118646,
+       2.713475511863602,
+       3.0880118452194134,
+       3.533650295140154,
+       4.062463396503134,
+       4.687974098908333,
+       5.425075306704039,
+       6.289818967956352,
+       7.29902962722721,
+       8.469695779093664,
+       9.81809877306655,
+       11.358657902065282,
+       13.102505860712087,
+       15.055862949967128,
+       17.218348131364184,
+       19.581439255386453,
+       22.127353314656723,
+       24.828633849376587,
+       27.6486820771775,
+       30.54334096108829,
+       33.46345345363812,
+       36.358112337548896,
+       39.17816056534983,
+       41.879441100069684,
+       44.425355159339965,
+       46.78844628336223,
+       48.95093146475928,
+       50.90428855401433,
+       52.648136512661125,
+       54.18869564165987,
+       55.537098635632745,
+       56.7077647874992,
+       57.71697544677006,
+       58.58171910802236,
+       59.31882031581807,
+       59.94433101822328,
+       60.47314411958625,
+       60.918782569507,
+       61.29331890286281,
+       61.60738599471455,
+       61.87024727431288,
+       62.089902123428836,
+       62.27320880977746,
+       62.42601274014111,
+       62.55327203194878,
+       62.65917552585329,
+       62.74725058582382,
+       62.82045955451526,
+       62.88128472678279,
+       62.931802319077946,
+       62.97374626130382,
+       63.008562806439365
+     ],
+     "original_max_position_embeddings": 65536,
+     "rope_type": "longrope",
+     "short_factor": [
+       0.9982316082870437,
+       1.033048153422584,
+       1.0749920956484724,
+       1.1255096879436193,
+       1.1863348602111476,
+       1.259543828902579,
+       1.3476188888731149,
+       1.4535223827776373,
+       1.5807816745852985,
+       1.7335856049489526,
+       1.9168922912975785,
+       2.1365471404135326,
+       2.3994084200118646,
+       2.713475511863602,
+       3.0880118452194134,
+       3.533650295140154,
+       4.062463396503134,
+       4.687974098908333,
+       5.425075306704039,
+       6.289818967956352,
+       7.29902962722721,
+       8.469695779093664,
+       9.81809877306655,
+       11.358657902065282,
+       13.102505860712087,
+       15.055862949967128,
+       17.218348131364184,
+       19.581439255386453,
+       22.127353314656723,
+       24.828633849376587,
+       27.6486820771775,
+       30.54334096108829,
+       33.46345345363812,
+       36.358112337548896,
+       39.17816056534983,
+       41.879441100069684,
+       44.425355159339965,
+       46.78844628336223,
+       48.95093146475928,
+       50.90428855401433,
+       52.648136512661125,
+       54.18869564165987,
+       55.537098635632745,
+       56.7077647874992,
+       57.71697544677006,
+       58.58171910802236,
+       59.31882031581807,
+       59.94433101822328,
+       60.47314411958625,
+       60.918782569507,
+       61.29331890286281,
+       61.60738599471455,
+       61.87024727431288,
+       62.089902123428836,
+       62.27320880977746,
+       62.42601274014111,
+       62.55327203194878,
+       62.65917552585329,
+       62.74725058582382,
+       62.82045955451526,
+       62.88128472678279,
+       62.931802319077946,
+       62.97374626130382,
+       63.008562806439365
+     ],
+     "type": "longrope"
+   },
+   "rope_theta": 10000.0,
+   "scale_depth": 1.4,
+   "scale_emb": 12,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.53.2",
+   "use_cache": false,
+   "vocab_size": 73448
+ }
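This config declares an auto_map pointing at custom MiniCPM classes (configuration_minicpm.py / modeling_minicpm.py) and an embedded GPTQ quantization_config (4-bit, group_size 128), so loading it through transformers requires trust_remote_code and an installed GPTQ backend. A minimal loading sketch, assuming a hypothetical repo id and that the repo also ships the custom code and quantized weights:

```python
# Hedged sketch: load a checkpoint that ships this config.
# The repo id is a hypothetical placeholder; a GPTQ backend
# (e.g. gptqmodel or auto-gptq) must be installed for the 4-bit weights.
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/your-minicpm-gptq"  # hypothetical placeholder

# trust_remote_code=True is needed because auto_map points at custom modules.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.quantization_config)  # bits=4, group_size=128, quant_method="gptq"

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    device_map="auto",  # place the quantized modules on available devices
)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
```

Note that max_position_embeddings is 65536 and the rope_scaling block uses the longrope type, whose per-dimension long_factor/short_factor tables rescale the RoPE frequencies for long inputs.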