Upload folder using huggingface_hub
Browse files- README.md +4 -2
- config.json +4 -5
- model.safetensors +2 -2
README.md
CHANGED
|
@@ -140,8 +140,8 @@ config.token2wav_config.bigvgan_config.upsample_initial_channel = 32
|
|
| 140 |
config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
|
| 141 |
config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
|
| 142 |
|
| 143 |
-
config.token2wav_config.dit_config.depth =
|
| 144 |
-
config.token2wav_config.dit_config.num_hidden_layers =
|
| 145 |
config.token2wav_config.dit_config.hidden_size = 16
|
| 146 |
config.token2wav_config.dit_config.dim = 16
|
| 147 |
config.token2wav_config.dit_config.emb_dim = 16
|
|
@@ -155,6 +155,8 @@ config.token2wav_config.dit_config.enc_lin_neurons = 16
|
|
| 155 |
config.token2wav_config.dit_config.head_dim = 16
|
| 156 |
config.token2wav_config.dit_config.num_attention_heads = 1
|
| 157 |
config.token2wav_config.dit_config.heads = 1
|
|
|
|
|
|
|
| 158 |
# avoid mismatch in vocab size because this is random model!
|
| 159 |
config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
|
| 160 |
print(config)
|
|
|
|
| 140 |
config.token2wav_config.bigvgan_config.upsample_kernel_sizes = [11, 4]
|
| 141 |
config.token2wav_config.bigvgan_config.upsample_rates = [5, 2]
|
| 142 |
|
| 143 |
+
config.token2wav_config.dit_config.depth = 2
|
| 144 |
+
config.token2wav_config.dit_config.num_hidden_layers = 2
|
| 145 |
config.token2wav_config.dit_config.hidden_size = 16
|
| 146 |
config.token2wav_config.dit_config.dim = 16
|
| 147 |
config.token2wav_config.dit_config.emb_dim = 16
|
|
|
|
| 155 |
config.token2wav_config.dit_config.head_dim = 16
|
| 156 |
config.token2wav_config.dit_config.num_attention_heads = 1
|
| 157 |
config.token2wav_config.dit_config.heads = 1
|
| 158 |
+
config.token2wav_config.dit_config.look_ahead_layers = [1]
|
| 159 |
+
config.token2wav_config.dit_config.look_backward_layers = [0]
|
| 160 |
# avoid mismatch in vocab size because this is random model!
|
| 161 |
config.token2wav_config.dit_config.num_embeds = config.talker_config.vocab_size
|
| 162 |
print(config)
|
config.json
CHANGED
|
@@ -437,7 +437,7 @@
|
|
| 437 |
"chunk_size_feed_forward": 0,
|
| 438 |
"cross_attention_hidden_size": null,
|
| 439 |
"decoder_start_token_id": null,
|
| 440 |
-
"depth":
|
| 441 |
"dim": 16,
|
| 442 |
"diversity_penalty": 0.0,
|
| 443 |
"do_sample": false,
|
|
@@ -488,11 +488,10 @@
|
|
| 488 |
},
|
| 489 |
"length_penalty": 1.0,
|
| 490 |
"look_ahead_layers": [
|
| 491 |
-
|
| 492 |
],
|
| 493 |
"look_backward_layers": [
|
| 494 |
-
0
|
| 495 |
-
20
|
| 496 |
],
|
| 497 |
"max_length": 20,
|
| 498 |
"max_position_embeddings": 32768,
|
|
@@ -504,7 +503,7 @@
|
|
| 504 |
"num_beam_groups": 1,
|
| 505 |
"num_beams": 1,
|
| 506 |
"num_embeds": 8448,
|
| 507 |
-
"num_hidden_layers":
|
| 508 |
"num_return_sequences": 1,
|
| 509 |
"output_attentions": false,
|
| 510 |
"output_hidden_states": false,
|
|
|
|
| 437 |
"chunk_size_feed_forward": 0,
|
| 438 |
"cross_attention_hidden_size": null,
|
| 439 |
"decoder_start_token_id": null,
|
| 440 |
+
"depth": 2,
|
| 441 |
"dim": 16,
|
| 442 |
"diversity_penalty": 0.0,
|
| 443 |
"do_sample": false,
|
|
|
|
| 488 |
},
|
| 489 |
"length_penalty": 1.0,
|
| 490 |
"look_ahead_layers": [
|
| 491 |
+
1
|
| 492 |
],
|
| 493 |
"look_backward_layers": [
|
| 494 |
+
0
|
|
|
|
| 495 |
],
|
| 496 |
"max_length": 20,
|
| 497 |
"max_position_embeddings": 32768,
|
|
|
|
| 503 |
"num_beam_groups": 1,
|
| 504 |
"num_beams": 1,
|
| 505 |
"num_embeds": 8448,
|
| 506 |
+
"num_hidden_layers": 2,
|
| 507 |
"num_return_sequences": 1,
|
| 508 |
"output_attentions": false,
|
| 509 |
"output_hidden_states": false,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:872869d5ab1b80233192acb88483009b3588d82ca7419ec5ff35d7e44673540b
|
| 3 |
+
size 11240320
|