prasadsachin committed on
Commit
b35dc7a
·
verified ·
1 Parent(s): e547eb6

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-hub
3
+ ---
4
+ This is a [`StableDiffusion3` model](https://keras.io/api/keras_hub/models/stable_diffusion3) that was uploaded using the KerasHub library. It can be used with the JAX, TensorFlow, and PyTorch backends.
5
+ Model config:
6
+ * **name:** stable_diffusion_3.5_medium_backbone
7
+ * **trainable:** True
8
+ * **dtype:** {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'bfloat16'}, 'registered_name': None}
9
+ * **mmdit_patch_size:** 2
10
+ * **mmdit_hidden_dim:** 1536
11
+ * **mmdit_num_layers:** 24
12
+ * **mmdit_num_heads:** 24
13
+ * **mmdit_position_size:** 384
14
+ * **mmdit_qk_norm:** rms_norm
15
+ * **mmdit_dual_attention_indices:** [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
16
+ * **vae:** {'module': 'keras_hub.src.models.vae.vae_backbone', 'class_name': 'VAEBackbone', 'config': {'name': 'vae', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'bfloat16'}, 'registered_name': None}, 'encoder_num_filters': [128, 256, 512, 512], 'encoder_num_blocks': [2, 2, 2, 2], 'decoder_num_filters': [512, 512, 256, 128], 'decoder_num_blocks': [3, 3, 3, 3], 'sampler_method': 'sample', 'input_channels': 3, 'sample_channels': 32, 'output_channels': 3, 'scale': 1.5305, 'shift': 0.0609}, 'registered_name': 'VAEBackbone'}
17
+ * **clip_l:** {'module': 'keras_hub.src.models.clip.clip_text_encoder', 'class_name': 'CLIPTextEncoder', 'config': {'name': 'clip_l', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float16'}, 'registered_name': None}, 'vocabulary_size': 49408, 'embedding_dim': 768, 'hidden_dim': 768, 'num_layers': 12, 'num_heads': 12, 'intermediate_dim': 3072, 'intermediate_activation': 'quick_gelu', 'intermediate_output_index': 10, 'max_sequence_length': 77}, 'registered_name': 'keras_hub>CLIPTextEncoder'}
18
+ * **clip_g:** {'module': 'keras_hub.src.models.clip.clip_text_encoder', 'class_name': 'CLIPTextEncoder', 'config': {'name': 'clip_g', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float16'}, 'registered_name': None}, 'vocabulary_size': 49408, 'embedding_dim': 1280, 'hidden_dim': 1280, 'num_layers': 32, 'num_heads': 20, 'intermediate_dim': 5120, 'intermediate_activation': 'gelu', 'intermediate_output_index': 30, 'max_sequence_length': 77}, 'registered_name': 'keras_hub>CLIPTextEncoder'}
19
+ * **t5:** None
20
+ * **latent_channels:** 16
21
+ * **output_channels:** 3
22
+ * **num_train_timesteps:** 1000
23
+ * **shift:** 3.0
24
+ * **image_shape:** [1024, 1024, 3]
25
+
26
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/clip_g_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_g_tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_l_tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/clip_l_tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
clip_g_preprocessor.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
3
+ "class_name": "CLIPPreprocessor",
4
+ "config": {
5
+ "name": "clip_g_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
17
+ "class_name": "CLIPTokenizer",
18
+ "config": {
19
+ "name": "clip_g_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "clip_g_tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false,
32
+ "pad_with_end_token": false
33
+ },
34
+ "registered_name": "keras_hub>CLIPTokenizer"
35
+ },
36
+ "config_file": "clip_g_preprocessor.json",
37
+ "sequence_length": 77,
38
+ "add_start_token": true,
39
+ "add_end_token": true,
40
+ "to_lower": true
41
+ },
42
+ "registered_name": "keras_hub>CLIPPreprocessor"
43
+ }
clip_g_tokenizer.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
3
+ "class_name": "CLIPTokenizer",
4
+ "config": {
5
+ "name": "clip_g_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "clip_g_tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false,
18
+ "pad_with_end_token": false
19
+ },
20
+ "registered_name": "keras_hub>CLIPTokenizer"
21
+ }
clip_l_preprocessor.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
3
+ "class_name": "CLIPPreprocessor",
4
+ "config": {
5
+ "name": "clip_l_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
17
+ "class_name": "CLIPTokenizer",
18
+ "config": {
19
+ "name": "clip_l_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "clip_l_tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false,
32
+ "pad_with_end_token": true
33
+ },
34
+ "registered_name": "keras_hub>CLIPTokenizer"
35
+ },
36
+ "config_file": "clip_l_preprocessor.json",
37
+ "sequence_length": 77,
38
+ "add_start_token": true,
39
+ "add_end_token": true,
40
+ "to_lower": true
41
+ },
42
+ "registered_name": "keras_hub>CLIPPreprocessor"
43
+ }
clip_l_tokenizer.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
3
+ "class_name": "CLIPTokenizer",
4
+ "config": {
5
+ "name": "clip_l_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "clip_l_tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false,
18
+ "pad_with_end_token": true
19
+ },
20
+ "registered_name": "keras_hub>CLIPTokenizer"
21
+ }
config.json ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone",
3
+ "class_name": "StableDiffusion3Backbone",
4
+ "config": {
5
+ "name": "stable_diffusion_3.5_medium_backbone",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "mmdit_patch_size": 2,
16
+ "mmdit_hidden_dim": 1536,
17
+ "mmdit_num_layers": 24,
18
+ "mmdit_num_heads": 24,
19
+ "mmdit_position_size": 384,
20
+ "mmdit_qk_norm": "rms_norm",
21
+ "mmdit_dual_attention_indices": [
22
+ 0,
23
+ 1,
24
+ 2,
25
+ 3,
26
+ 4,
27
+ 5,
28
+ 6,
29
+ 7,
30
+ 8,
31
+ 9,
32
+ 10,
33
+ 11,
34
+ 12
35
+ ],
36
+ "vae": {
37
+ "module": "keras_hub.src.models.vae.vae_backbone",
38
+ "class_name": "VAEBackbone",
39
+ "config": {
40
+ "name": "vae",
41
+ "trainable": true,
42
+ "dtype": {
43
+ "module": "keras",
44
+ "class_name": "DTypePolicy",
45
+ "config": {
46
+ "name": "bfloat16"
47
+ },
48
+ "registered_name": null
49
+ },
50
+ "encoder_num_filters": [
51
+ 128,
52
+ 256,
53
+ 512,
54
+ 512
55
+ ],
56
+ "encoder_num_blocks": [
57
+ 2,
58
+ 2,
59
+ 2,
60
+ 2
61
+ ],
62
+ "decoder_num_filters": [
63
+ 512,
64
+ 512,
65
+ 256,
66
+ 128
67
+ ],
68
+ "decoder_num_blocks": [
69
+ 3,
70
+ 3,
71
+ 3,
72
+ 3
73
+ ],
74
+ "sampler_method": "sample",
75
+ "input_channels": 3,
76
+ "sample_channels": 32,
77
+ "output_channels": 3,
78
+ "scale": 1.5305,
79
+ "shift": 0.0609
80
+ },
81
+ "registered_name": "VAEBackbone"
82
+ },
83
+ "clip_l": {
84
+ "module": "keras_hub.src.models.clip.clip_text_encoder",
85
+ "class_name": "CLIPTextEncoder",
86
+ "config": {
87
+ "name": "clip_l",
88
+ "trainable": true,
89
+ "dtype": {
90
+ "module": "keras",
91
+ "class_name": "DTypePolicy",
92
+ "config": {
93
+ "name": "float16"
94
+ },
95
+ "registered_name": null
96
+ },
97
+ "vocabulary_size": 49408,
98
+ "embedding_dim": 768,
99
+ "hidden_dim": 768,
100
+ "num_layers": 12,
101
+ "num_heads": 12,
102
+ "intermediate_dim": 3072,
103
+ "intermediate_activation": "quick_gelu",
104
+ "intermediate_output_index": 10,
105
+ "max_sequence_length": 77
106
+ },
107
+ "registered_name": "keras_hub>CLIPTextEncoder"
108
+ },
109
+ "clip_g": {
110
+ "module": "keras_hub.src.models.clip.clip_text_encoder",
111
+ "class_name": "CLIPTextEncoder",
112
+ "config": {
113
+ "name": "clip_g",
114
+ "trainable": true,
115
+ "dtype": {
116
+ "module": "keras",
117
+ "class_name": "DTypePolicy",
118
+ "config": {
119
+ "name": "float16"
120
+ },
121
+ "registered_name": null
122
+ },
123
+ "vocabulary_size": 49408,
124
+ "embedding_dim": 1280,
125
+ "hidden_dim": 1280,
126
+ "num_layers": 32,
127
+ "num_heads": 20,
128
+ "intermediate_dim": 5120,
129
+ "intermediate_activation": "gelu",
130
+ "intermediate_output_index": 30,
131
+ "max_sequence_length": 77
132
+ },
133
+ "registered_name": "keras_hub>CLIPTextEncoder"
134
+ },
135
+ "t5": null,
136
+ "latent_channels": 16,
137
+ "output_channels": 3,
138
+ "num_train_timesteps": 1000,
139
+ "shift": 3.0,
140
+ "image_shape": [
141
+ 1024,
142
+ 1024,
143
+ 3
144
+ ]
145
+ },
146
+ "registered_name": "keras_hub>StableDiffusion3Backbone"
147
+ }
metadata.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.10.0",
3
+ "keras_hub_version": "0.23.0.dev0",
4
+ "parameter_count": 3371793763,
5
+ "date_saved": "2025-09-24@21:55:38",
6
+ "tasks": [
7
+ "ImageToImage",
8
+ "Inpaint",
9
+ "TextToImage"
10
+ ]
11
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df068258b964120bcf6035186d747ae46c088d3f8b59e333a771cc1ca3d50ee5
3
+ size 6747340928
preprocessor.json ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor",
3
+ "class_name": "StableDiffusion3TextToImagePreprocessor",
4
+ "config": {
5
+ "name": "stable_diffusion_3_text_to_image_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "bfloat16"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "preprocessor.json",
16
+ "clip_l_preprocessor": {
17
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
18
+ "class_name": "CLIPPreprocessor",
19
+ "config": {
20
+ "name": "clip_l_preprocessor",
21
+ "trainable": true,
22
+ "dtype": {
23
+ "module": "keras",
24
+ "class_name": "DTypePolicy",
25
+ "config": {
26
+ "name": "bfloat16"
27
+ },
28
+ "registered_name": null
29
+ },
30
+ "tokenizer": {
31
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
32
+ "class_name": "CLIPTokenizer",
33
+ "config": {
34
+ "name": "clip_l_tokenizer",
35
+ "trainable": true,
36
+ "dtype": {
37
+ "module": "keras",
38
+ "class_name": "DTypePolicy",
39
+ "config": {
40
+ "name": "int32"
41
+ },
42
+ "registered_name": null
43
+ },
44
+ "config_file": "clip_l_tokenizer.json",
45
+ "sequence_length": null,
46
+ "add_prefix_space": false,
47
+ "pad_with_end_token": true
48
+ },
49
+ "registered_name": "keras_hub>CLIPTokenizer"
50
+ },
51
+ "config_file": "clip_l_preprocessor.json",
52
+ "sequence_length": 77,
53
+ "add_start_token": true,
54
+ "add_end_token": true,
55
+ "to_lower": true
56
+ },
57
+ "registered_name": "keras_hub>CLIPPreprocessor"
58
+ },
59
+ "clip_g_preprocessor": {
60
+ "module": "keras_hub.src.models.clip.clip_preprocessor",
61
+ "class_name": "CLIPPreprocessor",
62
+ "config": {
63
+ "name": "clip_g_preprocessor",
64
+ "trainable": true,
65
+ "dtype": {
66
+ "module": "keras",
67
+ "class_name": "DTypePolicy",
68
+ "config": {
69
+ "name": "bfloat16"
70
+ },
71
+ "registered_name": null
72
+ },
73
+ "tokenizer": {
74
+ "module": "keras_hub.src.models.clip.clip_tokenizer",
75
+ "class_name": "CLIPTokenizer",
76
+ "config": {
77
+ "name": "clip_g_tokenizer",
78
+ "trainable": true,
79
+ "dtype": {
80
+ "module": "keras",
81
+ "class_name": "DTypePolicy",
82
+ "config": {
83
+ "name": "int32"
84
+ },
85
+ "registered_name": null
86
+ },
87
+ "config_file": "clip_g_tokenizer.json",
88
+ "sequence_length": null,
89
+ "add_prefix_space": false,
90
+ "pad_with_end_token": false
91
+ },
92
+ "registered_name": "keras_hub>CLIPTokenizer"
93
+ },
94
+ "config_file": "clip_g_preprocessor.json",
95
+ "sequence_length": 77,
96
+ "add_start_token": true,
97
+ "add_end_token": true,
98
+ "to_lower": true
99
+ },
100
+ "registered_name": "keras_hub>CLIPPreprocessor"
101
+ },
102
+ "t5_preprocessor": null
103
+ },
104
+ "registered_name": "keras_hub>StableDiffusion3TextToImagePreprocessor"
105
+ }