UCSC-VLAA
/

openvision3-vit-large-patch2-32

Model card Files Files and versions

openvision3-vit-large-patch2-32 / open_clip_config.json

Letian2003's picture

Upload standalone vision encoder: vit-only first commit

f9009f7 verified 4 months ago

history blame contribute delete

514 Bytes

	{
	"model_cfg": {
	"embed_dim": 1024,
	"vision_cfg": {
	"layers": 24,
	"width": 1024,
	"patch_size": 2,
	"image_size": 32,
	"no_ln_pre": true,
	"pool_type": "avg",
	"final_ln_after_pool": true,
	"norm_kwargs": {
	"eps": 1e-06
	},
	"output_tokens": true,
	"head_width": 64,
	"in_channels": 16
	},
	"text_cfg": {
	"context_length": 77,
	"vocab_size": 49408,
	"width": 512,
	"heads": 8,
	"layers": 12
	}
	}
	}