Spaces:

AIDC-AI
/

Ovis-Image-7B

Running on Zero

App Files Files Community

Ovis-Image-7B / app.py

Flourish

Update app.py

c27ae96 verified 9 days ago

raw

history blame contribute delete

5.56 kB

	import os
	import torch
	import gradio as gr
	import spaces
	import random
	import numpy as np
	from safetensors.torch import load_file
	from huggingface_hub import hf_hub_download

	from diffusers.utils import logging
	from PIL import Image

	from ovis_image.model.tokenizer import build_ovis_tokenizer
	from ovis_image.model.autoencoder import load_ae
	from ovis_image.model.hf_embedder import OvisEmbedder
	from ovis_image.model.model import OvisImageModel
	from ovis_image.sampling import generate_image
	from ovis_image import ovis_image_configs

	# Silence everything below error level from diffusers' logger.
	logging.set_verbosity_error()

	# Upper bound used for the seed slider in the UI.
	MAX_SEED = np.iinfo(np.int32).max

	# The device is fixed to CUDA (no CPU fallback) — presumably because the
	# Space runs on ZeroGPU, where weights are placed on "cuda" at import time.
	device = "cuda"
	_dtype = torch.bfloat16
	hf_token = os.getenv("HF_TOKEN")

	# Identifiers of the checkpoints this demo depends on.
	_WEIGHTS_REPO = "AIDC-AI/Ovis-Image-7B"
	_TEXT_ENCODER_ID = "AIDC-AI/Ovis2.5-2B"


	def _fetch_checkpoint(filename):
	    """Download ``filename`` from the Ovis-Image-7B repo and return its local path."""
	    return hf_hub_download(
	        repo_id=_WEIGHTS_REPO,
	        filename=filename,
	        token=hf_token,
	    )


	# --- Image model -----------------------------------------------------------
	print("init ovis_image")
	model_config = ovis_image_configs["ovis-image-7b"]
	ovis_image = OvisImageModel(model_config)
	ovis_image_path = _fetch_checkpoint("ovis_image.safetensors")
	model_state_dict = load_file(ovis_image_path)
	# Report any keys that did not line up between the checkpoint and the model.
	missing_keys, unexpected_keys = ovis_image.load_state_dict(model_state_dict)
	print(f"Load Missing Keys {missing_keys}")
	print(f"Load Unexpected Keys {unexpected_keys}")
	ovis_image = ovis_image.to(device=device, dtype=_dtype)
	ovis_image.eval()

	# --- Autoencoder -----------------------------------------------------------
	print("init vae")
	vae_path = _fetch_checkpoint("ae.safetensors")
	autoencoder = load_ae(
	    vae_path,
	    model_config.autoencoder_params,
	    device=device,
	    dtype=_dtype,
	    random_init=False,
	)
	autoencoder.eval()

	# --- Text tokenizer / encoder ----------------------------------------------
	# Both are built from the "AIDC-AI/Ovis2.5-2B" identifier directly, not from
	# a subfolder of the Ovis-Image-7B repo (presumably resolved as a Hub repo
	# id by the ovis_image helpers — TODO confirm).
	print("init ovis")
	ovis_tokenizer = build_ovis_tokenizer(
	    _TEXT_ENCODER_ID,
	)
	ovis_encoder = OvisEmbedder(
	    model_path=_TEXT_ENCODER_ID,
	    random_init=False,
	    low_cpu_mem_usage=True,
	    torch_dtype=_dtype,
	).to(device=device, dtype=_dtype)


	@spaces.GPU(duration=75)
	def generate(prompt, img_height=1024, img_width=1024, seed=42, steps=50, guidance_scale=5.0):
	    """Run one text-to-image sampling pass and return the first image.

	    The call is wrapped in ``spaces.GPU(duration=75)``.

	    Args:
	        prompt: Text prompt forwarded to ``generate_image``.
	        img_height: Forwarded as ``img_height``.
	        img_width: Forwarded as ``img_width``.
	        seed: Forwarded as ``seed``.
	        steps: Forwarded as ``denoising_steps``.
	        guidance_scale: Forwarded as ``cfg_scale``.

	    Returns:
	        The first element of the batch as a ``uint8`` NumPy array with the
	        original axis 1 moved last (assumes ``generate_image`` returns a
	        (batch, channels, height, width) tensor — TODO confirm).
	    """
	    print(f'inference with prompt : {prompt}, size: {img_height}x{img_width}, seed : {seed}, step : {steps}, cfg : {guidance_scale}')

	    # Sample on whichever device the image model's weights currently live on.
	    run_device = next(ovis_image.parameters()).device
	    sampled = generate_image(
	        device=run_device,
	        dtype=_dtype,
	        model=ovis_image,
	        prompt=prompt,
	        autoencoder=autoencoder,
	        ovis_tokenizer=ovis_tokenizer,
	        ovis_encoder=ovis_encoder,
	        img_height=img_height,
	        img_width=img_width,
	        denoising_steps=steps,
	        cfg_scale=guidance_scale,
	        seed=seed,
	    )

	    # Convert the tensor into an 8-bit array that gr.Image can display.
	    # NOTE(review): values are clamped to [-1, 1] but then scaled by 255,
	    # which only yields valid pixel values for inputs in [0, 1] — confirm
	    # the output range of generate_image.
	    bounded = sampled.clamp(-1, 1)
	    channels_last = bounded.cpu().permute(0, 2, 3, 1).float().numpy()
	    pixels = (channels_last * 255).round().astype("uint8")

	    return pixels[0]

	# Sample prompts handed to gr.Examples in the UI definition.
	examples = [
	    "Solar punk vehicle in a bustling city",
	    "An anthropomorphic cat riding a Harley Davidson in Arizona with sunglasses and a leather jacket",
	    "An elderly woman poses for a high fashion photoshoot in colorful, patterned clothes with a cyberpunk 2077 vibe",
	]

	# Stylesheet passed to gr.Blocks: centres the element with id
	# "col-container" and caps its width at 520px.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 520px;
	}
	"""

	# Gradio UI: a prompt box with a Run button, the result image, an
	# "Advanced Settings" accordion of sliders, example prompts, and the event
	# wiring that sends everything to `generate`.
	# NOTE(review): nesting indentation is missing in this copy, so which
	# container each widget belongs to (notably `seed`, `gr.Examples` and
	# `gr.on`) cannot be confirmed from here — verify before re-indenting.
	with gr.Blocks(css=css) as demo:

	# elem_id matches the `#col-container` rule in `css`.
	with gr.Column(elem_id="col-container"):
	# NOTE(review): the f-prefix is unnecessary; the string has no placeholders.
	gr.Markdown(f"""# Ovis-Image
	[[code](https://github.com/AIDC-AI/Ovis-Image)] [[model](https://huggingface.co/AIDC-AI/Ovis-Image-7B)]
	""")

	with gr.Row():

	# Single-line, label-less prompt input; its submit event triggers a run.
	prompt = gr.Text(
	label="Prompt",
	show_label=False,
	max_lines=1,
	placeholder="Enter your prompt here",
	container=False,
	)

	run_button = gr.Button("Run", scale=0)

	# Output component that receives the array returned by `generate`.
	result = gr.Image(label="Result", show_label=False)

	# The slider defaults below (1024, 1024, 5.0, 50, 42) equal the keyword
	# defaults of `generate`.
	with gr.Accordion("Advanced Settings", open=False):
	with gr.Row():

	img_height = gr.Slider(
	label="Image Height",
	minimum=256,
	maximum=2048,
	step=32,
	value=1024,
	)

	img_width = gr.Slider(
	label="Image Width",
	minimum=256,
	maximum=2048,
	step=32,
	value=1024,
	)

	with gr.Row():

	guidance_scale = gr.Slider(
	label="Guidance Scale",
	minimum=1,
	maximum=14,
	step=0.1,
	value=5.0,
	)

	# Passed to `generate` as its `steps` argument.
	num_inference_steps = gr.Slider(
	label="Number of inference steps",
	minimum=1,
	maximum=100,
	step=1,
	value=50,
	)

	# Seed range is 0..MAX_SEED (the int32 maximum).
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=42,
	)

	# Examples supply only the prompt, so `generate` runs with its default
	# size, seed, steps and guidance scale for them.
	gr.Examples(
	examples = examples,
	fn = generate,
	inputs = [prompt],
	outputs = [result],
	cache_examples="lazy"
	)

	# Clicking Run or submitting the prompt both call `generate`; the inputs
	# are listed in the same order as its positional parameters.
	gr.on(
	triggers=[run_button.click, prompt.submit],
	fn = generate,
	inputs = [prompt, img_height, img_width, seed, num_inference_steps, guidance_scale],
	outputs = [result]
	)

	# Launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()