Spaces:
Paused
Paused
File size: 4,029 Bytes
0084610 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import argparse
import time
import warnings
import logging
import torch
from kandinsky import get_T2V_pipeline
def validate_args(args):
    """Check that the requested video resolution is supported.

    Args:
        args: Parsed command-line namespace; only ``args.width`` and
            ``args.height`` are read.

    Raises:
        NotImplementedError: If ``(width, height)`` is not one of the
            supported resolutions listed below.
    """
    # (width, height) pairs accepted by this script.
    supported_sizes = ((512, 512), (512, 768), (768, 512))
    size = (args.width, args.height)
    if size not in supported_sizes:
        raise NotImplementedError(
            f"Provided size of video is not supported: {size}")
def disable_warnings():
warnings.filterwarnings("ignore")
logging.getLogger("torch").setLevel(logging.ERROR)
torch._logging.set_logs(
dynamo=logging.ERROR,
dynamic=logging.ERROR,
aot=logging.ERROR,
inductor=logging.ERROR,
guards=False,
recompiles=False
)
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``argparse`` reads ``sys.argv[1:]``, so existing
            ``parse_args()`` callers behave as before.

    Returns:
        argparse.Namespace: The parsed options. ``--local-rank`` is stored
        as ``local_rank``; all other options keep their flag name.
    """
    parser = argparse.ArgumentParser(
        description="Generate a video using Kandinsky 5"
    )
    parser.add_argument(
        '--local-rank',
        type=int,
        help='local rank'
    )
    parser.add_argument(
        "--config",
        type=str,
        default="./configs/config_5s_sft.yaml",
        help="The config file of the model"
    )
    parser.add_argument(
        "--prompt",
        type=str,
        default="a cat in a blue hat",
        help="The prompt to generate video"
    )
    parser.add_argument(
        "--negative_prompt",
        type=str,
        # Split across two literals for line length only; the value is
        # one single string.
        default=(
            "Static, 2D cartoon, cartoon, 2d animation, paintings, images, "
            "worst quality, low quality, ugly, deformed, walking backwards"
        ),
        help="Negative prompt for classifier-free guidance"
    )
    parser.add_argument(
        "--width",
        type=int,
        default=768,
        choices=[768, 512],
        help="Width of the video in pixels"
    )
    parser.add_argument(
        "--height",
        type=int,
        default=512,
        choices=[768, 512],
        help="Height of the video in pixels"
    )
    parser.add_argument(
        "--video_duration",
        type=int,
        default=5,
        help="Duration of the video in seconds"
    )
    parser.add_argument(
        "--expand_prompt",
        type=int,
        default=1,
        help="Whether to use prompt expansion."
    )
    parser.add_argument(
        "--sample_steps",
        type=int,
        default=None,
        help="The sampling steps number."
    )
    parser.add_argument(
        "--guidance_weight",
        type=float,
        default=None,
        help="Guidance weight."
    )
    parser.add_argument(
        "--scheduler_scale",
        type=float,
        default=5.0,
        help="Scheduler scale."
    )
    parser.add_argument(
        "--output_filename",
        type=str,
        default="./test.mp4",
        help="Name of the resulting file"
    )
    parser.add_argument(
        "--offload",
        action='store_true',
        default=False,
        help="Offload models to save memory or not"
    )
    parser.add_argument(
        "--magcache",
        action='store_true',
        default=False,
        help="Using MagCache (for 50 steps models only)"
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Script entry point: parse and validate the CLI options, build the
    # text-to-video pipeline, run one generation and report the timing.
    disable_warnings()
    args = parse_args()
    validate_args(args)

    # All three pipeline components are mapped to the same CUDA device.
    device = "cuda:0"
    pipe = get_T2V_pipeline(
        device_map={"dit": device, "vae": device, "text_embedder": device},
        conf_path=args.config,
        offload=args.offload,
        magcache=args.magcache,
    )

    # Derive a file name from the prompt when no output name is set.
    # NOTE(review): --output_filename currently defaults to "./test.mp4",
    # so this fallback looks unreachable from the CLI — confirm intent.
    if args.output_filename is None:
        args.output_filename = f"./{args.prompt.replace(' ', '_')}.mp4"

    # NOTE(review): args.negative_prompt and args.local_rank are parsed
    # but not used in this block — confirm whether they should be
    # forwarded to the pipeline.
    generation_kwargs = {
        "time_length": args.video_duration,
        "width": args.width,
        "height": args.height,
        "num_steps": args.sample_steps,
        "guidance_weight": args.guidance_weight,
        "scheduler_scale": args.scheduler_scale,
        "expand_prompts": args.expand_prompt,
        "save_path": args.output_filename,
    }

    start_time = time.perf_counter()
    x = pipe(args.prompt, **generation_kwargs)
    elapsed = time.perf_counter() - start_time

    print(f"TIME ELAPSED: {elapsed}")
    print(f"Generated video is saved to {args.output_filename}")
|